diff --git a/Makefile b/Makefile
index 35bca73..9bba711 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-VERSION := 0.9.5.3
+VERSION := 0.9.5.4
.PHONY: lint vis clean common client server passwd\
subpkgs install uninstall reinstall scc
diff --git a/go.mod b/go.mod
index d99b855..0699d6f 100644
--- a/go.mod
+++ b/go.mod
@@ -1,37 +1,38 @@
module blitter.com/go/xs
-go 1.18
+go 1.20
require (
+ blitter.com/go/chacha20 v0.0.0-20200130200441-214e4085f54c
blitter.com/go/cryptmt v1.0.2
blitter.com/go/goutmp v1.0.6
+ blitter.com/go/groestl v0.0.0-20220410000905-c4decbf31d64
blitter.com/go/herradurakex v1.0.0
- blitter.com/go/hopscotch v0.0.0-20220617051533-4b42ccd4e00a
+ blitter.com/go/hopscotch v0.1.1
blitter.com/go/kyber v0.0.0-20200130200857-6f2021cb88d9
+ blitter.com/go/mtwist v1.0.1
blitter.com/go/newhope v0.0.0-20200130200750-192fc08a8aae
github.com/aead/chacha20 v0.0.0-20180709150244-8b13a72661da
github.com/creack/pty v1.1.18
github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f
+ github.com/klauspost/cpuid/v2 v2.2.5
+ github.com/klauspost/reedsolomon v1.11.8
github.com/kuking/go-frodokem v1.0.2
- github.com/mattn/go-isatty v0.0.16
+ github.com/mattn/go-isatty v0.0.19
+ github.com/pkg/errors v0.9.1
+ github.com/templexxx/cpufeat v0.0.0-20180724012125-cef66df7f161
+ github.com/templexxx/xor v0.0.0-20191217153810-f85b25db303b
+ github.com/tjfoc/gmsm v1.4.1
github.com/xtaci/kcp-go v5.4.20+incompatible
- golang.org/x/crypto v0.0.0-20220919173607-35f4265a4bc0
- golang.org/x/sys v0.0.0-20220919091848-fb04ddd9f9c8
+ golang.org/x/crypto v0.13.0
+ golang.org/x/net v0.15.0
+ golang.org/x/sys v0.12.0
+ gopkg.in/hlandau/easymetric.v1 v1.0.0
+ gopkg.in/hlandau/measurable.v1 v1.0.1
gopkg.in/hlandau/passlib.v1 v1.0.11
)
require (
- blitter.com/go/chacha20 v0.0.0-20200130200441-214e4085f54c // indirect
- blitter.com/go/groestl v0.0.0-20220410000905-c4decbf31d64 // indirect
- blitter.com/go/mtwist v1.0.1 // indirect
- github.com/klauspost/cpuid/v2 v2.1.1 // indirect
- github.com/klauspost/reedsolomon v1.11.0 // indirect
- github.com/pkg/errors v0.9.1 // indirect
- github.com/templexxx/cpufeat v0.0.0-20180724012125-cef66df7f161 // indirect
- github.com/templexxx/xor v0.0.0-20191217153810-f85b25db303b // indirect
- github.com/tjfoc/gmsm v1.4.1 // indirect
github.com/xtaci/lossyconn v0.0.0-20200209145036-adba10fffc37 // indirect
- golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2 // indirect
- gopkg.in/hlandau/easymetric.v1 v1.0.0 // indirect
- gopkg.in/hlandau/measurable.v1 v1.0.1 // indirect
+ golang.org/x/text v0.13.0 // indirect
)
diff --git a/go.sum b/go.sum
index 69fddf2..a800c7a 100644
--- a/go.sum
+++ b/go.sum
@@ -8,8 +8,8 @@ blitter.com/go/groestl v0.0.0-20220410000905-c4decbf31d64 h1:SH6cZ4JiOTmWGeVd5hC
blitter.com/go/groestl v0.0.0-20220410000905-c4decbf31d64/go.mod h1:YMdIR/gCtFwU/a09jyWAwUu2J9CQejUFwkfD+PyVg+4=
blitter.com/go/herradurakex v1.0.0 h1:6XaxY+JLT1HUWPF0gYJnjX3pVjrw4YhYZEzZ1U0wkyc=
blitter.com/go/herradurakex v1.0.0/go.mod h1:m3+vYZX+2dDjdo+n/HDnXEYJX9pwmNeQLgAfJM8mtxw=
-blitter.com/go/hopscotch v0.0.0-20220617051533-4b42ccd4e00a h1:1fEN7eJMG9TweQuGMAgQlTJ0Wl7lsdDL4Nt5gHZijhY=
-blitter.com/go/hopscotch v0.0.0-20220617051533-4b42ccd4e00a/go.mod h1:LtcFd2/R9xcau5SZIYeaHvdqAM7Y5pyvdZYT5J9HAME=
+blitter.com/go/hopscotch v0.1.1 h1:hh809THr3I52J5G5QozNhDSd+qGwXWGqLh3FJBGrp+o=
+blitter.com/go/hopscotch v0.1.1/go.mod h1:hCz7oE31KjaO9M6+s2DcyVNlAA8saE/AaVYKFs7hl1I=
blitter.com/go/kyber v0.0.0-20200130200857-6f2021cb88d9 h1:D45AnrNphtvczBXRp5JQicZRTgaK/Is5bgPDDvRKhTc=
blitter.com/go/kyber v0.0.0-20200130200857-6f2021cb88d9/go.mod h1:SK6QfGG72lIfKW1Td0wH7f0wwN5nSIhV3K+wvzGNjrw=
blitter.com/go/mtwist v1.0.1 h1:PxmoWexfMpLmc8neHP/PcRc3s17ct7iz4d5W/qJVt04=
@@ -47,14 +47,14 @@ github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f h1:UWGE8Vi+1Agt0lrvnd7UsmvwqWKRzb9byK9iQmsbY0Y=
github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f/go.mod h1:u+9Snq0w+ZdYKi8BBoaxnEwWu0fY4Kvu9ByFpM51t1s=
-github.com/klauspost/cpuid/v2 v2.1.1 h1:t0wUqjowdm8ezddV5k0tLWVklVuvLJpoHeb4WBdydm0=
-github.com/klauspost/cpuid/v2 v2.1.1/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
-github.com/klauspost/reedsolomon v1.11.0 h1:fc24kMFf4I6dXJwSkVAsw8Za/dMcJrV5ImeDjG3ss1M=
-github.com/klauspost/reedsolomon v1.11.0/go.mod h1:FXLZzlJIdfqEnQLdUKWNRuMZg747hZ4oYp2Ml60Lb/k=
+github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg=
+github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
+github.com/klauspost/reedsolomon v1.11.8 h1:s8RpUW5TK4hjr+djiOpbZJB4ksx+TdYbRH7vHQpwPOY=
+github.com/klauspost/reedsolomon v1.11.8/go.mod h1:4bXRN+cVzMdml6ti7qLouuYi32KHJ5MGv0Qd8a47h6A=
github.com/kuking/go-frodokem v1.0.2 h1:sxdguENCyr6WnLbJ/cjz0AYCW75H1b+E6zXY2ldZnUU=
github.com/kuking/go-frodokem v1.0.2/go.mod h1:83ZX1kHOd72ouCsvbffCqJIj7Ih83MQTAjH2QbqzLZk=
-github.com/mattn/go-isatty v0.0.16 h1:bq3VjFmv/sOjHtdEhmkEV4x1AJtvUvOJ2PFAZ5+peKQ=
-github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
+github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
+github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
@@ -81,8 +81,8 @@ golang.org/x/crypto v0.0.0-20200128174031-69ecbb4d6d5d/go.mod h1:LzIPMQfyMNhhGPh
golang.org/x/crypto v0.0.0-20200510223506-06a226fb4e37/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20201012173705-84dcc777aaee/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/crypto v0.0.0-20220919173607-35f4265a4bc0 h1:a5Yg6ylndHHYJqIPrdq0AhvR6KTvDTAvgBtaidhEevY=
-golang.org/x/crypto v0.0.0-20220919173607-35f4265a4bc0/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
+golang.org/x/crypto v0.13.0 h1:mvySKfSWJ+UKUii46M40LOvyWfN0s2U+46/jDd0e6Ck=
+golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
@@ -93,8 +93,8 @@ golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73r
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20201010224723-4f7140c49acb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2 h1:CIJ76btIcR3eFI5EgSo6k1qKw9KJexJuRLI9G7Hp5wE=
-golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.15.0 h1:ugBLEUaxABaB5AJqW9enI0ACdci2RUd4eP51NTBvuJ8=
+golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -104,12 +104,14 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190902133755-9109b7679e13/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220919091848-fb04ddd9f9c8 h1:h+EGohizhe9XlX18rfpa8k8RAc5XyaeamM+0VHRd4lc=
-golang.org/x/sys v0.0.0-20220919091848-fb04ddd9f9c8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
+golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
diff --git a/vendor/blitter.com/go/chacha20/LICENSE b/vendor/blitter.com/go/chacha20/LICENSE
deleted file mode 100644
index dba13ed..0000000
--- a/vendor/blitter.com/go/chacha20/LICENSE
+++ /dev/null
@@ -1,661 +0,0 @@
- GNU AFFERO GENERAL PUBLIC LICENSE
- Version 3, 19 November 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc.
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The GNU Affero General Public License is a free, copyleft license for
-software and other kinds of works, specifically designed to ensure
-cooperation with the community in the case of network server software.
-
- The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works. By contrast,
-our General Public Licenses are intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
- Developers that use our General Public Licenses protect your rights
-with two steps: (1) assert copyright on the software, and (2) offer
-you this License which gives you legal permission to copy, distribute
-and/or modify the software.
-
- A secondary benefit of defending all users' freedom is that
-improvements made in alternate versions of the program, if they
-receive widespread use, become available for other developers to
-incorporate. Many developers of free software are heartened and
-encouraged by the resulting cooperation. However, in the case of
-software used on network servers, this result may fail to come about.
-The GNU General Public License permits making a modified version and
-letting the public access it on a server without ever releasing its
-source code to the public.
-
- The GNU Affero General Public License is designed specifically to
-ensure that, in such cases, the modified source code becomes available
-to the community. It requires the operator of a network server to
-provide the source code of the modified version running there to the
-users of that server. Therefore, public use of a modified version, on
-a publicly accessible server, gives the public access to the source
-code of the modified version.
-
- An older license, called the Affero General Public License and
-published by Affero, was designed to accomplish similar goals. This is
-a different license, not a version of the Affero GPL, but Affero has
-released a new version of the Affero GPL which permits relicensing under
-this license.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- TERMS AND CONDITIONS
-
- 0. Definitions.
-
- "This License" refers to version 3 of the GNU Affero General Public License.
-
- "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
- "The Program" refers to any copyrightable work licensed under this
-License. Each licensee is addressed as "you". "Licensees" and
-"recipients" may be individuals or organizations.
-
- To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy. The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
- A "covered work" means either the unmodified Program or a work based
-on the Program.
-
- To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy. Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
- To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies. Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
- An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License. If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
- 1. Source Code.
-
- The "source code" for a work means the preferred form of the work
-for making modifications to it. "Object code" means any non-source
-form of a work.
-
- A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
- The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form. A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
- The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities. However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work. For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
- The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
- The Corresponding Source for a work in source code form is that
-same work.
-
- 2. Basic Permissions.
-
- All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met. This License explicitly affirms your unlimited
-permission to run the unmodified Program. The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work. This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
- You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force. You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright. Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
- Conveying under any other circumstances is permitted solely under
-the conditions stated below. Sublicensing is not allowed; section 10
-makes it unnecessary.
-
- 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
- No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
- When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
- 4. Conveying Verbatim Copies.
-
- You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
- You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
- 5. Conveying Modified Source Versions.
-
- You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
- a) The work must carry prominent notices stating that you modified
- it, and giving a relevant date.
-
- b) The work must carry prominent notices stating that it is
- released under this License and any conditions added under section
- 7. This requirement modifies the requirement in section 4 to
- "keep intact all notices".
-
- c) You must license the entire work, as a whole, under this
- License to anyone who comes into possession of a copy. This
- License will therefore apply, along with any applicable section 7
- additional terms, to the whole of the work, and all its parts,
- regardless of how they are packaged. This License gives no
- permission to license the work in any other way, but it does not
- invalidate such permission if you have separately received it.
-
- d) If the work has interactive user interfaces, each must display
- Appropriate Legal Notices; however, if the Program has interactive
- interfaces that do not display Appropriate Legal Notices, your
- work need not make them do so.
-
- A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit. Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
- 6. Conveying Non-Source Forms.
-
- You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
- a) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by the
- Corresponding Source fixed on a durable physical medium
- customarily used for software interchange.
-
- b) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by a
- written offer, valid for at least three years and valid for as
- long as you offer spare parts or customer support for that product
- model, to give anyone who possesses the object code either (1) a
- copy of the Corresponding Source for all the software in the
- product that is covered by this License, on a durable physical
- medium customarily used for software interchange, for a price no
- more than your reasonable cost of physically performing this
- conveying of source, or (2) access to copy the
- Corresponding Source from a network server at no charge.
-
- c) Convey individual copies of the object code with a copy of the
- written offer to provide the Corresponding Source. This
- alternative is allowed only occasionally and noncommercially, and
- only if you received the object code with such an offer, in accord
- with subsection 6b.
-
- d) Convey the object code by offering access from a designated
- place (gratis or for a charge), and offer equivalent access to the
- Corresponding Source in the same way through the same place at no
- further charge. You need not require recipients to copy the
- Corresponding Source along with the object code. If the place to
- copy the object code is a network server, the Corresponding Source
- may be on a different server (operated by you or a third party)
- that supports equivalent copying facilities, provided you maintain
- clear directions next to the object code saying where to find the
- Corresponding Source. Regardless of what server hosts the
- Corresponding Source, you remain obligated to ensure that it is
- available for as long as needed to satisfy these requirements.
-
- e) Convey the object code using peer-to-peer transmission, provided
- you inform other peers where the object code and Corresponding
- Source of the work are being offered to the general public at no
- charge under subsection 6d.
-
- A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
- A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling. In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage. For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product. A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
- "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source. The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
- If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information. But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
- The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed. Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
- Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
- 7. Additional Terms.
-
- "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law. If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
- When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it. (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.) You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
- Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
- a) Disclaiming warranty or limiting liability differently from the
- terms of sections 15 and 16 of this License; or
-
- b) Requiring preservation of specified reasonable legal notices or
- author attributions in that material or in the Appropriate Legal
- Notices displayed by works containing it; or
-
- c) Prohibiting misrepresentation of the origin of that material, or
- requiring that modified versions of such material be marked in
- reasonable ways as different from the original version; or
-
- d) Limiting the use for publicity purposes of names of licensors or
- authors of the material; or
-
- e) Declining to grant rights under trademark law for use of some
- trade names, trademarks, or service marks; or
-
- f) Requiring indemnification of licensors and authors of that
- material by anyone who conveys the material (or modified versions of
- it) with contractual assumptions of liability to the recipient, for
- any liability that these contractual assumptions directly impose on
- those licensors and authors.
-
- All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10. If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term. If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
- If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
- Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
- 8. Termination.
-
- You may not propagate or modify a covered work except as expressly
-provided under this License. Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
- However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
- Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
- Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License. If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
- 9. Acceptance Not Required for Having Copies.
-
- You are not required to accept this License in order to receive or
-run a copy of the Program. Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance. However,
-nothing other than this License grants you permission to propagate or
-modify any covered work. These actions infringe copyright if you do
-not accept this License. Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
- 10. Automatic Licensing of Downstream Recipients.
-
- Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License. You are not responsible
-for enforcing compliance by third parties with this License.
-
- An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations. If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
- You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License. For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
- 11. Patents.
-
- A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based. The
-work thus licensed is called the contributor's "contributor version".
-
- A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version. For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
- Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
- In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement). To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
- If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients. "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
- If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
- A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License. You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
- Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
- 12. No Surrender of Others' Freedom.
-
- If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all. For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
- 13. Remote Network Interaction; Use with the GNU General Public License.
-
- Notwithstanding any other provision of this License, if you modify the
-Program, your modified version must prominently offer all users
-interacting with it remotely through a computer network (if your version
-supports such interaction) an opportunity to receive the Corresponding
-Source of your version by providing access to the Corresponding Source
-from a network server at no charge, through some standard or customary
-means of facilitating copying of software. This Corresponding Source
-shall include the Corresponding Source for any work covered by version 3
-of the GNU General Public License that is incorporated pursuant to the
-following paragraph.
-
- Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU General Public License into a single
-combined work, and to convey the resulting work. The terms of this
-License will continue to apply to the part which is the covered work,
-but the work with which it is combined will remain governed by version
-3 of the GNU General Public License.
-
- 14. Revised Versions of this License.
-
- The Free Software Foundation may publish revised and/or new versions of
-the GNU Affero General Public License from time to time. Such new versions
-will be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
-Program specifies that a certain numbered version of the GNU Affero General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation. If the Program does not specify a version number of the
-GNU Affero General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
- If the Program specifies that a proxy can decide which future
-versions of the GNU Affero General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
- Later license versions may give you additional or different
-permissions. However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
- 15. Disclaimer of Warranty.
-
- THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 16. Limitation of Liability.
-
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
- 17. Interpretation of Sections 15 and 16.
-
- If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-
- Copyright (C)
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see .
-
-Also add information on how to contact you by electronic and paper mail.
-
- If your software can interact with users remotely through a computer
-network, you should also make sure that it provides a way for users to
-get its source. For example, if your program is a web application, its
-interface could display a "Source" link that leads users to an archive
-of the code. There are many ways you could offer source, and different
-solutions will be better for different programs; see section 13 for the
-specific requirements.
-
- You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU AGPL, see
-.
diff --git a/vendor/blitter.com/go/chacha20/README.md b/vendor/blitter.com/go/chacha20/README.md
deleted file mode 100644
index 104f4ac..0000000
--- a/vendor/blitter.com/go/chacha20/README.md
+++ /dev/null
@@ -1,13 +0,0 @@
-### chacha20 - ChaCha20
-#### Yawning Angel (yawning at schwanenlied dot me)
-
-Yet another Go ChaCha20 implementation. Everything else I found was slow,
-didn't support all the variants I need to use, or relied on cgo to go fast.
-
-Features:
-
- * 20 round, 256 bit key only. Everything else is pointless and stupid.
- * IETF 96 bit nonce variant.
- * XChaCha 24 byte nonce variant.
- * SSSE3 and AVX2 support on amd64 targets.
- * Incremental encrypt/decrypt support, unlike golang.org/x/crypto/salsa20.
diff --git a/vendor/blitter.com/go/chacha20/chacha20.go b/vendor/blitter.com/go/chacha20/chacha20.go
deleted file mode 100644
index fa90fb1..0000000
--- a/vendor/blitter.com/go/chacha20/chacha20.go
+++ /dev/null
@@ -1,290 +0,0 @@
-// Copryright (C) 2019 Yawning Angel
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as
-// published by the Free Software Foundation, either version 3 of the
-// License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
-// Package chacha20 implements the ChaCha20 stream cipher.
-package chacha20 // import "blitter.com/go/chacha20"
-
-import (
- "crypto/cipher"
- "encoding/binary"
- "errors"
- "math"
-
- "blitter.com/go/chacha20/internal/api"
- "blitter.com/go/chacha20/internal/hardware"
- "blitter.com/go/chacha20/internal/ref"
-)
-
-const (
- // KeySize is the ChaCha20 key size in bytes.
- KeySize = 32
-
- // NonceSize is the ChaCha20 nonce size in bytes.
- NonceSize = 8
-
- // INonceSize is the IETF ChaCha20 nonce size in bytes.
- INonceSize = 12
-
- // XNonceSize is the XChaCha20 nonce size in bytes.
- XNonceSize = 24
-
- // HNonceSize is the HChaCha20 nonce size in bytes.
- HNonceSize = 16
-)
-
-var (
- // ErrInvalidKey is the error returned when the key is invalid.
- ErrInvalidKey = errors.New("chacha20: key length must be KeySize bytes")
-
- // ErrInvalidNonce is the error returned when the nonce is invalid.
- ErrInvalidNonce = errors.New("chacha20: nonce length must be NonceSize/INonceSize/XNonceSize bytes")
-
- // ErrInvalidCounter is the error returned when the counter is invalid.
- ErrInvalidCounter = errors.New("chacha20: block counter is invalid (out of range)")
-
- supportedImpls []api.Implementation
- activeImpl api.Implementation
-
- _ cipher.Stream = (*Cipher)(nil)
-)
-
-// Cipher is an instance of ChaCha20/XChaCha20 using a particular key and nonce.
-type Cipher struct {
- state [api.StateSize]uint32
- buf [api.BlockSize]byte
-
- off int
- ietf bool
-}
-
-// Reset zeros the key data so that it will no longer appear in the process's
-// memory.
-func (c *Cipher) Reset() {
- for i := range c.state {
- c.state[i] = 0
- }
- for i := range c.buf {
- c.buf[i] = 0
- }
-}
-
-// Seek sets the block counter to a given offset.
-func (c *Cipher) Seek(blockCounter uint64) error {
- if c.ietf {
- if blockCounter > math.MaxUint32 {
- return ErrInvalidCounter
- }
- c.state[12] = uint32(blockCounter)
- } else {
- c.state[12] = uint32(blockCounter)
- c.state[13] = uint32(blockCounter >> 32)
- }
- c.off = api.BlockSize
- return nil
-}
-
-// ReKey reinitializes the ChaCha20/XChaCha20 instance with the provided key
-// and nonce.
-func (c *Cipher) ReKey(key, nonce []byte) error {
- c.Reset()
- return c.doReKey(key, nonce)
-}
-
-func (c *Cipher) doReKey(key, nonce []byte) error {
- if len(key) != KeySize {
- return ErrInvalidKey
- }
-
- var subKey []byte
- switch len(nonce) {
- case NonceSize, INonceSize:
- case XNonceSize:
- subKey = c.buf[:KeySize]
- activeImpl.HChaCha(key, nonce, subKey)
- key = subKey
- nonce = nonce[16:24]
- default:
- return ErrInvalidNonce
- }
-
- _ = key[31] // Force bounds check elimination.
-
- c.state[0] = api.Sigma0
- c.state[1] = api.Sigma1
- c.state[2] = api.Sigma2
- c.state[3] = api.Sigma3
- c.state[4] = binary.LittleEndian.Uint32(key[0:4])
- c.state[5] = binary.LittleEndian.Uint32(key[4:8])
- c.state[6] = binary.LittleEndian.Uint32(key[8:12])
- c.state[7] = binary.LittleEndian.Uint32(key[12:16])
- c.state[8] = binary.LittleEndian.Uint32(key[16:20])
- c.state[9] = binary.LittleEndian.Uint32(key[20:24])
- c.state[10] = binary.LittleEndian.Uint32(key[24:28])
- c.state[11] = binary.LittleEndian.Uint32(key[28:32])
- c.state[12] = 0
- if len(nonce) == INonceSize {
- _ = nonce[11] // Force bounds check elimination.
- c.state[13] = binary.LittleEndian.Uint32(nonce[0:4])
- c.state[14] = binary.LittleEndian.Uint32(nonce[4:8])
- c.state[15] = binary.LittleEndian.Uint32(nonce[8:12])
- c.ietf = true
- } else {
- _ = nonce[7] // Force bounds check elimination.
- c.state[13] = 0
- c.state[14] = binary.LittleEndian.Uint32(nonce[0:4])
- c.state[15] = binary.LittleEndian.Uint32(nonce[4:8])
- c.ietf = false
- }
- c.off = api.BlockSize
-
- if subKey != nil {
- for i := range subKey {
- subKey[i] = 0
- }
- }
-
- return nil
-}
-
-// New returns a new ChaCha20/XChaCha20 instance.
-func New(key, nonce []byte) (*Cipher, error) {
- var c Cipher
- if err := c.doReKey(key, nonce); err != nil {
- return nil, err
- }
-
- return &c, nil
-}
-
-// HChaCha is the HChaCha20 hash function used to make XChaCha.
-func HChaCha(key, nonce []byte, dst *[32]byte) {
- activeImpl.HChaCha(key, nonce, dst[:])
-}
-
-// XORKeyStream sets dst to the result of XORing src with the key stream. Dst
-// and src may be the same slice but otherwise should not overlap.
-func (c *Cipher) XORKeyStream(dst, src []byte) {
- if len(dst) < len(src) {
- src = src[:len(dst)]
- }
-
- for remaining := len(src); remaining > 0; {
- // Process multiple blocks at once.
- if c.off == api.BlockSize {
- nrBlocks := remaining / api.BlockSize
- directBytes := nrBlocks * api.BlockSize
- if nrBlocks > 0 {
- c.doBlocks(dst, src, nrBlocks)
- remaining -= directBytes
- if remaining == 0 {
- return
- }
- dst = dst[directBytes:]
- src = src[directBytes:]
- }
-
- // If there's a partial block, generate 1 block of keystream into
- // the internal buffer.
- c.doBlocks(c.buf[:], nil, 1)
- c.off = 0
- }
-
- // Process partial blocks from the buffered keystream.
- toXor := api.BlockSize - c.off
- if remaining < toXor {
- toXor = remaining
- }
- if toXor > 0 {
- // The inliner doesn't want to inline this function, but my
- // attempts to force BCE don't seem to work with manual
- // inlining.
- //
- // Taking the extra function call overhead here appears to be
- // worth it.
- c.xorBufBytes(dst, src, toXor)
-
- dst = dst[toXor:]
- src = src[toXor:]
-
- remaining -= toXor
- }
- }
-}
-
-func (c *Cipher) xorBufBytes(dst, src []byte, n int) {
- // Force bounds check elimination.
- buf := c.buf[c.off:]
- _ = buf[n-1]
- _ = dst[n-1]
- _ = src[n-1]
-
- for i := 0; i < n; i++ {
- dst[i] = buf[i] ^ src[i]
- }
- c.off += n
-}
-
-// KeyStream sets dst to the raw keystream.
-func (c *Cipher) KeyStream(dst []byte) {
- for remaining := len(dst); remaining > 0; {
- // Process multiple blocks at once.
- if c.off == api.BlockSize {
- nrBlocks := remaining / api.BlockSize
- directBytes := nrBlocks * api.BlockSize
- if nrBlocks > 0 {
- c.doBlocks(dst, nil, nrBlocks)
- remaining -= directBytes
- if remaining == 0 {
- return
- }
- dst = dst[directBytes:]
- }
-
- // If there's a partial block, generate 1 block of keystream into
- // the internal buffer.
- c.doBlocks(c.buf[:], nil, 1)
- c.off = 0
- }
-
- // Process partial blocks from the buffered keystream.
- toCopy := api.BlockSize - c.off
- if remaining < toCopy {
- toCopy = remaining
- }
- if toCopy > 0 {
- copy(dst[:toCopy], c.buf[c.off:c.off+toCopy])
- dst = dst[toCopy:]
- remaining -= toCopy
- c.off += toCopy
- }
- }
-}
-
-func (c *Cipher) doBlocks(dst, src []byte, nrBlocks int) {
- if c.ietf {
- ctr := uint64(c.state[12])
- if ctr+uint64(nrBlocks) > math.MaxUint32 {
- panic("chacha20: will exceed key stream per nonce limit")
- }
- }
-
- activeImpl.Blocks(&c.state, dst, src, nrBlocks)
-}
-
-func init() {
- supportedImpls = hardware.Register(supportedImpls)
- supportedImpls = ref.Register(supportedImpls)
- activeImpl = supportedImpls[0]
-}
diff --git a/vendor/blitter.com/go/chacha20/internal/api/api.go b/vendor/blitter.com/go/chacha20/internal/api/api.go
deleted file mode 100644
index e347812..0000000
--- a/vendor/blitter.com/go/chacha20/internal/api/api.go
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copryright (C) 2019 Yawning Angel
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as
-// published by the Free Software Foundation, either version 3 of the
-// License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
-// Package api provides the ChaCha20 implementation abstract interface.
-package api
-
-const (
- // BlockSize is the size of a ChaCha20 block in bytes.
- BlockSize = 64
-
- // StateSize is the size of the ChaCha20 state as 32 bit unsigned words.
- StateSize = 16
-
- // HashSize is the size of the HChaCha output in bytes.
- HashSize = 32
-
- // HNonceSize is the HChaCha20 nonce size in bytes.
- HNonceSize = 16
-
- // Sigma0 is the first word of the ChaCha constant.
- Sigma0 = uint32(0x61707865)
-
- // Sigma1 is the second word of the ChaCha constant.
- Sigma1 = uint32(0x3320646e)
-
- // Sigma2 is the third word of the ChaCha constant.
- Sigma2 = uint32(0x79622d32)
-
- // Sigma3 is the fourth word of the ChaCha constant.
- Sigma3 = uint32(0x6b206574)
-)
-
-// Implementation is a ChaCha20 implementation
-type Implementation interface {
- // Name returns the name of the implementation.
- Name() string
-
- // Blocks calculates the ChaCha20 blocks. If src is not nil, dst will
- // be set to the XOR of src with the key stream, otherwise dst will be
- // set to the key stream.
- Blocks(x *[StateSize]uint32, dst, src []byte, nrBlocks int)
-
- // HChaCha calculates the HChaCha20 hash.
- //
- // Note: `dst` is guaranteed to be HashSize bytes.
- HChaCha(key, nonce []byte, dst []byte)
-}
diff --git a/vendor/blitter.com/go/chacha20/internal/hardware/impl.go b/vendor/blitter.com/go/chacha20/internal/hardware/impl.go
deleted file mode 100644
index 0c63e3a..0000000
--- a/vendor/blitter.com/go/chacha20/internal/hardware/impl.go
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copryright (C) 2019 Yawning Angel
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as
-// published by the Free Software Foundation, either version 3 of the
-// License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
-// Package hardware provides the hardware accelerated ChaCha20 implementations.
-package hardware
-
-import "blitter.com/go/chacha20/internal/api"
-
-var hardwareImpls []api.Implementation
-
-// Register appends the implementation(s) to the provided slice, and returns the
-// new slice.
-func Register(impls []api.Implementation) []api.Implementation {
- return append(impls, hardwareImpls...)
-}
diff --git a/vendor/blitter.com/go/chacha20/internal/hardware/impl_amd64.go b/vendor/blitter.com/go/chacha20/internal/hardware/impl_amd64.go
deleted file mode 100644
index 31cbf8b..0000000
--- a/vendor/blitter.com/go/chacha20/internal/hardware/impl_amd64.go
+++ /dev/null
@@ -1,88 +0,0 @@
-// Copryright (C) 2019 Yawning Angel
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as
-// published by the Free Software Foundation, either version 3 of the
-// License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
-// +build amd64,!noasm
-
-package hardware
-
-import (
- "golang.org/x/sys/cpu"
-
- "blitter.com/go/chacha20/internal/api"
-)
-
-//go:noescape
-func blocksAVX2(s *[api.StateSize]uint32, in, out []byte)
-
-//go:noescape
-func hChaChaAVX2(key, nonce []byte, dst *byte)
-
-//go:noescape
-func blocksSSSE3(s *[api.StateSize]uint32, in, out []byte)
-
-//go:noescape
-func hChaChaSSSE3(key, nonce []byte, dst *byte)
-
-type implAmd64 struct {
- name string
-
- blocksFn func(*[api.StateSize]uint32, []byte, []byte, int)
- hChaChaFn func([]byte, []byte, *byte)
-}
-
-func (impl *implAmd64) Name() string {
- return impl.name
-}
-
-func (impl *implAmd64) Blocks(x *[api.StateSize]uint32, dst, src []byte, nrBlocks int) {
- impl.blocksFn(x, dst, src, nrBlocks)
-}
-
-func (impl *implAmd64) HChaCha(key, nonce []byte, dst []byte) {
- impl.hChaChaFn(key, nonce, &dst[0])
-}
-
-func blockWrapper(fn func(*[api.StateSize]uint32, []byte, []byte)) func(*[api.StateSize]uint32, []byte, []byte, int) {
- return func(x *[api.StateSize]uint32, dst, src []byte, nrBlocks int) {
- sz := nrBlocks * api.BlockSize
- if src != nil {
- fn(x, src[:sz], dst[:sz])
- } else {
- // Sub-optimal, but the compiler special cases this to an assembly
- // optimized runtime.memclrNoHeapPointers, so it's not terrible.
- for i := range dst[:sz] {
- dst[i] = 0
- }
- fn(x, dst[:sz], dst[:sz])
- }
- }
-}
-
-func init() {
- if cpu.X86.HasAVX2 {
- hardwareImpls = append(hardwareImpls, &implAmd64{
- name: "amd64_avx2",
- blocksFn: blockWrapper(blocksAVX2),
- hChaChaFn: hChaChaAVX2,
- })
- }
- if cpu.X86.HasSSE3 {
- hardwareImpls = append(hardwareImpls, &implAmd64{
- name: "amd64_ssse3",
- blocksFn: blockWrapper(blocksSSSE3),
- hChaChaFn: hChaChaSSSE3,
- })
- }
-}
diff --git a/vendor/blitter.com/go/chacha20/internal/hardware/impl_amd64.s b/vendor/blitter.com/go/chacha20/internal/hardware/impl_amd64.s
deleted file mode 100644
index fb93eef..0000000
--- a/vendor/blitter.com/go/chacha20/internal/hardware/impl_amd64.s
+++ /dev/null
@@ -1,1682 +0,0 @@
-// Copyright (C) 2019 Yawning Angel
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as
-// published by the Free Software Foundation, either version 3 of the
-// License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see <https://www.gnu.org/licenses/>.
-
-// +build !noasm
-
-#include "textflag.h"
-
-DATA ·chacha_constants<>+0x00(SB)/4, $0x61707865 // "expa" as little-endian ASCII
-DATA ·chacha_constants<>+0x04(SB)/4, $0x3320646E // "nd 3"
-DATA ·chacha_constants<>+0x08(SB)/4, $0x79622D32 // "2-by"
-DATA ·chacha_constants<>+0x0c(SB)/4, $0x6B206574 // "te k"
-DATA ·chacha_constants<>+0x10(SB)/8, $0x0504070601000302 // byte-shuffle mask, low half: rotates each 32-bit lane left by 16
-DATA ·chacha_constants<>+0x18(SB)/8, $0x0D0C0F0E09080B0A // byte-shuffle mask, high half: rotate left by 16
-DATA ·chacha_constants<>+0x20(SB)/8, $0x0605040702010003 // byte-shuffle mask, low half: rotates each 32-bit lane left by 8
-DATA ·chacha_constants<>+0x28(SB)/8, $0x0E0D0C0F0A09080B // byte-shuffle mask, high half: rotate left by 8
-GLOBL ·chacha_constants<>(SB), (NOPTR+RODATA), $48 // 48 bytes total, read-only, pointer-free
-
-// func blocksAVX2(s *[api.StateSize]uint32, in, out []byte)
-TEXT ·blocksAVX2(SB), NOSPLIT, $576-56 // 576-byte frame, 56 bytes of arguments
- // This is Andrew Moon's AVX2 ChaCha implementation taken from
- // supercop-20171218, with some minor changes, primarily calling
- // convention and assembly dialect related.
-
- // Point BP at a 64-byte-aligned scratch area inside the frame.
- MOVQ SP, BP
- ADDQ $64, BP
- ANDQ $-64, BP
-
- // Go calling convention -> SYSV AMD64 (and a fixup).
- MOVQ s+0(FP), DI // &s -> DI
- ADDQ $16, DI // Skip the ChaCha constants in the chachaState.
- MOVQ in+8(FP), SI // &in[0] -> SI
- MOVQ out+32(FP), DX // &out[0] -> DX
- MOVQ in_len+16(FP), CX // len(in) -> CX
-
- // Begin the main body of `chacha_blocks_avx2`.
- //
- // Mostly a direct translation except:
- // * The number of rounds is always 20.
- // * %rbp is used instead of %rsp.
- LEAQ ·chacha_constants<>(SB), AX // AX = &chacha_constants
- VMOVDQU 0(AX), X8 // X8 = the four constant words
- VMOVDQU 16(AX), X6 // X6 = shuffle mask at offset 0x10
- VMOVDQU 32(AX), X7 // X7 = shuffle mask at offset 0x20
- VMOVDQU 0(DI), X9 // X9..X11 = the three state rows that follow the constants
- VMOVDQU 16(DI), X10
- VMOVDQU 32(DI), X11
-
- // MOVQ 48(DI), AX (disabled; presumably the upstream round-count load, unused because rounds are fixed at 20)
- MOVQ $1, R9 // R9 = 1, later moved into X5 as the per-block counter increment
- VMOVDQA X8, 0(BP) // keep a copy of all four input rows at 0..63(BP)
- VMOVDQA X9, 16(BP)
- VMOVDQA X10, 32(BP)
- VMOVDQA X11, 48(BP)
-
- // MOVQ AX, 64(BP) (disabled together with the load above)
- VMOVDQA X6, 448(BP) // each mask is stored twice so a 32-byte load at 448/480(BP) sees it in both halves
- VMOVDQA X6, 464(BP)
- VMOVDQA X7, 480(BP)
- VMOVDQA X7, 496(BP)
- CMPQ CX, $512 // dispatch on the number of bytes to process
- JAE chacha_blocks_avx2_atleast512
- CMPQ CX, $256
- JAE chacha_blocks_avx2_atleast256
- JMP chacha_blocks_avx2_below256
-
-chacha_blocks_avx2_atleast512: // 512 bytes (8 blocks) per pass, one 32-bit lane per block in Y0-Y15
- MOVQ 48(BP), AX // AX = first 8 bytes of the last state row, treated as a 64-bit block counter
- LEAQ 1(AX), R8
- LEAQ 2(AX), R9
- LEAQ 3(AX), R10
- LEAQ 4(AX), BX
- LEAQ 5(AX), R11
- LEAQ 6(AX), R12
- LEAQ 7(AX), R13
- LEAQ 8(AX), R14
- MOVL AX, 128(BP) // low 32 bits of the eight counters -> 128(BP)
- MOVL R8, 4+128(BP)
- MOVL R9, 8+128(BP)
- MOVL R10, 12+128(BP)
- MOVL BX, 16+128(BP)
- MOVL R11, 20+128(BP)
- MOVL R12, 24+128(BP)
- MOVL R13, 28+128(BP)
- SHRQ $32, AX
- SHRQ $32, R8
- SHRQ $32, R9
- SHRQ $32, R10
- SHRQ $32, BX
- SHRQ $32, R11
- SHRQ $32, R12
- SHRQ $32, R13
- MOVL AX, 160(BP) // high 32 bits of the eight counters -> 160(BP)
- MOVL R8, 4+160(BP)
- MOVL R9, 8+160(BP)
- MOVL R10, 12+160(BP)
- MOVL BX, 16+160(BP)
- MOVL R11, 20+160(BP)
- MOVL R12, 24+160(BP)
- MOVL R13, 28+160(BP)
- MOVQ R14, 48(BP) // stored counter advances by 8
-
- // MOVQ 64(BP), AX (disabled; round count is the constant below)
- MOVQ $20, AX // 20 rounds, consumed two per loop iteration
- VPBROADCASTD 0(BP), Y0 // broadcast each input word across all eight lanes
- VPBROADCASTD 4+0(BP), Y1
- VPBROADCASTD 8+0(BP), Y2
- VPBROADCASTD 12+0(BP), Y3
- VPBROADCASTD 16(BP), Y4
- VPBROADCASTD 4+16(BP), Y5
- VPBROADCASTD 8+16(BP), Y6
- VPBROADCASTD 12+16(BP), Y7
- VPBROADCASTD 32(BP), Y8
- VPBROADCASTD 4+32(BP), Y9
- VPBROADCASTD 8+32(BP), Y10
- VPBROADCASTD 12+32(BP), Y11
- VPBROADCASTD 8+48(BP), Y14
- VPBROADCASTD 12+48(BP), Y15
- VMOVDQA 128(BP), Y12 // per-lane counter words
- VMOVDQA 160(BP), Y13
-
-chacha_blocks_avx2_mainloop1: // one pass over (0,4,8,12)-style word groups, then one over (0,5,10,15)-style groups
- VPADDD Y0, Y4, Y0
- VPADDD Y1, Y5, Y1
- VPXOR Y12, Y0, Y12
- VPXOR Y13, Y1, Y13
- VPADDD Y2, Y6, Y2
- VPADDD Y3, Y7, Y3
- VPXOR Y14, Y2, Y14
- VPXOR Y15, Y3, Y15
- VPSHUFB 448(BP), Y12, Y12
- VPSHUFB 448(BP), Y13, Y13
- VPADDD Y8, Y12, Y8
- VPADDD Y9, Y13, Y9
- VPSHUFB 448(BP), Y14, Y14
- VPSHUFB 448(BP), Y15, Y15
- VPADDD Y10, Y14, Y10
- VPADDD Y11, Y15, Y11
- VMOVDQA Y12, 96(BP) // spill Y12 so it can serve as the shift temporary below
- VPXOR Y4, Y8, Y4
- VPXOR Y5, Y9, Y5
- VPSLLD $ 12, Y4, Y12
- VPSRLD $20, Y4, Y4
- VPXOR Y4, Y12, Y4
- VPSLLD $ 12, Y5, Y12
- VPSRLD $20, Y5, Y5
- VPXOR Y5, Y12, Y5
- VPXOR Y6, Y10, Y6
- VPXOR Y7, Y11, Y7
- VPSLLD $ 12, Y6, Y12
- VPSRLD $20, Y6, Y6
- VPXOR Y6, Y12, Y6
- VPSLLD $ 12, Y7, Y12
- VPSRLD $20, Y7, Y7
- VPXOR Y7, Y12, Y7
- VPADDD Y0, Y4, Y0
- VPADDD Y1, Y5, Y1
- VPXOR 96(BP), Y0, Y12
- VPXOR Y13, Y1, Y13
- VPADDD Y2, Y6, Y2
- VPADDD Y3, Y7, Y3
- VPXOR Y14, Y2, Y14
- VPXOR Y15, Y3, Y15
- VPSHUFB 480(BP), Y12, Y12
- VPSHUFB 480(BP), Y13, Y13
- VPADDD Y8, Y12, Y8
- VPADDD Y9, Y13, Y9
- VPSHUFB 480(BP), Y14, Y14
- VPSHUFB 480(BP), Y15, Y15
- VPADDD Y10, Y14, Y10
- VPADDD Y11, Y15, Y11
- VMOVDQA Y12, 96(BP)
- VPXOR Y4, Y8, Y4
- VPXOR Y5, Y9, Y5
- VPSLLD $ 7, Y4, Y12
- VPSRLD $25, Y4, Y4
- VPXOR Y4, Y12, Y4
- VPSLLD $ 7, Y5, Y12
- VPSRLD $25, Y5, Y5
- VPXOR Y5, Y12, Y5
- VPXOR Y6, Y10, Y6
- VPXOR Y7, Y11, Y7
- VPSLLD $ 7, Y6, Y12
- VPSRLD $25, Y6, Y6
- VPXOR Y6, Y12, Y6
- VPSLLD $ 7, Y7, Y12
- VPSRLD $25, Y7, Y7
- VPXOR Y7, Y12, Y7
- VPADDD Y0, Y5, Y0
- VPADDD Y1, Y6, Y1
- VPXOR Y15, Y0, Y15
- VPXOR 96(BP), Y1, Y12
- VPADDD Y2, Y7, Y2
- VPADDD Y3, Y4, Y3
- VPXOR Y13, Y2, Y13
- VPXOR Y14, Y3, Y14
- VPSHUFB 448(BP), Y15, Y15
- VPSHUFB 448(BP), Y12, Y12
- VPADDD Y10, Y15, Y10
- VPADDD Y11, Y12, Y11
- VPSHUFB 448(BP), Y13, Y13
- VPSHUFB 448(BP), Y14, Y14
- VPADDD Y8, Y13, Y8
- VPADDD Y9, Y14, Y9
- VMOVDQA Y15, 96(BP) // second half spills Y15 and uses it as the shift temporary
- VPXOR Y5, Y10, Y5
- VPXOR Y6, Y11, Y6
- VPSLLD $ 12, Y5, Y15
- VPSRLD $20, Y5, Y5
- VPXOR Y5, Y15, Y5
- VPSLLD $ 12, Y6, Y15
- VPSRLD $20, Y6, Y6
- VPXOR Y6, Y15, Y6
- VPXOR Y7, Y8, Y7
- VPXOR Y4, Y9, Y4
- VPSLLD $ 12, Y7, Y15
- VPSRLD $20, Y7, Y7
- VPXOR Y7, Y15, Y7
- VPSLLD $ 12, Y4, Y15
- VPSRLD $20, Y4, Y4
- VPXOR Y4, Y15, Y4
- VPADDD Y0, Y5, Y0
- VPADDD Y1, Y6, Y1
- VPXOR 96(BP), Y0, Y15
- VPXOR Y12, Y1, Y12
- VPADDD Y2, Y7, Y2
- VPADDD Y3, Y4, Y3
- VPXOR Y13, Y2, Y13
- VPXOR Y14, Y3, Y14
- VPSHUFB 480(BP), Y15, Y15
- VPSHUFB 480(BP), Y12, Y12
- VPADDD Y10, Y15, Y10
- VPADDD Y11, Y12, Y11
- VPSHUFB 480(BP), Y13, Y13
- VPSHUFB 480(BP), Y14, Y14
- VPADDD Y8, Y13, Y8
- VPADDD Y9, Y14, Y9
- VMOVDQA Y15, 96(BP)
- VPXOR Y5, Y10, Y5
- VPXOR Y6, Y11, Y6
- VPSLLD $ 7, Y5, Y15
- VPSRLD $25, Y5, Y5
- VPXOR Y5, Y15, Y5
- VPSLLD $ 7, Y6, Y15
- VPSRLD $25, Y6, Y6
- VPXOR Y6, Y15, Y6
- VPXOR Y7, Y8, Y7
- VPXOR Y4, Y9, Y4
- VPSLLD $ 7, Y7, Y15
- VPSRLD $25, Y7, Y7
- VPXOR Y7, Y15, Y7
- VPSLLD $ 7, Y4, Y15
- VPSRLD $25, Y4, Y4
- VPXOR Y4, Y15, Y4
- VMOVDQA 96(BP), Y15
- SUBQ $2, AX
- JNZ chacha_blocks_avx2_mainloop1
- VMOVDQA Y8, 192(BP) // park Y8-Y15 in scratch while Y0-Y7 are finished first
- VMOVDQA Y9, 224(BP)
- VMOVDQA Y10, 256(BP)
- VMOVDQA Y11, 288(BP)
- VMOVDQA Y12, 320(BP)
- VMOVDQA Y13, 352(BP)
- VMOVDQA Y14, 384(BP)
- VMOVDQA Y15, 416(BP)
- VPBROADCASTD 0(BP), Y8 // re-broadcast the first eight input words and add them to the round output
- VPBROADCASTD 4+0(BP), Y9
- VPBROADCASTD 8+0(BP), Y10
- VPBROADCASTD 12+0(BP), Y11
- VPBROADCASTD 16(BP), Y12
- VPBROADCASTD 4+16(BP), Y13
- VPBROADCASTD 8+16(BP), Y14
- VPBROADCASTD 12+16(BP), Y15
- VPADDD Y8, Y0, Y0
- VPADDD Y9, Y1, Y1
- VPADDD Y10, Y2, Y2
- VPADDD Y11, Y3, Y3
- VPADDD Y12, Y4, Y4
- VPADDD Y13, Y5, Y5
- VPADDD Y14, Y6, Y6
- VPADDD Y15, Y7, Y7
- VPUNPCKLDQ Y1, Y0, Y8 // unpack/permute sequence regroups the lane-per-block data into contiguous 32-byte chunks
- VPUNPCKLDQ Y3, Y2, Y9
- VPUNPCKHDQ Y1, Y0, Y12
- VPUNPCKHDQ Y3, Y2, Y13
- VPUNPCKLDQ Y5, Y4, Y10
- VPUNPCKLDQ Y7, Y6, Y11
- VPUNPCKHDQ Y5, Y4, Y14
- VPUNPCKHDQ Y7, Y6, Y15
- VPUNPCKLQDQ Y9, Y8, Y0
- VPUNPCKLQDQ Y11, Y10, Y1
- VPUNPCKHQDQ Y9, Y8, Y2
- VPUNPCKHQDQ Y11, Y10, Y3
- VPUNPCKLQDQ Y13, Y12, Y4
- VPUNPCKLQDQ Y15, Y14, Y5
- VPUNPCKHQDQ Y13, Y12, Y6
- VPUNPCKHQDQ Y15, Y14, Y7
- VPERM2I128 $0x20, Y1, Y0, Y8
- VPERM2I128 $0x20, Y3, Y2, Y9
- VPERM2I128 $0x31, Y1, Y0, Y12
- VPERM2I128 $0x31, Y3, Y2, Y13
- VPERM2I128 $0x20, Y5, Y4, Y10
- VPERM2I128 $0x20, Y7, Y6, Y11
- VPERM2I128 $0x31, Y5, Y4, Y14
- VPERM2I128 $0x31, Y7, Y6, Y15
- ANDQ SI, SI // input pointer nil?
- JZ chacha_blocks_avx2_noinput1
- VPXOR 0(SI), Y8, Y8 // XOR input into the first 32 bytes of each of the eight blocks
- VPXOR 64(SI), Y9, Y9
- VPXOR 128(SI), Y10, Y10
- VPXOR 192(SI), Y11, Y11
- VPXOR 256(SI), Y12, Y12
- VPXOR 320(SI), Y13, Y13
- VPXOR 384(SI), Y14, Y14
- VPXOR 448(SI), Y15, Y15
- VMOVDQU Y8, 0(DX)
- VMOVDQU Y9, 64(DX)
- VMOVDQU Y10, 128(DX)
- VMOVDQU Y11, 192(DX)
- VMOVDQU Y12, 256(DX)
- VMOVDQU Y13, 320(DX)
- VMOVDQU Y14, 384(DX)
- VMOVDQU Y15, 448(DX)
- VMOVDQA 192(BP), Y0 // reload the parked second half of the working state
- VMOVDQA 224(BP), Y1
- VMOVDQA 256(BP), Y2
- VMOVDQA 288(BP), Y3
- VMOVDQA 320(BP), Y4
- VMOVDQA 352(BP), Y5
- VMOVDQA 384(BP), Y6
- VMOVDQA 416(BP), Y7
- VPBROADCASTD 32(BP), Y8
- VPBROADCASTD 4+32(BP), Y9
- VPBROADCASTD 8+32(BP), Y10
- VPBROADCASTD 12+32(BP), Y11
- VMOVDQA 128(BP), Y12
- VMOVDQA 160(BP), Y13
- VPBROADCASTD 8+48(BP), Y14
- VPBROADCASTD 12+48(BP), Y15
- VPADDD Y8, Y0, Y0
- VPADDD Y9, Y1, Y1
- VPADDD Y10, Y2, Y2
- VPADDD Y11, Y3, Y3
- VPADDD Y12, Y4, Y4
- VPADDD Y13, Y5, Y5
- VPADDD Y14, Y6, Y6
- VPADDD Y15, Y7, Y7
- VPUNPCKLDQ Y1, Y0, Y8
- VPUNPCKLDQ Y3, Y2, Y9
- VPUNPCKHDQ Y1, Y0, Y12
- VPUNPCKHDQ Y3, Y2, Y13
- VPUNPCKLDQ Y5, Y4, Y10
- VPUNPCKLDQ Y7, Y6, Y11
- VPUNPCKHDQ Y5, Y4, Y14
- VPUNPCKHDQ Y7, Y6, Y15
- VPUNPCKLQDQ Y9, Y8, Y0
- VPUNPCKLQDQ Y11, Y10, Y1
- VPUNPCKHQDQ Y9, Y8, Y2
- VPUNPCKHQDQ Y11, Y10, Y3
- VPUNPCKLQDQ Y13, Y12, Y4
- VPUNPCKLQDQ Y15, Y14, Y5
- VPUNPCKHQDQ Y13, Y12, Y6
- VPUNPCKHQDQ Y15, Y14, Y7
- VPERM2I128 $0x20, Y1, Y0, Y8
- VPERM2I128 $0x20, Y3, Y2, Y9
- VPERM2I128 $0x31, Y1, Y0, Y12
- VPERM2I128 $0x31, Y3, Y2, Y13
- VPERM2I128 $0x20, Y5, Y4, Y10
- VPERM2I128 $0x20, Y7, Y6, Y11
- VPERM2I128 $0x31, Y5, Y4, Y14
- VPERM2I128 $0x31, Y7, Y6, Y15
- VPXOR 32(SI), Y8, Y8 // second 32 bytes of each block
- VPXOR 96(SI), Y9, Y9
- VPXOR 160(SI), Y10, Y10
- VPXOR 224(SI), Y11, Y11
- VPXOR 288(SI), Y12, Y12
- VPXOR 352(SI), Y13, Y13
- VPXOR 416(SI), Y14, Y14
- VPXOR 480(SI), Y15, Y15
- VMOVDQU Y8, 32(DX)
- VMOVDQU Y9, 96(DX)
- VMOVDQU Y10, 160(DX)
- VMOVDQU Y11, 224(DX)
- VMOVDQU Y12, 288(DX)
- VMOVDQU Y13, 352(DX)
- VMOVDQU Y14, 416(DX)
- VMOVDQU Y15, 480(DX)
- ADDQ $512, SI
- JMP chacha_blocks_avx2_mainloop1_cont
-
-chacha_blocks_avx2_noinput1: // nil input: same stores as above, without the XOR against (SI)
- VMOVDQU Y8, 0(DX)
- VMOVDQU Y9, 64(DX)
- VMOVDQU Y10, 128(DX)
- VMOVDQU Y11, 192(DX)
- VMOVDQU Y12, 256(DX)
- VMOVDQU Y13, 320(DX)
- VMOVDQU Y14, 384(DX)
- VMOVDQU Y15, 448(DX)
- VMOVDQA 192(BP), Y0
- VMOVDQA 224(BP), Y1
- VMOVDQA 256(BP), Y2
- VMOVDQA 288(BP), Y3
- VMOVDQA 320(BP), Y4
- VMOVDQA 352(BP), Y5
- VMOVDQA 384(BP), Y6
- VMOVDQA 416(BP), Y7
- VPBROADCASTD 32(BP), Y8
- VPBROADCASTD 4+32(BP), Y9
- VPBROADCASTD 8+32(BP), Y10
- VPBROADCASTD 12+32(BP), Y11
- VMOVDQA 128(BP), Y12
- VMOVDQA 160(BP), Y13
- VPBROADCASTD 8+48(BP), Y14
- VPBROADCASTD 12+48(BP), Y15
- VPADDD Y8, Y0, Y0
- VPADDD Y9, Y1, Y1
- VPADDD Y10, Y2, Y2
- VPADDD Y11, Y3, Y3
- VPADDD Y12, Y4, Y4
- VPADDD Y13, Y5, Y5
- VPADDD Y14, Y6, Y6
- VPADDD Y15, Y7, Y7
- VPUNPCKLDQ Y1, Y0, Y8
- VPUNPCKLDQ Y3, Y2, Y9
- VPUNPCKHDQ Y1, Y0, Y12
- VPUNPCKHDQ Y3, Y2, Y13
- VPUNPCKLDQ Y5, Y4, Y10
- VPUNPCKLDQ Y7, Y6, Y11
- VPUNPCKHDQ Y5, Y4, Y14
- VPUNPCKHDQ Y7, Y6, Y15
- VPUNPCKLQDQ Y9, Y8, Y0
- VPUNPCKLQDQ Y11, Y10, Y1
- VPUNPCKHQDQ Y9, Y8, Y2
- VPUNPCKHQDQ Y11, Y10, Y3
- VPUNPCKLQDQ Y13, Y12, Y4
- VPUNPCKLQDQ Y15, Y14, Y5
- VPUNPCKHQDQ Y13, Y12, Y6
- VPUNPCKHQDQ Y15, Y14, Y7
- VPERM2I128 $0x20, Y1, Y0, Y8
- VPERM2I128 $0x20, Y3, Y2, Y9
- VPERM2I128 $0x31, Y1, Y0, Y12
- VPERM2I128 $0x31, Y3, Y2, Y13
- VPERM2I128 $0x20, Y5, Y4, Y10
- VPERM2I128 $0x20, Y7, Y6, Y11
- VPERM2I128 $0x31, Y5, Y4, Y14
- VPERM2I128 $0x31, Y7, Y6, Y15
- VMOVDQU Y8, 32(DX)
- VMOVDQU Y9, 96(DX)
- VMOVDQU Y10, 160(DX)
- VMOVDQU Y11, 224(DX)
- VMOVDQU Y12, 288(DX)
- VMOVDQU Y13, 352(DX)
- VMOVDQU Y14, 416(DX)
- VMOVDQU Y15, 480(DX)
-
-chacha_blocks_avx2_mainloop1_cont: // advance the output pointer and remaining length by 512, then re-dispatch
- ADDQ $512, DX
- SUBQ $512, CX
- CMPQ CX, $512
- JAE chacha_blocks_avx2_atleast512
- CMPQ CX, $256
- JB chacha_blocks_avx2_below256_fixup
-
-chacha_blocks_avx2_atleast256: // 256 bytes (4 blocks) per pass, one 32-bit lane per block in X0-X15
- MOVQ 48(BP), AX
- LEAQ 1(AX), R8
- LEAQ 2(AX), R9
- LEAQ 3(AX), R10
- LEAQ 4(AX), BX
- MOVL AX, 128(BP) // low 32 bits of the four counters
- MOVL R8, 4+128(BP)
- MOVL R9, 8+128(BP)
- MOVL R10, 12+128(BP)
- SHRQ $32, AX
- SHRQ $32, R8
- SHRQ $32, R9
- SHRQ $32, R10
- MOVL AX, 160(BP) // high 32 bits of the four counters
- MOVL R8, 4+160(BP)
- MOVL R9, 8+160(BP)
- MOVL R10, 12+160(BP)
- MOVQ BX, 48(BP) // stored counter advances by 4
-
- // MOVQ 64(BP), AX (disabled; round count is the constant below)
- MOVQ $20, AX // 20 rounds, consumed two per loop iteration
- VPBROADCASTD 0(BP), X0
- VPBROADCASTD 4+0(BP), X1
- VPBROADCASTD 8+0(BP), X2
- VPBROADCASTD 12+0(BP), X3
- VPBROADCASTD 16(BP), X4
- VPBROADCASTD 4+16(BP), X5
- VPBROADCASTD 8+16(BP), X6
- VPBROADCASTD 12+16(BP), X7
- VPBROADCASTD 32(BP), X8
- VPBROADCASTD 4+32(BP), X9
- VPBROADCASTD 8+32(BP), X10
- VPBROADCASTD 12+32(BP), X11
- VMOVDQA 128(BP), X12
- VMOVDQA 160(BP), X13
- VPBROADCASTD 8+48(BP), X14
- VPBROADCASTD 12+48(BP), X15
-
-chacha_blocks_avx2_mainloop2: // same instruction sequence as mainloop1, on 128-bit registers
- VPADDD X0, X4, X0
- VPADDD X1, X5, X1
- VPXOR X12, X0, X12
- VPXOR X13, X1, X13
- VPADDD X2, X6, X2
- VPADDD X3, X7, X3
- VPXOR X14, X2, X14
- VPXOR X15, X3, X15
- VPSHUFB 448(BP), X12, X12
- VPSHUFB 448(BP), X13, X13
- VPADDD X8, X12, X8
- VPADDD X9, X13, X9
- VPSHUFB 448(BP), X14, X14
- VPSHUFB 448(BP), X15, X15
- VPADDD X10, X14, X10
- VPADDD X11, X15, X11
- VMOVDQA X12, 96(BP)
- VPXOR X4, X8, X4
- VPXOR X5, X9, X5
- VPSLLD $ 12, X4, X12
- VPSRLD $20, X4, X4
- VPXOR X4, X12, X4
- VPSLLD $ 12, X5, X12
- VPSRLD $20, X5, X5
- VPXOR X5, X12, X5
- VPXOR X6, X10, X6
- VPXOR X7, X11, X7
- VPSLLD $ 12, X6, X12
- VPSRLD $20, X6, X6
- VPXOR X6, X12, X6
- VPSLLD $ 12, X7, X12
- VPSRLD $20, X7, X7
- VPXOR X7, X12, X7
- VPADDD X0, X4, X0
- VPADDD X1, X5, X1
- VPXOR 96(BP), X0, X12
- VPXOR X13, X1, X13
- VPADDD X2, X6, X2
- VPADDD X3, X7, X3
- VPXOR X14, X2, X14
- VPXOR X15, X3, X15
- VPSHUFB 480(BP), X12, X12
- VPSHUFB 480(BP), X13, X13
- VPADDD X8, X12, X8
- VPADDD X9, X13, X9
- VPSHUFB 480(BP), X14, X14
- VPSHUFB 480(BP), X15, X15
- VPADDD X10, X14, X10
- VPADDD X11, X15, X11
- VMOVDQA X12, 96(BP)
- VPXOR X4, X8, X4
- VPXOR X5, X9, X5
- VPSLLD $ 7, X4, X12
- VPSRLD $25, X4, X4
- VPXOR X4, X12, X4
- VPSLLD $ 7, X5, X12
- VPSRLD $25, X5, X5
- VPXOR X5, X12, X5
- VPXOR X6, X10, X6
- VPXOR X7, X11, X7
- VPSLLD $ 7, X6, X12
- VPSRLD $25, X6, X6
- VPXOR X6, X12, X6
- VPSLLD $ 7, X7, X12
- VPSRLD $25, X7, X7
- VPXOR X7, X12, X7
- VPADDD X0, X5, X0
- VPADDD X1, X6, X1
- VPXOR X15, X0, X15
- VPXOR 96(BP), X1, X12
- VPADDD X2, X7, X2
- VPADDD X3, X4, X3
- VPXOR X13, X2, X13
- VPXOR X14, X3, X14
- VPSHUFB 448(BP), X15, X15
- VPSHUFB 448(BP), X12, X12
- VPADDD X10, X15, X10
- VPADDD X11, X12, X11
- VPSHUFB 448(BP), X13, X13
- VPSHUFB 448(BP), X14, X14
- VPADDD X8, X13, X8
- VPADDD X9, X14, X9
- VMOVDQA X15, 96(BP)
- VPXOR X5, X10, X5
- VPXOR X6, X11, X6
- VPSLLD $ 12, X5, X15
- VPSRLD $20, X5, X5
- VPXOR X5, X15, X5
- VPSLLD $ 12, X6, X15
- VPSRLD $20, X6, X6
- VPXOR X6, X15, X6
- VPXOR X7, X8, X7
- VPXOR X4, X9, X4
- VPSLLD $ 12, X7, X15
- VPSRLD $20, X7, X7
- VPXOR X7, X15, X7
- VPSLLD $ 12, X4, X15
- VPSRLD $20, X4, X4
- VPXOR X4, X15, X4
- VPADDD X0, X5, X0
- VPADDD X1, X6, X1
- VPXOR 96(BP), X0, X15
- VPXOR X12, X1, X12
- VPADDD X2, X7, X2
- VPADDD X3, X4, X3
- VPXOR X13, X2, X13
- VPXOR X14, X3, X14
- VPSHUFB 480(BP), X15, X15
- VPSHUFB 480(BP), X12, X12
- VPADDD X10, X15, X10
- VPADDD X11, X12, X11
- VPSHUFB 480(BP), X13, X13
- VPSHUFB 480(BP), X14, X14
- VPADDD X8, X13, X8
- VPADDD X9, X14, X9
- VMOVDQA X15, 96(BP)
- VPXOR X5, X10, X5
- VPXOR X6, X11, X6
- VPSLLD $ 7, X5, X15
- VPSRLD $25, X5, X5
- VPXOR X5, X15, X5
- VPSLLD $ 7, X6, X15
- VPSRLD $25, X6, X6
- VPXOR X6, X15, X6
- VPXOR X7, X8, X7
- VPXOR X4, X9, X4
- VPSLLD $ 7, X7, X15
- VPSRLD $25, X7, X7
- VPXOR X7, X15, X7
- VPSLLD $ 7, X4, X15
- VPSRLD $25, X4, X4
- VPXOR X4, X15, X4
- VMOVDQA 96(BP), X15
- SUBQ $2, AX
- JNZ chacha_blocks_avx2_mainloop2
- VMOVDQA X8, 192(BP) // park X8-X15 in scratch while X0-X7 are finished first
- VMOVDQA X9, 208(BP)
- VMOVDQA X10, 224(BP)
- VMOVDQA X11, 240(BP)
- VMOVDQA X12, 256(BP)
- VMOVDQA X13, 272(BP)
- VMOVDQA X14, 288(BP)
- VMOVDQA X15, 304(BP)
- VPBROADCASTD 0(BP), X8 // add the first eight input words back in
- VPBROADCASTD 4+0(BP), X9
- VPBROADCASTD 8+0(BP), X10
- VPBROADCASTD 12+0(BP), X11
- VPBROADCASTD 16(BP), X12
- VPBROADCASTD 4+16(BP), X13
- VPBROADCASTD 8+16(BP), X14
- VPBROADCASTD 12+16(BP), X15
- VPADDD X8, X0, X0
- VPADDD X9, X1, X1
- VPADDD X10, X2, X2
- VPADDD X11, X3, X3
- VPADDD X12, X4, X4
- VPADDD X13, X5, X5
- VPADDD X14, X6, X6
- VPADDD X15, X7, X7
- VPUNPCKLDQ X1, X0, X8
- VPUNPCKLDQ X3, X2, X9
- VPUNPCKHDQ X1, X0, X12
- VPUNPCKHDQ X3, X2, X13
- VPUNPCKLDQ X5, X4, X10
- VPUNPCKLDQ X7, X6, X11
- VPUNPCKHDQ X5, X4, X14
- VPUNPCKHDQ X7, X6, X15
- VPUNPCKLQDQ X9, X8, X0
- VPUNPCKLQDQ X11, X10, X1
- VPUNPCKHQDQ X9, X8, X2
- VPUNPCKHQDQ X11, X10, X3
- VPUNPCKLQDQ X13, X12, X4
- VPUNPCKLQDQ X15, X14, X5
- VPUNPCKHQDQ X13, X12, X6
- VPUNPCKHQDQ X15, X14, X7
- ANDQ SI, SI // input pointer nil?
- JZ chacha_blocks_avx2_noinput2
- VPXOR 0(SI), X0, X0 // first 32 bytes of each of the four blocks
- VPXOR 16(SI), X1, X1
- VPXOR 64(SI), X2, X2
- VPXOR 80(SI), X3, X3
- VPXOR 128(SI), X4, X4
- VPXOR 144(SI), X5, X5
- VPXOR 192(SI), X6, X6
- VPXOR 208(SI), X7, X7
- VMOVDQU X0, 0(DX)
- VMOVDQU X1, 16(DX)
- VMOVDQU X2, 64(DX)
- VMOVDQU X3, 80(DX)
- VMOVDQU X4, 128(DX)
- VMOVDQU X5, 144(DX)
- VMOVDQU X6, 192(DX)
- VMOVDQU X7, 208(DX)
- VMOVDQA 192(BP), X0 // reload the parked second half of the working state
- VMOVDQA 208(BP), X1
- VMOVDQA 224(BP), X2
- VMOVDQA 240(BP), X3
- VMOVDQA 256(BP), X4
- VMOVDQA 272(BP), X5
- VMOVDQA 288(BP), X6
- VMOVDQA 304(BP), X7
- VPBROADCASTD 32(BP), X8
- VPBROADCASTD 4+32(BP), X9
- VPBROADCASTD 8+32(BP), X10
- VPBROADCASTD 12+32(BP), X11
- VMOVDQA 128(BP), X12
- VMOVDQA 160(BP), X13
- VPBROADCASTD 8+48(BP), X14
- VPBROADCASTD 12+48(BP), X15
- VPADDD X8, X0, X0
- VPADDD X9, X1, X1
- VPADDD X10, X2, X2
- VPADDD X11, X3, X3
- VPADDD X12, X4, X4
- VPADDD X13, X5, X5
- VPADDD X14, X6, X6
- VPADDD X15, X7, X7
- VPUNPCKLDQ X1, X0, X8
- VPUNPCKLDQ X3, X2, X9
- VPUNPCKHDQ X1, X0, X12
- VPUNPCKHDQ X3, X2, X13
- VPUNPCKLDQ X5, X4, X10
- VPUNPCKLDQ X7, X6, X11
- VPUNPCKHDQ X5, X4, X14
- VPUNPCKHDQ X7, X6, X15
- VPUNPCKLQDQ X9, X8, X0
- VPUNPCKLQDQ X11, X10, X1
- VPUNPCKHQDQ X9, X8, X2
- VPUNPCKHQDQ X11, X10, X3
- VPUNPCKLQDQ X13, X12, X4
- VPUNPCKLQDQ X15, X14, X5
- VPUNPCKHQDQ X13, X12, X6
- VPUNPCKHQDQ X15, X14, X7
- VPXOR 32(SI), X0, X0 // second 32 bytes of each block
- VPXOR 48(SI), X1, X1
- VPXOR 96(SI), X2, X2
- VPXOR 112(SI), X3, X3
- VPXOR 160(SI), X4, X4
- VPXOR 176(SI), X5, X5
- VPXOR 224(SI), X6, X6
- VPXOR 240(SI), X7, X7
- VMOVDQU X0, 32(DX)
- VMOVDQU X1, 48(DX)
- VMOVDQU X2, 96(DX)
- VMOVDQU X3, 112(DX)
- VMOVDQU X4, 160(DX)
- VMOVDQU X5, 176(DX)
- VMOVDQU X6, 224(DX)
- VMOVDQU X7, 240(DX)
- ADDQ $256, SI
- JMP chacha_blocks_avx2_mainloop2_cont
-
-chacha_blocks_avx2_noinput2: // nil input: same stores as above, without the XOR against (SI)
- VMOVDQU X0, 0(DX)
- VMOVDQU X1, 16(DX)
- VMOVDQU X2, 64(DX)
- VMOVDQU X3, 80(DX)
- VMOVDQU X4, 128(DX)
- VMOVDQU X5, 144(DX)
- VMOVDQU X6, 192(DX)
- VMOVDQU X7, 208(DX)
- VMOVDQA 192(BP), X0
- VMOVDQA 208(BP), X1
- VMOVDQA 224(BP), X2
- VMOVDQA 240(BP), X3
- VMOVDQA 256(BP), X4
- VMOVDQA 272(BP), X5
- VMOVDQA 288(BP), X6
- VMOVDQA 304(BP), X7
- VPBROADCASTD 32(BP), X8
- VPBROADCASTD 4+32(BP), X9
- VPBROADCASTD 8+32(BP), X10
- VPBROADCASTD 12+32(BP), X11
- VMOVDQA 128(BP), X12
- VMOVDQA 160(BP), X13
- VPBROADCASTD 8+48(BP), X14
- VPBROADCASTD 12+48(BP), X15
- VPADDD X8, X0, X0
- VPADDD X9, X1, X1
- VPADDD X10, X2, X2
- VPADDD X11, X3, X3
- VPADDD X12, X4, X4
- VPADDD X13, X5, X5
- VPADDD X14, X6, X6
- VPADDD X15, X7, X7
- VPUNPCKLDQ X1, X0, X8
- VPUNPCKLDQ X3, X2, X9
- VPUNPCKHDQ X1, X0, X12
- VPUNPCKHDQ X3, X2, X13
- VPUNPCKLDQ X5, X4, X10
- VPUNPCKLDQ X7, X6, X11
- VPUNPCKHDQ X5, X4, X14
- VPUNPCKHDQ X7, X6, X15
- VPUNPCKLQDQ X9, X8, X0
- VPUNPCKLQDQ X11, X10, X1
- VPUNPCKHQDQ X9, X8, X2
- VPUNPCKHQDQ X11, X10, X3
- VPUNPCKLQDQ X13, X12, X4
- VPUNPCKLQDQ X15, X14, X5
- VPUNPCKHQDQ X13, X12, X6
- VPUNPCKHQDQ X15, X14, X7
- VMOVDQU X0, 32(DX)
- VMOVDQU X1, 48(DX)
- VMOVDQU X2, 96(DX)
- VMOVDQU X3, 112(DX)
- VMOVDQU X4, 160(DX)
- VMOVDQU X5, 176(DX)
- VMOVDQU X6, 224(DX)
- VMOVDQU X7, 240(DX)
-
-chacha_blocks_avx2_mainloop2_cont: // advance the output pointer and remaining length by 256
- ADDQ $256, DX
- SUBQ $256, CX
- CMPQ CX, $256
- JAE chacha_blocks_avx2_atleast256
-
-chacha_blocks_avx2_below256_fixup: // reload masks and state rows (with the advanced counter) for the single-block path
- VMOVDQA 448(BP), X6
- VMOVDQA 480(BP), X7
- VMOVDQA 0(BP), X8
- VMOVDQA 16(BP), X9
- VMOVDQA 32(BP), X10
- VMOVDQA 48(BP), X11
- MOVQ $1, R9
-
-chacha_blocks_avx2_below256: // single-block path: at most 64 bytes per pass
- VMOVQ R9, X5 // X5 = counter increment (1)
- ANDQ CX, CX
- JZ chacha_blocks_avx2_done
- CMPQ CX, $64
- JAE chacha_blocks_avx2_above63
- MOVQ DX, R9 // fewer than 64 bytes left: remember the real destination in R9
- ANDQ SI, SI
- JZ chacha_blocks_avx2_noinput3
- MOVQ CX, R10
- MOVQ BP, DX
- ADDQ R10, SI
- ADDQ R10, DX
- NEGQ R10
-
-chacha_blocks_avx2_copyinput: // byte-copy the CX remaining input bytes into the scratch area at BP
- MOVB (SI)(R10*1), AX
- MOVB AX, (DX)(R10*1)
- INCQ R10
- JNZ chacha_blocks_avx2_copyinput
- MOVQ BP, SI // read input from scratch
-
-chacha_blocks_avx2_noinput3:
- MOVQ BP, DX // write output to scratch
-
-chacha_blocks_avx2_above63:
- VMOVDQA X8, X0 // working copy of the four state rows
- VMOVDQA X9, X1
- VMOVDQA X10, X2
- VMOVDQA X11, X3
-
- // MOVQ 64(BP), AX (disabled; round count is the constant below)
- MOVQ $20, AX // 20 rounds, consumed two per loop iteration
-
-chacha_blocks_avx2_mainloop3: // one block held row-wise in X0-X3; VPSHUFD rotates rows between the two halves
- VPADDD X0, X1, X0
- VPXOR X3, X0, X3
- VPSHUFB X6, X3, X3
- VPADDD X2, X3, X2
- VPXOR X1, X2, X1
- VPSLLD $12, X1, X4
- VPSRLD $20, X1, X1
- VPXOR X1, X4, X1
- VPADDD X0, X1, X0
- VPXOR X3, X0, X3
- VPSHUFB X7, X3, X3
- VPSHUFD $0x93, X0, X0
- VPADDD X2, X3, X2
- VPSHUFD $0x4e, X3, X3
- VPXOR X1, X2, X1
- VPSHUFD $0x39, X2, X2
- VPSLLD $7, X1, X4
- VPSRLD $25, X1, X1
- VPXOR X1, X4, X1
- VPADDD X0, X1, X0
- VPXOR X3, X0, X3
- VPSHUFB X6, X3, X3
- VPADDD X2, X3, X2
- VPXOR X1, X2, X1
- VPSLLD $12, X1, X4
- VPSRLD $20, X1, X1
- VPXOR X1, X4, X1
- VPADDD X0, X1, X0
- VPXOR X3, X0, X3
- VPSHUFB X7, X3, X3
- VPSHUFD $0x39, X0, X0
- VPADDD X2, X3, X2
- VPSHUFD $0x4e, X3, X3
- VPXOR X1, X2, X1
- VPSHUFD $0x93, X2, X2
- VPSLLD $7, X1, X4
- VPSRLD $25, X1, X1
- VPXOR X1, X4, X1
- SUBQ $2, AX
- JNZ chacha_blocks_avx2_mainloop3
- VPADDD X0, X8, X0 // add the input rows back in
- VPADDD X1, X9, X1
- VPADDD X2, X10, X2
- VPADDD X3, X11, X3
- ANDQ SI, SI // input pointer nil?
- JZ chacha_blocks_avx2_noinput4
- VPXOR 0(SI), X0, X0
- VPXOR 16(SI), X1, X1
- VPXOR 32(SI), X2, X2
- VPXOR 48(SI), X3, X3
- ADDQ $64, SI
-
-chacha_blocks_avx2_noinput4:
- VMOVDQU X0, 0(DX)
- VMOVDQU X1, 16(DX)
- VMOVDQU X2, 32(DX)
- VMOVDQU X3, 48(DX)
- VPADDQ X11, X5, X11 // 64-bit add of X5 into the low quadword of the last row (the counter)
- CMPQ CX, $64
- JBE chacha_blocks_avx2_mainloop3_finishup
- ADDQ $64, DX
- SUBQ $64, CX
- JMP chacha_blocks_avx2_below256
-
-chacha_blocks_avx2_mainloop3_finishup:
- CMPQ CX, $64
- JE chacha_blocks_avx2_done
- ADDQ CX, R9 // partial block: R9 = real destination, DX = scratch
- ADDQ CX, DX
- NEGQ CX
-
-chacha_blocks_avx2_copyoutput: // byte-copy the partial result from scratch to the real destination
- MOVB (DX)(CX*1), AX
- MOVB AX, (R9)(CX*1)
- INCQ CX
- JNZ chacha_blocks_avx2_copyoutput
-
-chacha_blocks_avx2_done:
- VMOVDQU X11, 32(DI) // write the last row (with the advanced counter) back; DI is s+16 bytes, so this is byte offset 48 of s
-
- VZEROUPPER
- RET
-
-// func hChaChaAVX2(key, nonce []byte, dst *byte)
-TEXT ·hChaChaAVX2(SB), NOSPLIT|NOFRAME, $0-56 // no stack frame, 56 bytes of arguments
- MOVQ key+0(FP), DI // &key[0] -> DI
- MOVQ nonce+24(FP), SI // &nonce[0] -> SI
- MOVQ dst+48(FP), DX // dst -> DX
-
- MOVL $20, CX // 20 rounds, consumed two per loop iteration
-
- LEAQ ·chacha_constants<>(SB), AX
- VMOVDQA 0(AX), X0 // X0 = the four constant words
- VMOVDQA 16(AX), X6 // X6 = shuffle mask at offset 0x10
- VMOVDQA 32(AX), X5 // X5 = shuffle mask at offset 0x20
-
- VMOVDQU 0(DI), X1 // X1, X2 = 32 bytes read from key
- VMOVDQU 16(DI), X2
- VMOVDQU 0(SI), X3 // X3 = 16 bytes read from nonce
-
-hhacha_mainloop_avx2: // two rounds per iteration on the row-wise state in X0-X3
- VPADDD X0, X1, X0
- VPXOR X3, X0, X3
- VPSHUFB X6, X3, X3
- VPADDD X2, X3, X2
- VPXOR X1, X2, X1
- VPSLLD $12, X1, X4
- VPSRLD $20, X1, X1
- VPXOR X1, X4, X1
- VPADDD X0, X1, X0
- VPXOR X3, X0, X3
- VPSHUFB X5, X3, X3
- VPADDD X2, X3, X2
- VPXOR X1, X2, X1
- VPSLLD $7, X1, X4
- VPSRLD $25, X1, X1
- VPSHUFD $0x93, X0, X0
- VPXOR X1, X4, X1
- VPSHUFD $0x4e, X3, X3
- VPADDD X0, X1, X0
- VPXOR X3, X0, X3
- VPSHUFB X6, X3, X3
- VPSHUFD $0x39, X2, X2
- VPADDD X2, X3, X2
- VPXOR X1, X2, X1
- VPSLLD $12, X1, X4
- VPSRLD $20, X1, X1
- VPXOR X1, X4, X1
- VPADDD X0, X1, X0
- VPXOR X3, X0, X3
- VPSHUFB X5, X3, X3
- VPADDD X2, X3, X2
- VPXOR X1, X2, X1
- VPSHUFD $0x39, X0, X0
- VPSLLD $7, X1, X4
- VPSHUFD $0x4e, X3, X3
- VPSRLD $25, X1, X1
- VPSHUFD $0x93, X2, X2
- VPXOR X1, X4, X1
- SUBL $2, CX
- JNE hhacha_mainloop_avx2
-
- VMOVDQU X0, (DX) // output is rows X0 and X3 only (32 bytes); the input is not added back
- VMOVDQU X3, 16(DX)
-
- VZEROUPPER
- RET
-
-// func blocksSSSE3(s *[api.StateSize]uint32, in, out []byte)
-TEXT ·blocksSSSE3(SB), NOSPLIT, $576-56
- // This is Andrew Moon's SSSE3 ChaCha implementation taken from
- // supercop-20190110, with some minor changes, primarily calling
- // convention and assembly dialect related.
-
- // Align the stack on a 64 byte boundary.
- MOVQ SP, BP
- ADDQ $64, BP
- ANDQ $-64, BP
-
- // Go calling convention -> SYSV AMD64 (and a fixup).
- MOVQ s+0(FP), DI // &s -> DI
- ADDQ $16, DI // Skip the ChaCha constants in the chachaState.
- MOVQ in+8(FP), SI // &in[0] -> SI
- MOVQ out+32(FP), DX // &out[0] -> DX
- MOVQ in_len+16(FP), CX // len(in) -> CX
-
- // Begin the main body of `chacha_blocks_ssse3`.
- //
- // Mostly a direct translation except:
- // * The number of rounds is always 20.
- // * %rbp is used instead of %rsp.
- LEAQ ·chacha_constants<>(SB), AX
- MOVO 0(AX), X8
- MOVO 16(AX), X6
- MOVO 32(AX), X7
- MOVOU 0(DI), X9
- MOVOU 16(DI), X10
- MOVOU 32(DI), X11
-
- // MOVQ 48(DI), AX
- MOVQ $1, R9
- MOVO X8, 0(BP)
- MOVO X9, 16(BP)
- MOVO X10, 32(BP)
- MOVO X11, 48(BP)
-
- MOVO X6, 80(BP)
- MOVO X7, 96(BP)
- // MOVQ AX, 64(BP)
- CMPQ CX, $256
- JB chacha_blocks_ssse3_below256
- PSHUFD $0x00, X8, X0
- PSHUFD $0x55, X8, X1
- PSHUFD $0xaa, X8, X2
- PSHUFD $0xff, X8, X3
- MOVO X0, 128(BP)
- MOVO X1, 144(BP)
- MOVO X2, 160(BP)
- MOVO X3, 176(BP)
- PSHUFD $0x00, X9, X0
- PSHUFD $0x55, X9, X1
- PSHUFD $0xaa, X9, X2
- PSHUFD $0xff, X9, X3
- MOVO X0, 192(BP)
- MOVO X1, 208(BP)
- MOVO X2, 224(BP)
- MOVO X3, 240(BP)
- PSHUFD $0x00, X10, X0
- PSHUFD $0x55, X10, X1
- PSHUFD $0xaa, X10, X2
- PSHUFD $0xff, X10, X3
- MOVO X0, 256(BP)
- MOVO X1, 272(BP)
- MOVO X2, 288(BP)
- MOVO X3, 304(BP)
- PSHUFD $0xaa, X11, X0
- PSHUFD $0xff, X11, X1
- MOVO X0, 352(BP)
- MOVO X1, 368(BP)
- JMP chacha_blocks_ssse3_atleast256
-
-// .p2align 6,,63
-// # align to 4 mod 64
-// nop;nop;nop;nop;
-chacha_blocks_ssse3_atleast256:
- MOVQ 48(BP), AX
- LEAQ 1(AX), R8
- LEAQ 2(AX), R9
- LEAQ 3(AX), R10
- LEAQ 4(AX), BX
- MOVL AX, 320(BP)
- MOVL R8, 4+320(BP)
- MOVL R9, 8+320(BP)
- MOVL R10, 12+320(BP)
- SHRQ $32, AX
- SHRQ $32, R8
- SHRQ $32, R9
- SHRQ $32, R10
- MOVL AX, 336(BP)
- MOVL R8, 4+336(BP)
- MOVL R9, 8+336(BP)
- MOVL R10, 12+336(BP)
- MOVQ BX, 48(BP)
-
- // MOVQ 64(BP), AX
- MOVQ $20, AX
- MOVO 128(BP), X0
- MOVO 144(BP), X1
- MOVO 160(BP), X2
- MOVO 176(BP), X3
- MOVO 192(BP), X4
- MOVO 208(BP), X5
- MOVO 224(BP), X6
- MOVO 240(BP), X7
- MOVO 256(BP), X8
- MOVO 272(BP), X9
- MOVO 288(BP), X10
- MOVO 304(BP), X11
- MOVO 320(BP), X12
- MOVO 336(BP), X13
- MOVO 352(BP), X14
- MOVO 368(BP), X15
-
-chacha_blocks_ssse3_mainloop1:
- PADDD X4, X0
- PADDD X5, X1
- PXOR X0, X12
- PXOR X1, X13
- PADDD X6, X2
- PADDD X7, X3
- PXOR X2, X14
- PXOR X3, X15
- PSHUFB 80(BP), X12
- PSHUFB 80(BP), X13
- PADDD X12, X8
- PADDD X13, X9
- PSHUFB 80(BP), X14
- PSHUFB 80(BP), X15
- PADDD X14, X10
- PADDD X15, X11
- MOVO X12, 112(BP)
- PXOR X8, X4
- PXOR X9, X5
- MOVO X4, X12
- PSLLL $ 12, X4
- PSRLL $20, X12
- PXOR X12, X4
- MOVO X5, X12
- PSLLL $ 12, X5
- PSRLL $20, X12
- PXOR X12, X5
- PXOR X10, X6
- PXOR X11, X7
- MOVO X6, X12
- PSLLL $ 12, X6
- PSRLL $20, X12
- PXOR X12, X6
- MOVO X7, X12
- PSLLL $ 12, X7
- PSRLL $20, X12
- PXOR X12, X7
- MOVO 112(BP), X12
- PADDD X4, X0
- PADDD X5, X1
- PXOR X0, X12
- PXOR X1, X13
- PADDD X6, X2
- PADDD X7, X3
- PXOR X2, X14
- PXOR X3, X15
- PSHUFB 96(BP), X12
- PSHUFB 96(BP), X13
- PADDD X12, X8
- PADDD X13, X9
- PSHUFB 96(BP), X14
- PSHUFB 96(BP), X15
- PADDD X14, X10
- PADDD X15, X11
- MOVO X12, 112(BP)
- PXOR X8, X4
- PXOR X9, X5
- MOVO X4, X12
- PSLLL $ 7, X4
- PSRLL $25, X12
- PXOR X12, X4
- MOVO X5, X12
- PSLLL $ 7, X5
- PSRLL $25, X12
- PXOR X12, X5
- PXOR X10, X6
- PXOR X11, X7
- MOVO X6, X12
- PSLLL $ 7, X6
- PSRLL $25, X12
- PXOR X12, X6
- MOVO X7, X12
- PSLLL $ 7, X7
- PSRLL $25, X12
- PXOR X12, X7
- MOVO 112(BP), X12
- PADDD X5, X0
- PADDD X6, X1
- PXOR X0, X15
- PXOR X1, X12
- PADDD X7, X2
- PADDD X4, X3
- PXOR X2, X13
- PXOR X3, X14
- PSHUFB 80(BP), X15
- PSHUFB 80(BP), X12
- PADDD X15, X10
- PADDD X12, X11
- PSHUFB 80(BP), X13
- PSHUFB 80(BP), X14
- PADDD X13, X8
- PADDD X14, X9
- MOVO X15, 112(BP)
- PXOR X10, X5
- PXOR X11, X6
- MOVO X5, X15
- PSLLL $ 12, X5
- PSRLL $20, X15
- PXOR X15, X5
- MOVO X6, X15
- PSLLL $ 12, X6
- PSRLL $20, X15
- PXOR X15, X6
- PXOR X8, X7
- PXOR X9, X4
- MOVO X7, X15
- PSLLL $ 12, X7
- PSRLL $20, X15
- PXOR X15, X7
- MOVO X4, X15
- PSLLL $ 12, X4
- PSRLL $20, X15
- PXOR X15, X4
- MOVO 112(BP), X15
- PADDD X5, X0
- PADDD X6, X1
- PXOR X0, X15
- PXOR X1, X12
- PADDD X7, X2
- PADDD X4, X3
- PXOR X2, X13
- PXOR X3, X14
- PSHUFB 96(BP), X15
- PSHUFB 96(BP), X12
- PADDD X15, X10
- PADDD X12, X11
- PSHUFB 96(BP), X13
- PSHUFB 96(BP), X14
- PADDD X13, X8
- PADDD X14, X9
- MOVO X15, 112(BP)
- PXOR X10, X5
- PXOR X11, X6
- MOVO X5, X15
- PSLLL $ 7, X5
- PSRLL $25, X15
- PXOR X15, X5
- MOVO X6, X15
- PSLLL $ 7, X6
- PSRLL $25, X15
- PXOR X15, X6
- PXOR X8, X7
- PXOR X9, X4
- MOVO X7, X15
- PSLLL $ 7, X7
- PSRLL $25, X15
- PXOR X15, X7
- MOVO X4, X15
- PSLLL $ 7, X4
- PSRLL $25, X15
- PXOR X15, X4
- SUBQ $2, AX
- MOVO 112(BP), X15
- JNZ chacha_blocks_ssse3_mainloop1
- PADDD 128(BP), X0
- PADDD 144(BP), X1
- PADDD 160(BP), X2
- PADDD 176(BP), X3
- PADDD 192(BP), X4
- PADDD 208(BP), X5
- PADDD 224(BP), X6
- PADDD 240(BP), X7
- PADDD 256(BP), X8
- PADDD 272(BP), X9
- PADDD 288(BP), X10
- PADDD 304(BP), X11
- PADDD 320(BP), X12
- PADDD 336(BP), X13
- PADDD 352(BP), X14
- PADDD 368(BP), X15
- MOVO X8, 384(BP)
- MOVO X9, 400(BP)
- MOVO X10, 416(BP)
- MOVO X11, 432(BP)
- MOVO X12, 448(BP)
- MOVO X13, 464(BP)
- MOVO X14, 480(BP)
- MOVO X15, 496(BP)
- MOVO X0, X8
- MOVO X2, X9
- MOVO X4, X10
- MOVO X6, X11
- PUNPCKHLQ X1, X0
- PUNPCKHLQ X3, X2
- PUNPCKHLQ X5, X4
- PUNPCKHLQ X7, X6
- PUNPCKLLQ X1, X8
- PUNPCKLLQ X3, X9
- PUNPCKLLQ X5, X10
- PUNPCKLLQ X7, X11
- MOVO X0, X1
- MOVO X4, X3
- MOVO X8, X5
- MOVO X10, X7
- PUNPCKHQDQ X2, X0
- PUNPCKHQDQ X6, X4
- PUNPCKHQDQ X9, X8
- PUNPCKHQDQ X11, X10
- PUNPCKLQDQ X2, X1
- PUNPCKLQDQ X6, X3
- PUNPCKLQDQ X9, X5
- PUNPCKLQDQ X11, X7
- ANDQ SI, SI
- JZ chacha_blocks_ssse3_noinput1
- MOVOU 0(SI), X2
- MOVOU 16(SI), X6
- MOVOU 64(SI), X9
- MOVOU 80(SI), X11
- MOVOU 128(SI), X12
- MOVOU 144(SI), X13
- MOVOU 192(SI), X14
- MOVOU 208(SI), X15
- PXOR X2, X5
- PXOR X6, X7
- PXOR X9, X8
- PXOR X11, X10
- PXOR X12, X1
- PXOR X13, X3
- PXOR X14, X0
- PXOR X15, X4
- MOVOU X5, 0(DX)
- MOVOU X7, 16(DX)
- MOVOU X8, 64(DX)
- MOVOU X10, 80(DX)
- MOVOU X1, 128(DX)
- MOVOU X3, 144(DX)
- MOVOU X0, 192(DX)
- MOVOU X4, 208(DX)
- MOVO 384(BP), X0
- MOVO 400(BP), X1
- MOVO 416(BP), X2
- MOVO 432(BP), X3
- MOVO 448(BP), X4
- MOVO 464(BP), X5
- MOVO 480(BP), X6
- MOVO 496(BP), X7
- MOVO X0, X8
- MOVO X2, X9
- MOVO X4, X10
- MOVO X6, X11
- PUNPCKLLQ X1, X8
- PUNPCKLLQ X3, X9
- PUNPCKHLQ X1, X0
- PUNPCKHLQ X3, X2
- PUNPCKLLQ X5, X10
- PUNPCKLLQ X7, X11
- PUNPCKHLQ X5, X4
- PUNPCKHLQ X7, X6
- MOVO X8, X1
- MOVO X0, X3
- MOVO X10, X5
- MOVO X4, X7
- PUNPCKLQDQ X9, X1
- PUNPCKLQDQ X11, X5
- PUNPCKHQDQ X9, X8
- PUNPCKHQDQ X11, X10
- PUNPCKLQDQ X2, X3
- PUNPCKLQDQ X6, X7
- PUNPCKHQDQ X2, X0
- PUNPCKHQDQ X6, X4
- MOVOU 32(SI), X2
- MOVOU 48(SI), X6
- MOVOU 96(SI), X9
- MOVOU 112(SI), X11
- MOVOU 160(SI), X12
- MOVOU 176(SI), X13
- MOVOU 224(SI), X14
- MOVOU 240(SI), X15
- PXOR X2, X1
- PXOR X6, X5
- PXOR X9, X8
- PXOR X11, X10
- PXOR X12, X3
- PXOR X13, X7
- PXOR X14, X0
- PXOR X15, X4
- MOVOU X1, 32(DX)
- MOVOU X5, 48(DX)
- MOVOU X8, 96(DX)
- MOVOU X10, 112(DX)
- MOVOU X3, 160(DX)
- MOVOU X7, 176(DX)
- MOVOU X0, 224(DX)
- MOVOU X4, 240(DX)
- ADDQ $256, SI
- JMP chacha_blocks_ssse3_mainloop_cont
-
-chacha_blocks_ssse3_noinput1:
- MOVOU X5, 0(DX)
- MOVOU X7, 16(DX)
- MOVOU X8, 64(DX)
- MOVOU X10, 80(DX)
- MOVOU X1, 128(DX)
- MOVOU X3, 144(DX)
- MOVOU X0, 192(DX)
- MOVOU X4, 208(DX)
- MOVO 384(BP), X0
- MOVO 400(BP), X1
- MOVO 416(BP), X2
- MOVO 432(BP), X3
- MOVO 448(BP), X4
- MOVO 464(BP), X5
- MOVO 480(BP), X6
- MOVO 496(BP), X7
- MOVO X0, X8
- MOVO X2, X9
- MOVO X4, X10
- MOVO X6, X11
- PUNPCKLLQ X1, X8
- PUNPCKLLQ X3, X9
- PUNPCKHLQ X1, X0
- PUNPCKHLQ X3, X2
- PUNPCKLLQ X5, X10
- PUNPCKLLQ X7, X11
- PUNPCKHLQ X5, X4
- PUNPCKHLQ X7, X6
- MOVO X8, X1
- MOVO X0, X3
- MOVO X10, X5
- MOVO X4, X7
- PUNPCKLQDQ X9, X1
- PUNPCKLQDQ X11, X5
- PUNPCKHQDQ X9, X8
- PUNPCKHQDQ X11, X10
- PUNPCKLQDQ X2, X3
- PUNPCKLQDQ X6, X7
- PUNPCKHQDQ X2, X0
- PUNPCKHQDQ X6, X4
- MOVOU X1, 32(DX)
- MOVOU X5, 48(DX)
- MOVOU X8, 96(DX)
- MOVOU X10, 112(DX)
- MOVOU X3, 160(DX)
- MOVOU X7, 176(DX)
- MOVOU X0, 224(DX)
- MOVOU X4, 240(DX)
-
-chacha_blocks_ssse3_mainloop_cont:
- ADDQ $256, DX
- SUBQ $256, CX
- CMPQ CX, $256
- JAE chacha_blocks_ssse3_atleast256
- MOVO 80(BP), X6
- MOVO 96(BP), X7
- MOVO 0(BP), X8
- MOVO 16(BP), X9
- MOVO 32(BP), X10
- MOVO 48(BP), X11
- MOVQ $1, R9
-
-chacha_blocks_ssse3_below256:
- MOVQ R9, X5
- ANDQ CX, CX
- JZ chacha_blocks_ssse3_done
- CMPQ CX, $64
- JAE chacha_blocks_ssse3_above63
- MOVQ DX, R9
- ANDQ SI, SI
- JZ chacha_blocks_ssse3_noinput2
- MOVQ CX, R10
- MOVQ BP, DX
- ADDQ R10, SI
- ADDQ R10, DX
- NEGQ R10
-
-chacha_blocks_ssse3_copyinput:
- MOVB (SI)(R10*1), AX
- MOVB AX, (DX)(R10*1)
- INCQ R10
- JNZ chacha_blocks_ssse3_copyinput
- MOVQ BP, SI
-
-chacha_blocks_ssse3_noinput2:
- MOVQ BP, DX
-
-chacha_blocks_ssse3_above63:
- MOVO X8, X0
- MOVO X9, X1
- MOVO X10, X2
- MOVO X11, X3
-
- // MOVQ 64(BP), AX
- MOVQ $20, AX
-
-chacha_blocks_ssse3_mainloop2:
- PADDD X1, X0
- PXOR X0, X3
- PSHUFB X6, X3
- PADDD X3, X2
- PXOR X2, X1
- MOVO X1, X4
- PSLLL $12, X4
- PSRLL $20, X1
- PXOR X4, X1
- PADDD X1, X0
- PXOR X0, X3
- PSHUFB X7, X3
- PSHUFD $0x93, X0, X0
- PADDD X3, X2
- PSHUFD $0x4e, X3, X3
- PXOR X2, X1
- PSHUFD $0x39, X2, X2
- MOVO X1, X4
- PSLLL $7, X4
- PSRLL $25, X1
- PXOR X4, X1
- PADDD X1, X0
- PXOR X0, X3
- PSHUFB X6, X3
- PADDD X3, X2
- PXOR X2, X1
- MOVO X1, X4
- PSLLL $12, X4
- PSRLL $20, X1
- PXOR X4, X1
- PADDD X1, X0
- PXOR X0, X3
- PSHUFB X7, X3
- PSHUFD $0x39, X0, X0
- PADDD X3, X2
- PSHUFD $0x4e, X3, X3
- PXOR X2, X1
- PSHUFD $0x93, X2, X2
- MOVO X1, X4
- PSLLL $7, X4
- PSRLL $25, X1
- PXOR X4, X1
- SUBQ $2, AX
- JNZ chacha_blocks_ssse3_mainloop2
- PADDD X8, X0
- PADDD X9, X1
- PADDD X10, X2
- PADDD X11, X3
- ANDQ SI, SI
- JZ chacha_blocks_ssse3_noinput3
- MOVOU 0(SI), X12
- MOVOU 16(SI), X13
- MOVOU 32(SI), X14
- MOVOU 48(SI), X15
- PXOR X12, X0
- PXOR X13, X1
- PXOR X14, X2
- PXOR X15, X3
- ADDQ $64, SI
-
-chacha_blocks_ssse3_noinput3:
- MOVOU X0, 0(DX)
- MOVOU X1, 16(DX)
- MOVOU X2, 32(DX)
- MOVOU X3, 48(DX)
- PADDQ X5, X11
- CMPQ CX, $64
- JBE chacha_blocks_ssse3_mainloop2_finishup
- ADDQ $64, DX
- SUBQ $64, CX
- JMP chacha_blocks_ssse3_below256
-
-chacha_blocks_ssse3_mainloop2_finishup:
- CMPQ CX, $64
- JE chacha_blocks_ssse3_done
- ADDQ CX, R9
- ADDQ CX, DX
- NEGQ CX
-
-chacha_blocks_ssse3_copyoutput:
- MOVB (DX)(CX*1), AX
- MOVB AX, (R9)(CX*1)
- INCQ CX
- JNZ chacha_blocks_ssse3_copyoutput
-
-chacha_blocks_ssse3_done:
- MOVOU X11, 32(DI)
-
- RET
-
-// func hChaChaSSSE3(key, nonce []byte, dst *byte)
-TEXT ·hChaChaSSSE3(SB), NOSPLIT|NOFRAME, $0-56
- MOVQ key+0(FP), DI
- MOVQ nonce+24(FP), SI
- MOVQ dst+48(FP), DX
-
- MOVL $20, CX
-
- LEAQ ·chacha_constants<>(SB), AX
- MOVO 0(AX), X0
- MOVO 16(AX), X5
- MOVO 32(AX), X6
-
- MOVOU 0(DI), X1
- MOVOU 16(DI), X2
- MOVOU 0(SI), X3
-
-hchacha_ssse3_mainloop:
- PADDD X1, X0
- PXOR X0, X3
- PSHUFB X5, X3
- PADDD X3, X2
- PXOR X2, X1
- MOVO X1, X4
- PSLLL $12, X1
- PSRLL $20, X4
- PXOR X4, X1
- PADDD X1, X0
- PXOR X0, X3
- PSHUFB X6, X3
- PSHUFD $0X93, X0, X0
- PADDD X3, X2
- PSHUFD $0X4E, X3, X3
- PXOR X2, X1
- PSHUFD $0X39, X2, X2
- MOVO X1, X4
- PSLLL $7, X1
- PSRLL $25, X4
- PXOR X4, X1
- SUBQ $2, CX
- PADDD X1, X0
- PXOR X0, X3
- PSHUFB X5, X3
- PADDD X3, X2
- PXOR X2, X1
- MOVO X1, X4
- PSLLL $12, X1
- PSRLL $20, X4
- PXOR X4, X1
- PADDD X1, X0
- PXOR X0, X3
- PSHUFB X6, X3
- PSHUFD $0X39, X0, X0
- PADDD X3, X2
- PSHUFD $0X4E, X3, X3
- PXOR X2, X1
- PSHUFD $0X93, X2, X2
- MOVO X1, X4
- PSLLL $7, X1
- PSRLL $25, X4
- PXOR X4, X1
- JA hchacha_ssse3_mainloop
-
- MOVOU X0, 0(DX)
- MOVOU X3, 16(DX)
-
- RET
diff --git a/vendor/blitter.com/go/chacha20/internal/ref/impl.go b/vendor/blitter.com/go/chacha20/internal/ref/impl.go
deleted file mode 100644
index d3a8ca6..0000000
--- a/vendor/blitter.com/go/chacha20/internal/ref/impl.go
+++ /dev/null
@@ -1,374 +0,0 @@
-// Copryright (C) 2019 Yawning Angel
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as
-// published by the Free Software Foundation, either version 3 of the
-// License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
-// Package ref provides the portable ChaCha20 implementation.
-package ref
-
-import (
- "encoding/binary"
- "math/bits"
-
- "blitter.com/go/chacha20/internal/api"
-)
-
-const rounds = 20
-
-// Impl is the reference implementation (exposed for testing).
-var Impl = &implRef{}
-
-type implRef struct{}
-
-func (impl *implRef) Name() string {
- return "ref"
-}
-
-func (impl *implRef) Blocks(x *[api.StateSize]uint32, dst, src []byte, nrBlocks int) {
- for n := 0; n < nrBlocks; n++ {
- x0, x1, x2, x3 := api.Sigma0, api.Sigma1, api.Sigma2, api.Sigma3
- x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]
-
- for i := rounds; i > 0; i -= 2 {
- // quarterround(x, 0, 4, 8, 12)
- x0 += x4
- x12 ^= x0
- x12 = bits.RotateLeft32(x12, 16)
- x8 += x12
- x4 ^= x8
- x4 = bits.RotateLeft32(x4, 12)
- x0 += x4
- x12 ^= x0
- x12 = bits.RotateLeft32(x12, 8)
- x8 += x12
- x4 ^= x8
- x4 = bits.RotateLeft32(x4, 7)
-
- // quarterround(x, 1, 5, 9, 13)
- x1 += x5
- x13 ^= x1
- x13 = bits.RotateLeft32(x13, 16)
- x9 += x13
- x5 ^= x9
- x5 = bits.RotateLeft32(x5, 12)
- x1 += x5
- x13 ^= x1
- x13 = bits.RotateLeft32(x13, 8)
- x9 += x13
- x5 ^= x9
- x5 = bits.RotateLeft32(x5, 7)
-
- // quarterround(x, 2, 6, 10, 14)
- x2 += x6
- x14 ^= x2
- x14 = bits.RotateLeft32(x14, 16)
- x10 += x14
- x6 ^= x10
- x6 = bits.RotateLeft32(x6, 12)
- x2 += x6
- x14 ^= x2
- x14 = bits.RotateLeft32(x14, 8)
- x10 += x14
- x6 ^= x10
- x6 = bits.RotateLeft32(x6, 7)
-
- // quarterround(x, 3, 7, 11, 15)
- x3 += x7
- x15 ^= x3
- x15 = bits.RotateLeft32(x15, 16)
- x11 += x15
- x7 ^= x11
- x7 = bits.RotateLeft32(x7, 12)
- x3 += x7
- x15 ^= x3
- x15 = bits.RotateLeft32(x15, 8)
- x11 += x15
- x7 ^= x11
- x7 = bits.RotateLeft32(x7, 7)
-
- // quarterround(x, 0, 5, 10, 15)
- x0 += x5
- x15 ^= x0
- x15 = bits.RotateLeft32(x15, 16)
- x10 += x15
- x5 ^= x10
- x5 = bits.RotateLeft32(x5, 12)
- x0 += x5
- x15 ^= x0
- x15 = bits.RotateLeft32(x15, 8)
- x10 += x15
- x5 ^= x10
- x5 = bits.RotateLeft32(x5, 7)
-
- // quarterround(x, 1, 6, 11, 12)
- x1 += x6
- x12 ^= x1
- x12 = bits.RotateLeft32(x12, 16)
- x11 += x12
- x6 ^= x11
- x6 = bits.RotateLeft32(x6, 12)
- x1 += x6
- x12 ^= x1
- x12 = bits.RotateLeft32(x12, 8)
- x11 += x12
- x6 ^= x11
- x6 = bits.RotateLeft32(x6, 7)
-
- // quarterround(x, 2, 7, 8, 13)
- x2 += x7
- x13 ^= x2
- x13 = bits.RotateLeft32(x13, 16)
- x8 += x13
- x7 ^= x8
- x7 = bits.RotateLeft32(x7, 12)
- x2 += x7
- x13 ^= x2
- x13 = bits.RotateLeft32(x13, 8)
- x8 += x13
- x7 ^= x8
- x7 = bits.RotateLeft32(x7, 7)
-
- // quarterround(x, 3, 4, 9, 14)
- x3 += x4
- x14 ^= x3
- x14 = bits.RotateLeft32(x14, 16)
- x9 += x14
- x4 ^= x9
- x4 = bits.RotateLeft32(x4, 12)
- x3 += x4
- x14 ^= x3
- x14 = bits.RotateLeft32(x14, 8)
- x9 += x14
- x4 ^= x9
- x4 = bits.RotateLeft32(x4, 7)
- }
-
- x0 += api.Sigma0
- x1 += api.Sigma1
- x2 += api.Sigma2
- x3 += api.Sigma3
- x4 += x[4]
- x5 += x[5]
- x6 += x[6]
- x7 += x[7]
- x8 += x[8]
- x9 += x[9]
- x10 += x[10]
- x11 += x[11]
- x12 += x[12]
- x13 += x[13]
- x14 += x[14]
- x15 += x[15]
-
- _ = dst[api.BlockSize-1] // Force bounds check elimination.
-
- if src != nil {
- _ = src[api.BlockSize-1] // Force bounds check elimination.
- binary.LittleEndian.PutUint32(dst[0:4], binary.LittleEndian.Uint32(src[0:4])^x0)
- binary.LittleEndian.PutUint32(dst[4:8], binary.LittleEndian.Uint32(src[4:8])^x1)
- binary.LittleEndian.PutUint32(dst[8:12], binary.LittleEndian.Uint32(src[8:12])^x2)
- binary.LittleEndian.PutUint32(dst[12:16], binary.LittleEndian.Uint32(src[12:16])^x3)
- binary.LittleEndian.PutUint32(dst[16:20], binary.LittleEndian.Uint32(src[16:20])^x4)
- binary.LittleEndian.PutUint32(dst[20:24], binary.LittleEndian.Uint32(src[20:24])^x5)
- binary.LittleEndian.PutUint32(dst[24:28], binary.LittleEndian.Uint32(src[24:28])^x6)
- binary.LittleEndian.PutUint32(dst[28:32], binary.LittleEndian.Uint32(src[28:32])^x7)
- binary.LittleEndian.PutUint32(dst[32:36], binary.LittleEndian.Uint32(src[32:36])^x8)
- binary.LittleEndian.PutUint32(dst[36:40], binary.LittleEndian.Uint32(src[36:40])^x9)
- binary.LittleEndian.PutUint32(dst[40:44], binary.LittleEndian.Uint32(src[40:44])^x10)
- binary.LittleEndian.PutUint32(dst[44:48], binary.LittleEndian.Uint32(src[44:48])^x11)
- binary.LittleEndian.PutUint32(dst[48:52], binary.LittleEndian.Uint32(src[48:52])^x12)
- binary.LittleEndian.PutUint32(dst[52:56], binary.LittleEndian.Uint32(src[52:56])^x13)
- binary.LittleEndian.PutUint32(dst[56:60], binary.LittleEndian.Uint32(src[56:60])^x14)
- binary.LittleEndian.PutUint32(dst[60:64], binary.LittleEndian.Uint32(src[60:64])^x15)
- src = src[api.BlockSize:]
- } else {
- binary.LittleEndian.PutUint32(dst[0:4], x0)
- binary.LittleEndian.PutUint32(dst[4:8], x1)
- binary.LittleEndian.PutUint32(dst[8:12], x2)
- binary.LittleEndian.PutUint32(dst[12:16], x3)
- binary.LittleEndian.PutUint32(dst[16:20], x4)
- binary.LittleEndian.PutUint32(dst[20:24], x5)
- binary.LittleEndian.PutUint32(dst[24:28], x6)
- binary.LittleEndian.PutUint32(dst[28:32], x7)
- binary.LittleEndian.PutUint32(dst[32:36], x8)
- binary.LittleEndian.PutUint32(dst[36:40], x9)
- binary.LittleEndian.PutUint32(dst[40:44], x10)
- binary.LittleEndian.PutUint32(dst[44:48], x11)
- binary.LittleEndian.PutUint32(dst[48:52], x12)
- binary.LittleEndian.PutUint32(dst[52:56], x13)
- binary.LittleEndian.PutUint32(dst[56:60], x14)
- binary.LittleEndian.PutUint32(dst[60:64], x15)
- }
- dst = dst[api.BlockSize:]
-
- // Stoping at 2^70 bytes per nonce is the user's responsibility.
- ctr := uint64(x[13])<<32 | uint64(x[12])
- ctr++
- x[12] = uint32(ctr)
- x[13] = uint32(ctr >> 32)
- }
-}
-
-func (impl *implRef) HChaCha(key, nonce []byte, dst []byte) {
- // Force bounds check elimination.
- _ = key[31]
- _ = nonce[api.HNonceSize-1]
-
- x0, x1, x2, x3 := api.Sigma0, api.Sigma1, api.Sigma2, api.Sigma3
- x4 := binary.LittleEndian.Uint32(key[0:4])
- x5 := binary.LittleEndian.Uint32(key[4:8])
- x6 := binary.LittleEndian.Uint32(key[8:12])
- x7 := binary.LittleEndian.Uint32(key[12:16])
- x8 := binary.LittleEndian.Uint32(key[16:20])
- x9 := binary.LittleEndian.Uint32(key[20:24])
- x10 := binary.LittleEndian.Uint32(key[24:28])
- x11 := binary.LittleEndian.Uint32(key[28:32])
- x12 := binary.LittleEndian.Uint32(nonce[0:4])
- x13 := binary.LittleEndian.Uint32(nonce[4:8])
- x14 := binary.LittleEndian.Uint32(nonce[8:12])
- x15 := binary.LittleEndian.Uint32(nonce[12:16])
-
- // Yes, this could be carved out into a function for code reuse (TM)
- // however the go inliner won't inline it.
- for i := rounds; i > 0; i -= 2 {
- // quarterround(x, 0, 4, 8, 12)
- x0 += x4
- x12 ^= x0
- x12 = bits.RotateLeft32(x12, 16)
- x8 += x12
- x4 ^= x8
- x4 = bits.RotateLeft32(x4, 12)
- x0 += x4
- x12 ^= x0
- x12 = bits.RotateLeft32(x12, 8)
- x8 += x12
- x4 ^= x8
- x4 = bits.RotateLeft32(x4, 7)
-
- // quarterround(x, 1, 5, 9, 13)
- x1 += x5
- x13 ^= x1
- x13 = bits.RotateLeft32(x13, 16)
- x9 += x13
- x5 ^= x9
- x5 = bits.RotateLeft32(x5, 12)
- x1 += x5
- x13 ^= x1
- x13 = bits.RotateLeft32(x13, 8)
- x9 += x13
- x5 ^= x9
- x5 = bits.RotateLeft32(x5, 7)
-
- // quarterround(x, 2, 6, 10, 14)
- x2 += x6
- x14 ^= x2
- x14 = bits.RotateLeft32(x14, 16)
- x10 += x14
- x6 ^= x10
- x6 = bits.RotateLeft32(x6, 12)
- x2 += x6
- x14 ^= x2
- x14 = bits.RotateLeft32(x14, 8)
- x10 += x14
- x6 ^= x10
- x6 = bits.RotateLeft32(x6, 7)
-
- // quarterround(x, 3, 7, 11, 15)
- x3 += x7
- x15 ^= x3
- x15 = bits.RotateLeft32(x15, 16)
- x11 += x15
- x7 ^= x11
- x7 = bits.RotateLeft32(x7, 12)
- x3 += x7
- x15 ^= x3
- x15 = bits.RotateLeft32(x15, 8)
- x11 += x15
- x7 ^= x11
- x7 = bits.RotateLeft32(x7, 7)
-
- // quarterround(x, 0, 5, 10, 15)
- x0 += x5
- x15 ^= x0
- x15 = bits.RotateLeft32(x15, 16)
- x10 += x15
- x5 ^= x10
- x5 = bits.RotateLeft32(x5, 12)
- x0 += x5
- x15 ^= x0
- x15 = bits.RotateLeft32(x15, 8)
- x10 += x15
- x5 ^= x10
- x5 = bits.RotateLeft32(x5, 7)
-
- // quarterround(x, 1, 6, 11, 12)
- x1 += x6
- x12 ^= x1
- x12 = bits.RotateLeft32(x12, 16)
- x11 += x12
- x6 ^= x11
- x6 = bits.RotateLeft32(x6, 12)
- x1 += x6
- x12 ^= x1
- x12 = bits.RotateLeft32(x12, 8)
- x11 += x12
- x6 ^= x11
- x6 = bits.RotateLeft32(x6, 7)
-
- // quarterround(x, 2, 7, 8, 13)
- x2 += x7
- x13 ^= x2
- x13 = bits.RotateLeft32(x13, 16)
- x8 += x13
- x7 ^= x8
- x7 = bits.RotateLeft32(x7, 12)
- x2 += x7
- x13 ^= x2
- x13 = bits.RotateLeft32(x13, 8)
- x8 += x13
- x7 ^= x8
- x7 = bits.RotateLeft32(x7, 7)
-
- // quarterround(x, 3, 4, 9, 14)
- x3 += x4
- x14 ^= x3
- x14 = bits.RotateLeft32(x14, 16)
- x9 += x14
- x4 ^= x9
- x4 = bits.RotateLeft32(x4, 12)
- x3 += x4
- x14 ^= x3
- x14 = bits.RotateLeft32(x14, 8)
- x9 += x14
- x4 ^= x9
- x4 = bits.RotateLeft32(x4, 7)
- }
-
- // HChaCha returns x0...x3 | x12...x15, which corresponds to the
- // indexes of the ChaCha constant and the indexes of the IV.
- _ = dst[api.HashSize-1] // Force bounds check elimination.
- binary.LittleEndian.PutUint32(dst[0:4], x0)
- binary.LittleEndian.PutUint32(dst[4:8], x1)
- binary.LittleEndian.PutUint32(dst[8:12], x2)
- binary.LittleEndian.PutUint32(dst[12:16], x3)
- binary.LittleEndian.PutUint32(dst[16:20], x12)
- binary.LittleEndian.PutUint32(dst[20:24], x13)
- binary.LittleEndian.PutUint32(dst[24:28], x14)
- binary.LittleEndian.PutUint32(dst[28:32], x15)
-}
-
-// Register appends the implementation to the provided slice, and returns the
-// new slice.
-func Register(impls []api.Implementation) []api.Implementation {
- return append(impls, Impl)
-}
diff --git a/vendor/blitter.com/go/cryptmt/README.md b/vendor/blitter.com/go/cryptmt/README.md
deleted file mode 100644
index 2f1e323..0000000
--- a/vendor/blitter.com/go/cryptmt/README.md
+++ /dev/null
@@ -1,6 +0,0 @@
-[![GoDoc](https://godoc.org/blitter.com/go/cryptmt?status.svg)](https://godoc.org/blitter.com/go/cryptmt)
-
-Implementation of cryptMTv1 stream cipher (but with mtwist64 as base accum)
-https://eprint.iacr.org/2005/165.pdf
-
-Uses Mersenne Twister 64 golang implementation supplied by [cuixin](https://gist.github.com/cuixin): [gist](https://gist.github.com/cuixin/1b8b6bd7bfbde8fe76e8)
diff --git a/vendor/blitter.com/go/cryptmt/cryptmt.go b/vendor/blitter.com/go/cryptmt/cryptmt.go
deleted file mode 100644
index 3a627ed..0000000
--- a/vendor/blitter.com/go/cryptmt/cryptmt.go
+++ /dev/null
@@ -1,78 +0,0 @@
-// Package CryptMT - implementation of cryptMTv1 stream cipher
-// (but with mtwist64 as base accum)
-// https://eprint.iacr.org/2005/165.pdf
-package cryptmt
-
-// TODO rlm: according to go docs, stream ciphers do not implement the
-// cipher.Block interface at all (thus do not support Encrypt() or
-// Decrypt() .. cipher.StreamReader/StreamWriter() only call
-// XORKeyStream() anyhow and for my own purposes this is all that is
-// required.
-
-import (
- "errors"
- "io"
-
- mtwist "blitter.com/go/mtwist"
-)
-
-type Cipher struct {
- r io.Reader
- w io.Writer
- accum uint64
- m *mtwist.MT19937_64
-}
-
-func (c *Cipher) yield() (r byte) {
- c.accum = c.accum * (c.m.Int63() | 1)
- r = byte(c.accum>>56) & 0xFF
- return
-}
-
-// New creates and returns a Cipher. The key argument should be the
-// CryptMT key, 64 bytes.
-func New(r io.Reader, w io.Writer, key []byte) (c *Cipher) {
- c = &Cipher{m: mtwist.New(), r: r, w: w}
- c.m.SeedFullState(key)
- c.accum = 1
- // from paper, discard first 64 bytes of output
- for idx := 0; idx < 64; idx++ {
- _ = c.yield()
- }
- return c
-}
-
-func (c *Cipher) Read(p []byte) (n int, err error) {
- n, err = c.r.Read(p)
- if err == nil {
- for idx := 0; idx < n; idx++ {
- p[idx] = p[idx] ^ c.yield()
- }
- }
- return n, err
-}
-
-func (c *Cipher) Write(p []byte) (n int, err error) {
- n, err = c.w.Write(p)
- return n, err
-}
-
-// XORKeyStream XORs each byte in the given slice with a byte from the
-// cipher's key stream. Dst and src must overlap entirely or not at all.
-//
-// If len(dst) < len(src), XORKeyStream should panic. It is acceptable
-// to pass a dst bigger than src, and in that case, XORKeyStream will
-// only update dst[:len(src)] and will not touch the rest of dst.
-//
-// Multiple calls to XORKeyStream behave as if the concatenation of
-// the src buffers was passed in a single run. That is, Stream
-// maintains state and does not reset at each XORKeyStream call.
-func (c *Cipher) XORKeyStream(dst, src []byte) {
- if len(dst) < len(src) {
- panic(errors.New("len(dst) < len(src)"))
- }
-
- for i, b := range src {
- dst[i] = b ^ c.yield()
- }
-}
diff --git a/vendor/blitter.com/go/goutmp/LICENSE.mit b/vendor/blitter.com/go/goutmp/LICENSE.mit
deleted file mode 100644
index 8d5754c..0000000
--- a/vendor/blitter.com/go/goutmp/LICENSE.mit
+++ /dev/null
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2017 - 2018 Russell Magee
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/vendor/blitter.com/go/goutmp/README.md b/vendor/blitter.com/go/goutmp/README.md
deleted file mode 100644
index cb956fc..0000000
--- a/vendor/blitter.com/go/goutmp/README.md
+++ /dev/null
@@ -1,17 +0,0 @@
-goutmp - Minimal bindings to C stdlib pututmpx(), getutmpx() (/var/log/wtmp) and /var/log/lastlog
-
-Any Go program which allows user shell access should update the standard UNIX files which track user sessions: /var/log/wtmp (for the 'w' and 'who' commands), and /var/log/lastlog (the 'last' and 'lastlog' commands).
-
-```
-go doc
-package goutmp // import "blitter.com/go/goutmp"
-
-Golang bindings for basic login/utmp accounting
-
-type UtmpEntry struct{ ... }
-
-func Put_lastlog_entry(app, usr, ptsname, host string)
-func Unput_utmp(entry UtmpEntry)
-func Put_utmp(user, ptsname, host string) UtmpEntry
-```
-
diff --git a/vendor/blitter.com/go/goutmp/goutmp_bsd.go b/vendor/blitter.com/go/goutmp/goutmp_bsd.go
deleted file mode 100644
index 4aada2d..0000000
--- a/vendor/blitter.com/go/goutmp/goutmp_bsd.go
+++ /dev/null
@@ -1,136 +0,0 @@
-// +build freebsd
-
-// Golang bindings for basic login/utmp accounting
-package goutmp
-
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//
-//#include
-//
-//typedef char char_t;
-//
-//void pututmpx(struct utmpx* entry, char* uname, char* ptsname, char* host) {
-// entry->ut_type = USER_PROCESS;
-// entry->ut_pid = getpid();
-// strcpy(entry->ut_line, ptsname + strlen("/dev/"));
-//
-// strcpy(entry->ut_id, ptsname + strlen("/dev/pts/"));
-//
-// //entry->ut_time = time(NULL);
-// strcpy(entry->ut_user, uname);
-// strcpy(entry->ut_host, host);
-// //entry->ut_addr = 0;
-// setutxent();
-// pututxline(entry);
-//}
-//
-//void unpututmpx(struct utmpx* entry) {
-// entry->ut_type = DEAD_PROCESS;
-// entry->ut_line[0] = '\0';
-// //entry->ut_time = 0;
-// entry->ut_user[0] = '\0';
-// setutxent();
-// pututxline(entry);
-//
-// endutxent();
-//}
-//
-//#if 0
-//int putlastlogentry(int64_t t, int uid, char* line, char* host) {
-// int retval = 0;
-// FILE *f;
-// struct lastlog l;
-//
-// strncpy(l.ll_line, line, UT_LINESIZE);
-// l.ll_line[UT_LINESIZE-1] = '\0';
-// strncpy(l.ll_host, host, UT_HOSTSIZE);
-// l.ll_host[UT_HOSTSIZE-1] = '\0';
-//
-// l.ll_time = (time_t)t;
-// //printf("l: ll_line '%s', ll_host '%s', ll_time %d\n", l.ll_line, l.ll_host, l.ll_time);
-//
-// /* Write lastlog entry at fixed offset (uid * sizeof(struct lastlog) */
-// if( NULL != (f = fopen("/var/log/lastlog", "rw+")) ) {
-// if( !fseek(f, (uid * sizeof(struct lastlog)), SEEK_SET) ) {
-// int fd = fileno(f);
-// if( write(fd, &l, sizeof(l)) == sizeof(l) ) {
-// retval = 1;
-// //int32_t stat = system("echo ---- lastlog ----; lastlog");
-// }
-// }
-// fclose(f);
-// }
-// return retval;
-//}
-//#else
-//int putlastlogentry(int64_t t, int uid, char* line, char* host) {
-// return 0;
-//}
-//#endif
-import "C"
-
-import (
- "fmt"
- "net"
- "os/user"
- "strings"
- "time"
-)
-
-// UtmpEntry wraps the C struct utmp
-type UtmpEntry struct {
- entry C.struct_utmpx
-}
-
-// return remote client hostname or IP if host lookup fails
-// addr is expected to be of the format given by net.Addr.String()
-// eg., "127.0.0.1:80" or "[::1]:80"
-func GetHost(addr string) (h string) {
- if !strings.Contains(addr, "[") {
- h = strings.Split(addr, ":")[0]
- } else {
- h = strings.Split(strings.Split(addr, "[")[1], "]")[0]
- }
- hList, e := net.LookupAddr(h)
- //fmt.Printf("lookupAddr:%v\n", hList)
- if e == nil {
- h = hList[0]
- }
- return
-}
-
-// Put a username and the originating host/IP to utmp
-func Put_utmp(user, ptsName, host string) UtmpEntry {
- var entry UtmpEntry
-
- //log.Println("Put_utmp:host ", host, " user ", user)
- C.pututmpx(&entry.entry, C.CString(user), C.CString(ptsName), C.CString(host))
- return entry
-}
-
-// Remove a username/host entry from utmp
-func Unput_utmp(entry UtmpEntry) {
- C.unpututmpx(&entry.entry)
-}
-
-// Put the login app, username and originating host/IP to lastlog
-func Put_lastlog_entry(app, usr, ptsname, host string) {
- u, e := user.Lookup(usr)
- if e != nil {
- return
- }
- var uid uint32
- fmt.Sscanf(u.Uid, "%d", &uid)
-
- t := time.Now().Unix()
- _ = C.putlastlogentry(C.int64_t(t), C.int(uid), C.CString(app), C.CString(host))
- //stat := C.putlastlogentry(C.int64_t(t), C.int(uid), C.CString(app), C.CString(host))
- //fmt.Println("stat was:",stat)
-}
diff --git a/vendor/blitter.com/go/goutmp/goutmp_linux.go b/vendor/blitter.com/go/goutmp/goutmp_linux.go
deleted file mode 100644
index 10513b2..0000000
--- a/vendor/blitter.com/go/goutmp/goutmp_linux.go
+++ /dev/null
@@ -1,132 +0,0 @@
-// +build linux
-
-// Golang bindings for basic login/utmp accounting
-package goutmp
-
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//
-//#include
-//#include
-//
-//typedef char char_t;
-//
-//
-//void pututmp(struct utmp* entry, char* uname, char* ptsname, char* host) {
-// entry->ut_type = USER_PROCESS;
-// entry->ut_pid = getpid();
-// strcpy(entry->ut_line, ptsname + strlen("/dev/"));
-//
-// strcpy(entry->ut_id, ptsname + strlen("/dev/pts/"));
-//
-// entry->ut_time = time(NULL);
-// strcpy(entry->ut_user, uname);
-// strcpy(entry->ut_host, host);
-// entry->ut_addr = 0;
-// setutent();
-// pututline(entry);
-//}
-//
-//void unpututmp(struct utmp* entry) {
-// entry->ut_type = DEAD_PROCESS;
-// memset(entry->ut_line, 0, UT_LINESIZE);
-// entry->ut_time = 0;
-// memset(entry->ut_user, 0, UT_NAMESIZE);
-// setutent();
-// pututline(entry);
-//
-// endutent();
-//}
-//
-//int putlastlogentry(int64_t t, int uid, char* line, char* host) {
-// int retval = 0;
-// FILE *f;
-// struct lastlog l;
-//
-// strncpy(l.ll_line, line, UT_LINESIZE);
-// l.ll_line[UT_LINESIZE-1] = '\0';
-// strncpy(l.ll_host, host, UT_HOSTSIZE);
-// l.ll_host[UT_HOSTSIZE-1] = '\0';
-//
-// l.ll_time = (time_t)t;
-// //printf("l: ll_line '%s', ll_host '%s', ll_time %d\n", l.ll_line, l.ll_host, l.ll_time);
-//
-// /* Write lastlog entry at fixed offset (uid * sizeof(struct lastlog) */
-// if( NULL != (f = fopen("/var/log/lastlog", "rw+")) ) {
-// if( !fseek(f, (uid * sizeof(struct lastlog)), SEEK_SET) ) {
-// int fd = fileno(f);
-// if( write(fd, &l, sizeof(l)) == sizeof(l) ) {
-// retval = 1;
-// //int32_t stat = system("echo ---- lastlog ----; lastlog");
-// }
-// }
-// fclose(f);
-// }
-// return retval;
-//}
-import "C"
-
-import (
- "fmt"
- "net"
- "os/user"
- "strings"
- "time"
-)
-
-// UtmpEntry wraps the C struct utmp
-type UtmpEntry struct {
- entry C.struct_utmp
-}
-
-// return remote client hostname or IP if host lookup fails
-// addr is expected to be of the format given by net.Addr.String()
-// eg., "127.0.0.1:80" or "[::1]:80"
-func GetHost(addr string) (h string) {
- if !strings.Contains(addr, "[") {
- h = strings.Split(addr, ":")[0]
- } else {
- h = strings.Split(strings.Split(addr, "[")[1], "]")[0]
- }
- hList, e := net.LookupAddr(h)
- //fmt.Printf("lookupAddr:%v\n", hList)
- if e == nil {
- h = hList[0]
- }
- return
-}
-
-// Put a username and the originating host/IP to utmp
-func Put_utmp(user, ptsName, host string) UtmpEntry {
- var entry UtmpEntry
-
- //log.Println("Put_utmp:host ", host, " user ", user)
- C.pututmp(&entry.entry, C.CString(user), C.CString(ptsName), C.CString(host))
- return entry
-}
-
-// Remove a username/host entry from utmp
-func Unput_utmp(entry UtmpEntry) {
- C.unpututmp(&entry.entry)
-}
-
-// Put the login app, username and originating host/IP to lastlog
-func Put_lastlog_entry(app, usr, ptsname, host string) {
- u, e := user.Lookup(usr)
- if e != nil {
- return
- }
- var uid uint32
- fmt.Sscanf(u.Uid, "%d", &uid)
-
- t := time.Now().Unix()
- _ = C.putlastlogentry(C.int64_t(t), C.int(uid), C.CString(app), C.CString(host))
- //stat := C.putlastlogentry(C.int64_t(t), C.int(uid), C.CString(app), C.CString(host))
- //fmt.Println("stat was:",stat)
-}
diff --git a/vendor/blitter.com/go/groestl/README.md b/vendor/blitter.com/go/groestl/README.md
deleted file mode 100644
index 89c2dc0..0000000
--- a/vendor/blitter.com/go/groestl/README.md
+++ /dev/null
@@ -1,20 +0,0 @@
-# groestl
-
-Grøstl hash function implementation in golang
-
-## Usage
-
-groestl library implements standard `hash.Hash` interface.
-
-You can also run it from command-line:
-```
-$ ./groestl
-Usage:
- ./groestl [options] path/to/file
-
-Options:
- -hash int
- output hash length (default 256)
-```
-
-To compile simply run `go build` in the root directory.
diff --git a/vendor/blitter.com/go/groestl/groestl.go b/vendor/blitter.com/go/groestl/groestl.go
deleted file mode 100644
index 953f22f..0000000
--- a/vendor/blitter.com/go/groestl/groestl.go
+++ /dev/null
@@ -1,190 +0,0 @@
-// Package groestl provides core groestl functionality. It's based on groestl's
-// implementation guide with references in C code.
-package groestl
-
-import (
- "encoding/binary"
- "fmt"
- "hash"
-)
-
-// Toggle verbose output with detailed description of every algorithm's step
-const VERBOSE = false
-
-// Struct digest is being used during algorithm execution. Provides easy
-// access to all information about current state of data processing.
-type digest struct {
- hashbitlen int
- chaining [16]uint64
- blocks uint64
- buf [128]byte
- nbuf int
- columns int
- rounds int
-}
-
-// Equivalent to Init from reference implementation. Initiates values
-// for digest struct, therefore determines exact type of groestl algorithm.
-func (d *digest) Reset() {
- for i, _ := range d.chaining {
- d.chaining[i] = 0
- }
-
- d.blocks = 0
- d.nbuf = 0
-
- if d.hashbitlen <= 256 {
- d.columns = 8
- d.rounds = 10
- } else {
- d.columns = 16
- d.rounds = 14
- }
-
- d.chaining[d.columns-1] = uint64(d.hashbitlen)
-}
-
-// Each New...() function creates new hash digest and initiates it
-// for according hash size.
-func New224() hash.Hash {
- d := new(digest)
- d.hashbitlen = 224
- d.Reset()
- return d
-}
-
-func New256() hash.Hash {
- d := new(digest)
- d.hashbitlen = 256
- d.Reset()
- return d
-}
-
-func New384() hash.Hash {
- d := new(digest)
- d.hashbitlen = 384
- d.Reset()
- return d
-}
-
-func New512() hash.Hash {
- d := new(digest)
- d.hashbitlen = 512
- d.Reset()
- return d
-}
-
-// Default function for creating hash digest for 256bit groestl.
-func New() hash.Hash {
- return New256()
-}
-
-// Return size of digest
-func (d *digest) Size() int {
- return d.hashbitlen
-}
-
-// Return block size for digest. For hash bigger than 256 bit block
-// size is 128, otherwise it's 64.
-func (d *digest) BlockSize() int {
- if d.hashbitlen <= 256 {
- return 64
- } else {
- return 128
- }
-}
-
-// Equivalent to Update form reference implementation. Performs processing
-// on all data except the last block that might need padding.
-func (d *digest) Write(p []byte) (n int, err error) {
- n = len(p)
- if d.nbuf > 0 {
- nn := copy(d.buf[d.nbuf:], p)
- d.nbuf += nn
- if d.nbuf == d.BlockSize() {
- err = d.transform(d.buf[:d.BlockSize()])
- if err != nil {
- panic(err)
- }
- d.nbuf = 0
- }
- p = p[nn:]
- }
- if len(p) >= d.BlockSize() {
- nn := len(p) &^ (d.BlockSize() - 1)
- err = d.transform(p[:nn])
- if err != nil {
- panic(err)
- }
- p = p[nn:]
- }
- if len(p) > 0 {
- d.nbuf = copy(d.buf[:], p)
- }
- return
-}
-
-func (d *digest) Sum(in []byte) []byte {
- d0 := *d
- hash := d0.checkSum()
- fmt.Printf("groestl-hash:len:%b %v\r\n", len(hash), hash)
- return append(in, hash...)
-}
-
-// Equivalent to Final from reference implementation. Creates padding
-// for last block of data and performs final output transformation and trumcate.
-// Returns hash value.
-func (d *digest) checkSum() []byte {
- bs := d.BlockSize()
- var tmp [128]byte
- tmp[0] = 0x80
-
- if d.nbuf > (bs - 8) {
- d.Write(tmp[:(bs - d.nbuf)])
- d.Write(tmp[8:bs])
- } else {
- d.Write(tmp[0:(bs - d.nbuf - 8)])
- }
-
- binary.BigEndian.PutUint64(tmp[:], d.blocks+1)
- d.Write(tmp[:8])
-
- if d.nbuf != 0 {
- panic("padding failed")
- }
-
- d.finalTransform()
-
- // store chaining in output byteslice
- hash := make([]byte, d.columns*4)
- for i := 0; i < d.columns/2; i++ {
- binary.BigEndian.PutUint64(hash[(i*8):(i+1)*8], d.chaining[i+(d.columns/2)])
- }
- hash = hash[(len(hash) - d.hashbitlen/8):]
- return hash
-}
-
-// Each Sum...() function returns according hash value for provided data.
-func Sum224(data []byte) []byte {
- d := New224().(*digest)
- d.Write(data)
- return d.checkSum()
-}
-
-func Sum256(data []byte) []byte {
- d := New256().(*digest)
- d.Write(data)
- return d.checkSum()
-}
-
-func Sum384(data []byte) []byte {
- d := New384().(*digest)
- d.Write(data)
- return d.checkSum()
-}
-
-func Sum512(data []byte) []byte {
- d := New512().(*digest)
- d.Write(data)
- return d.checkSum()
-}
diff --git a/vendor/blitter.com/go/groestl/round.go b/vendor/blitter.com/go/groestl/round.go
deleted file mode 100644
index a32c1b1..0000000
--- a/vendor/blitter.com/go/groestl/round.go
+++ /dev/null
@@ -1,217 +0,0 @@
-package groestl
-
-import (
- "encoding/binary"
- "fmt"
-)
-
-func buildColumns(data []byte, cols chan uint64) {
- for i, l := 8, len(data); i <= l; i += 8 {
- cols <- binary.BigEndian.Uint64(data[i-8 : i])
- }
- close(cols)
-}
-
-// Performs compression function. Returns nil on success, error otherwise.
-func (d *digest) transform(data []byte) error {
- if (len(data) % d.BlockSize()) != 0 {
- return fmt.Errorf("data len in transform is not a multiple of BlockSize")
- }
-
- cols := make(chan uint64)
- go buildColumns(data, cols)
-
- eb := d.blocks + uint64(len(data)/d.BlockSize())
- for d.blocks < eb {
- m := make([]uint64, d.columns)
- hxm := make([]uint64, d.columns)
-
- for i := 0; i < d.columns; i++ {
- m[i] = <-cols
- hxm[i] = d.chaining[i] ^ m[i]
- }
-
- if VERBOSE {
- fmt.Println("\n========================================\n")
- fmt.Println("Block Contents:")
- printUintSlice(m)
- fmt.Println()
- }
-
- hxm = round(d, hxm, 'P')
- m = round(d, m, 'Q')
-
- for i := 0; i < d.columns; i++ {
- d.chaining[i] ^= hxm[i] ^ m[i]
- }
-
- d.blocks += 1
-
- if VERBOSE {
- fmt.Println("P(h+m) + Q(m) + h =")
- printUintSlice(d.chaining[:d.columns])
- fmt.Println()
- }
- }
-
- return nil
-}
-
-// Performs last compression. After this function, data
-// is ready for truncation.
-func (d *digest) finalTransform() {
- h := make([]uint64, d.columns)
-
- for i := 0; i < d.columns; i++ {
- h[i] = d.chaining[i]
- }
-
- if VERBOSE {
- fmt.Println("\n========================================\n")
- fmt.Println("Output transformation:\n")
- }
-
- h = round(d, h, 'P')
-
- for i := 0; i < d.columns; i++ {
- d.chaining[i] ^= h[i]
- }
-
- d.blocks += 1
-
- if VERBOSE {
- fmt.Println("P(h) + h =")
- printUintSlice(d.chaining[:d.columns])
- fmt.Println("\n---------------------------------------\n")
- }
-}
-
-// Performs whole set of rounds on data provided in x. Variant denotes type
-// of permutation being performed. P and Q are for groestl-512
-// and lowercase are for groestl-256
-func round(d *digest, x []uint64, variant rune) []uint64 {
- if VERBOSE {
- fmt.Println(":: BEGIN " + string(variant))
- defer fmt.Println(":: END " + string(variant) + "\n")
- fmt.Println("Input:")
- printUintSlice(x)
- }
-
- if d.BlockSize() == 64 {
- // for smaller blocksize change variant to lowercase letter
- variant += 0x20
- }
-
- for i := 0; i < d.rounds; i++ {
- x = addRoundConstant(x, i, variant)
- if VERBOSE {
- fmt.Printf("t=%d (AddRoundConstant):\n", i)
- printUintSlice(x)
- }
- x = subBytes(x)
- if VERBOSE {
- fmt.Printf("t=%d (SubBytes):\n", i)
- printUintSlice(x)
- }
- x = shiftBytes(x, variant)
- if VERBOSE {
- fmt.Printf("t=%d (ShiftBytes):\n", i)
- printUintSlice(x)
- }
- x = mixBytes(x)
- if VERBOSE {
- fmt.Printf("t=%d (MixBytes):\n", i)
- printUintSlice(x)
- }
- }
-
- return x
-}
-
-// AddRoundConstant transformation for data provided in x. Variant denotes type
-// of permutation being performed. P and Q are for groestl-512
-// and lowercase are for groestl-256
-func addRoundConstant(x []uint64, r int, variant rune) []uint64 {
- switch variant {
- case 'P', 'p':
- for i, l := 0, len(x); i < l; i++ {
- // byte from row 0: ((col >> (8*7)) & 0xFF)
- // we want to xor the byte below with row 0
- // therefore we have to shift it by 8*7 bits
- x[i] ^= uint64((i<<4)^r) << (8 * 7)
- }
- case 'Q', 'q':
- for i, l := 0, len(x); i < l; i++ {
- x[i] ^= ^uint64(0) ^ uint64((i<<4)^r)
- }
- }
- return x
-}
-
-// SubBytes transformation for data provided in x.
-func subBytes(x []uint64) []uint64 {
- var newCol [8]byte
- for i, l := 0, len(x); i < l; i++ {
- for j := 0; j < 8; j++ {
- newCol[j] = sbox[pickRow(x[i], j)]
- }
- x[i] = binary.BigEndian.Uint64(newCol[:])
- }
- return x
-}
-
-// ShiftBytes transformation for data provided in x. Variant denotes type
-// of permutation being performed. P and Q are for groestl-512
-// and lowercase are for groestl-256
-func shiftBytes(x []uint64, variant rune) []uint64 {
- var shiftVector [8]int
- switch variant {
- case 'p':
- shiftVector = [8]int{0, 1, 2, 3, 4, 5, 6, 7}
- case 'P':
- shiftVector = [8]int{0, 1, 2, 3, 4, 5, 6, 11}
- case 'q':
- shiftVector = [8]int{1, 3, 5, 7, 0, 2, 4, 6}
- case 'Q':
- shiftVector = [8]int{1, 3, 5, 11, 0, 2, 4, 6}
- }
- l := len(x)
- ret := make([]uint64, l)
- for i := 0; i < l; i++ {
- ret[i] = uint64(pickRow(x[(i+shiftVector[0])%l], 0))
- for j := 1; j < 8; j++ {
- ret[i] <<= 8
- ret[i] ^= uint64(pickRow(x[(i+shiftVector[j])%l], j))
- }
- }
- return ret
-}
-
-// MixBytes transformation for data provided in x.
-func mixBytes(x []uint64) []uint64 {
- // this part is tricky
- // so here comes yet another rough translation straight from reference implementation
-
- mul2 := func(b uint8) uint8 { return uint8((b << 1) ^ (0x1B * ((b >> 7) & 1))) }
- mul3 := func(b uint8) uint8 { return (mul2(b) ^ (b)) }
- mul4 := func(b uint8) uint8 { return mul2(mul2(b)) }
- mul5 := func(b uint8) uint8 { return (mul4(b) ^ (b)) }
- mul7 := func(b uint8) uint8 { return (mul4(b) ^ mul2(b) ^ (b)) }
-
- var temp [8]uint8
- for i, l := 0, len(x); i < l; i++ {
- for j := 0; j < 8; j++ {
- temp[j] =
- mul2(pickRow(x[i], (j+0)%8)) ^
- mul2(pickRow(x[i], (j+1)%8)) ^
- mul3(pickRow(x[i], (j+2)%8)) ^
- mul4(pickRow(x[i], (j+3)%8)) ^
- mul5(pickRow(x[i], (j+4)%8)) ^
- mul3(pickRow(x[i], (j+5)%8)) ^
- mul5(pickRow(x[i], (j+6)%8)) ^
- mul7(pickRow(x[i], (j+7)%8))
- }
- x[i] = binary.BigEndian.Uint64(temp[:])
- }
- return x
-}
diff --git a/vendor/blitter.com/go/groestl/sbox.go b/vendor/blitter.com/go/groestl/sbox.go
deleted file mode 100644
index 207d405..0000000
--- a/vendor/blitter.com/go/groestl/sbox.go
+++ /dev/null
@@ -1,20 +0,0 @@
-package groestl
-
-var sbox = [256]byte{
- 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
- 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
- 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
- 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
- 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
- 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
- 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
- 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
- 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
- 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
- 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
- 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
- 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
- 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
- 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
- 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
-}
diff --git a/vendor/blitter.com/go/groestl/util.go b/vendor/blitter.com/go/groestl/util.go
deleted file mode 100644
index 72b19c4..0000000
--- a/vendor/blitter.com/go/groestl/util.go
+++ /dev/null
@@ -1,28 +0,0 @@
-package groestl
-
-import (
- "encoding/hex"
- "fmt"
-)
-
-// Function for printing hash output from groestl.Sum()
-func PrintHash(hash []byte) {
- fmt.Println(hex.EncodeToString(hash))
-}
-
-// Helper function for returning i'th row from provided
-// uint64
-func pickRow(col uint64, i int) byte {
- return byte((col >> (8 * (7 - i))) & 0xFF)
-}
-
-// Prints provided x slice in form of block.
-func printUintSlice(x []uint64) {
- l := len(x)
- for i := 0; i < 8; i++ {
- for j := 0; j < l; j++ {
- fmt.Printf("%02x ", pickRow(x[j], i))
- }
- fmt.Println()
- }
-}
diff --git a/vendor/blitter.com/go/herradurakex/LICENSE.gpl b/vendor/blitter.com/go/herradurakex/LICENSE.gpl
deleted file mode 100644
index 94a9ed0..0000000
--- a/vendor/blitter.com/go/herradurakex/LICENSE.gpl
+++ /dev/null
@@ -1,674 +0,0 @@
- GNU GENERAL PUBLIC LICENSE
- Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc.
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
-
- The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works. By contrast,
-the GNU General Public License is intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users. We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors. You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
- To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights. Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received. You must make sure that they, too, receive
-or can get the source code. And you must show them these terms so they
-know their rights.
-
- Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
-
- For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software. For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
- Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so. This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software. The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable. Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products. If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
- Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary. To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- TERMS AND CONDITIONS
-
- 0. Definitions.
-
- "This License" refers to version 3 of the GNU General Public License.
-
- "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
- "The Program" refers to any copyrightable work licensed under this
-License. Each licensee is addressed as "you". "Licensees" and
-"recipients" may be individuals or organizations.
-
- To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy. The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
- A "covered work" means either the unmodified Program or a work based
-on the Program.
-
- To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy. Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
- To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies. Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
- An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License. If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
- 1. Source Code.
-
- The "source code" for a work means the preferred form of the work
-for making modifications to it. "Object code" means any non-source
-form of a work.
-
- A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
- The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form. A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
- The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities. However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work. For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
- The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
- The Corresponding Source for a work in source code form is that
-same work.
-
- 2. Basic Permissions.
-
- All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met. This License explicitly affirms your unlimited
-permission to run the unmodified Program. The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work. This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
- You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force. You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright. Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
- Conveying under any other circumstances is permitted solely under
-the conditions stated below. Sublicensing is not allowed; section 10
-makes it unnecessary.
-
- 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
- No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
- When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
- 4. Conveying Verbatim Copies.
-
- You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
- You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
- 5. Conveying Modified Source Versions.
-
- You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
- a) The work must carry prominent notices stating that you modified
- it, and giving a relevant date.
-
- b) The work must carry prominent notices stating that it is
- released under this License and any conditions added under section
- 7. This requirement modifies the requirement in section 4 to
- "keep intact all notices".
-
- c) You must license the entire work, as a whole, under this
- License to anyone who comes into possession of a copy. This
- License will therefore apply, along with any applicable section 7
- additional terms, to the whole of the work, and all its parts,
- regardless of how they are packaged. This License gives no
- permission to license the work in any other way, but it does not
- invalidate such permission if you have separately received it.
-
- d) If the work has interactive user interfaces, each must display
- Appropriate Legal Notices; however, if the Program has interactive
- interfaces that do not display Appropriate Legal Notices, your
- work need not make them do so.
-
- A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit. Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
- 6. Conveying Non-Source Forms.
-
- You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
- a) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by the
- Corresponding Source fixed on a durable physical medium
- customarily used for software interchange.
-
- b) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by a
- written offer, valid for at least three years and valid for as
- long as you offer spare parts or customer support for that product
- model, to give anyone who possesses the object code either (1) a
- copy of the Corresponding Source for all the software in the
- product that is covered by this License, on a durable physical
- medium customarily used for software interchange, for a price no
- more than your reasonable cost of physically performing this
- conveying of source, or (2) access to copy the
- Corresponding Source from a network server at no charge.
-
- c) Convey individual copies of the object code with a copy of the
- written offer to provide the Corresponding Source. This
- alternative is allowed only occasionally and noncommercially, and
- only if you received the object code with such an offer, in accord
- with subsection 6b.
-
- d) Convey the object code by offering access from a designated
- place (gratis or for a charge), and offer equivalent access to the
- Corresponding Source in the same way through the same place at no
- further charge. You need not require recipients to copy the
- Corresponding Source along with the object code. If the place to
- copy the object code is a network server, the Corresponding Source
- may be on a different server (operated by you or a third party)
- that supports equivalent copying facilities, provided you maintain
- clear directions next to the object code saying where to find the
- Corresponding Source. Regardless of what server hosts the
- Corresponding Source, you remain obligated to ensure that it is
- available for as long as needed to satisfy these requirements.
-
- e) Convey the object code using peer-to-peer transmission, provided
- you inform other peers where the object code and Corresponding
- Source of the work are being offered to the general public at no
- charge under subsection 6d.
-
- A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
- A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling. In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage. For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product. A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
- "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source. The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
- If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information. But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
- The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed. Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
- Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
- 7. Additional Terms.
-
- "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law. If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
- When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it. (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.) You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
- Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
- a) Disclaiming warranty or limiting liability differently from the
- terms of sections 15 and 16 of this License; or
-
- b) Requiring preservation of specified reasonable legal notices or
- author attributions in that material or in the Appropriate Legal
- Notices displayed by works containing it; or
-
- c) Prohibiting misrepresentation of the origin of that material, or
- requiring that modified versions of such material be marked in
- reasonable ways as different from the original version; or
-
- d) Limiting the use for publicity purposes of names of licensors or
- authors of the material; or
-
- e) Declining to grant rights under trademark law for use of some
- trade names, trademarks, or service marks; or
-
- f) Requiring indemnification of licensors and authors of that
- material by anyone who conveys the material (or modified versions of
- it) with contractual assumptions of liability to the recipient, for
- any liability that these contractual assumptions directly impose on
- those licensors and authors.
-
- All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10. If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term. If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
- If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
- Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
- 8. Termination.
-
- You may not propagate or modify a covered work except as expressly
-provided under this License. Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
- However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
- Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
- Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License. If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
- 9. Acceptance Not Required for Having Copies.
-
- You are not required to accept this License in order to receive or
-run a copy of the Program. Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance. However,
-nothing other than this License grants you permission to propagate or
-modify any covered work. These actions infringe copyright if you do
-not accept this License. Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
- 10. Automatic Licensing of Downstream Recipients.
-
- Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License. You are not responsible
-for enforcing compliance by third parties with this License.
-
- An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations. If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
- You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License. For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
- 11. Patents.
-
- A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based. The
-work thus licensed is called the contributor's "contributor version".
-
- A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version. For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
- Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
- In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement). To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
- If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients. "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
- If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
- A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License. You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
- Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
- 12. No Surrender of Others' Freedom.
-
- If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all. For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
- 13. Use with the GNU Affero General Public License.
-
- Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
-combined work, and to convey the resulting work. The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
- 14. Revised Versions of this License.
-
- The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation. If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
- If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
- Later license versions may give you additional or different
-permissions. However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
- 15. Disclaimer of Warranty.
-
- THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 16. Limitation of Liability.
-
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
- 17. Interpretation of Sections 15 and 16.
-
- If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-
- Copyright (C)
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see .
-
-Also add information on how to contact you by electronic and paper mail.
-
- If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
- Copyright (C)
- This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License. Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
- You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-.
-
- The GNU General Public License does not permit incorporating your program
-into proprietary programs. If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library. If this is what you want to do, use the GNU Lesser General
-Public License instead of this License. But first, please read
-.
diff --git a/vendor/blitter.com/go/herradurakex/LICENSE.mit b/vendor/blitter.com/go/herradurakex/LICENSE.mit
deleted file mode 100644
index 4acc92f..0000000
--- a/vendor/blitter.com/go/herradurakex/LICENSE.mit
+++ /dev/null
@@ -1,22 +0,0 @@
-MIT License
-
-Copyright (c) 2017 - 2018 Omar Alejandro Herrera Reyna (core HerraduraKEx)
-Copyright (c) 2017 - 2018 Russell Magee (hkexsh/hkexshd/hkexpasswd)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/vendor/blitter.com/go/herradurakex/Makefile b/vendor/blitter.com/go/herradurakex/Makefile
deleted file mode 100644
index e59fbc7..0000000
--- a/vendor/blitter.com/go/herradurakex/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-.PHONY: info clean all lib
-
-all: lib
-
-clean:
- go clean .
-
-lib: info
- go install .
-
-ifneq ($(MSYSTEM),)
-info:
- @echo "building for Windows (MSYS)"
-else
-info:
- @echo "building for Linux"
-endif
diff --git a/vendor/blitter.com/go/herradurakex/README.md b/vendor/blitter.com/go/herradurakex/README.md
deleted file mode 100644
index 82b7368..0000000
--- a/vendor/blitter.com/go/herradurakex/README.md
+++ /dev/null
@@ -1 +0,0 @@
-HerraduraKEx - an experimental Key Encapsulation Mechanism
diff --git a/vendor/blitter.com/go/herradurakex/herradurakex.go b/vendor/blitter.com/go/herradurakex/herradurakex.go
deleted file mode 100644
index 9621e94..0000000
--- a/vendor/blitter.com/go/herradurakex/herradurakex.go
+++ /dev/null
@@ -1,182 +0,0 @@
-// Package hkex - an experimental key exchange algorithm
-// by Omar Alejandro Herrera Reyna.
-//
-// (https://github.com/Caume/HerraduraKEx)
-//
-// The core HerraduraKEx algorithm is dual-licensed
-// by the author (Omar Alejandro Herrera Reyna)
-// under GPL3 and MIT licenses.
-// See LICENSE.gpl and LICENSE.mit in this distribution
-//
-// Go implementation Copyright (c) 2017-2018 Russell Magee
-// (rmagee_at_gmail_com)
-// Licensed under the terms of the MIT license
-// See LICENSE.mit in this distribution
-package hkex
-
-/* Herradura - a Key exchange scheme in the style of Diffie-Hellman Key Exchange.
- Copyright (C) 2017 Omar Alejandro Herrera Reyna
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see .
-
- golang implementation by Russ Magee (rmagee_at_gmail.com) */
-
-/* This is the core KEx algorithm. For client/server net support code,
-See the hkexnet package (currently a sub-package of hkexsh) for a
-golang/pkg/net compatible Conn interface using this to negotiate keys and
-secure a network channel. */
-
-import (
- "fmt"
- "math/big"
- "math/rand"
- "time"
-)
-
-// HerraduraKEx holds the session state for a key exchange.
-type HerraduraKEx struct {
- intSz, pubSz int
- randctx *rand.Rand
- a *big.Int
- b *big.Int
- d, peerD *big.Int
- fa *big.Int
-}
-
-// New returns a HerraduraKEx struct.
-//
-// i - internal (private) random nonce
-// p - public (exchanged) random nonce (typically 1/4 bitsize of i)
-//
-// If i or p are passed as zero, they will default to 256 and 64,
-// respectively.
-func New(i int, p int) (h *HerraduraKEx) {
- h = new(HerraduraKEx)
-
- if i == 0 {
- i = 256
- }
- if p == 0 {
- p = 64
- }
-
- h.intSz = i
- h.pubSz = p
-
- h.seed()
- h.a = h.rand()
- h.b = h.rand()
-
- h.d = h.fscxRevolve(h.a, h.b, h.pubSz)
- return h
-}
-
-func (h *HerraduraKEx) seed() {
- r := rand.New(rand.NewSource(time.Now().UnixNano()))
- h.randctx = r
-}
-
-func (h *HerraduraKEx) rand() (v *big.Int) {
- v = big.NewInt(0)
- v.Rand(h.randctx, h.getMax())
- return v
-}
-
-// getMax returns the max value for an n-bit big.Int
-func (h *HerraduraKEx) getMax() (n *big.Int) {
- n = big.NewInt(0)
- var max big.Int
-
- for i := 0; i < h.intSz; i++ {
- max.SetBit(n, i, 1)
- }
- n = &max
- return n
-}
-
-func (h *HerraduraKEx) bitX(x *big.Int, pos int) (ret int64) {
- if pos < 0 {
- pos = h.intSz - pos
- }
-
- if pos == 0 {
- ret = int64(x.Bit(1) ^ x.Bit(0) ^ x.Bit(h.intSz-1))
- } else if pos == h.intSz-1 {
- ret = int64(x.Bit(0) ^ x.Bit(pos) ^ x.Bit(pos-1))
- } else {
- ret = int64(x.Bit((pos+1)%h.intSz) ^ x.Bit(pos) ^ x.Bit(pos-1))
- }
- return ret
-}
-
-func (h *HerraduraKEx) bit(up, down *big.Int, posU, posD int) (ret *big.Int) {
- return big.NewInt(h.bitX(up, posU) ^ h.bitX(down, posD))
-}
-
-func (h *HerraduraKEx) fscx(up, down *big.Int) (result *big.Int) {
- result = big.NewInt(0)
-
- for count := 0; count < h.intSz; count++ {
- result.Lsh(result, 1)
- result.Add(result, h.bit(up, down, count, count))
- }
- return result
-}
-
-// This is the iteration function using the result of the previous iteration
-// as the first parameter and the second parameter of the first iteration.
-func (h *HerraduraKEx) fscxRevolve(x, y *big.Int, passes int) (result *big.Int) {
- result = x
- for count := 0; count < passes; count++ {
- result = h.fscx(result, y)
- }
- return result
-}
-
-// D returns the D (FSCX Revolved) value, input to generate FA
-// (the value for peer KEx)
-func (h HerraduraKEx) D() *big.Int {
- return h.d
-}
-
-// PeerD returns the peer D value
-func (h HerraduraKEx) PeerD() *big.Int {
- return h.peerD
-}
-
-// SetPeerD stores the received peer's D value (contents, not ptr)
-func (h *HerraduraKEx) SetPeerD(pd *big.Int) {
- h.peerD = new(big.Int).Set(pd)
-}
-
-// ComputeFA computes the FA value, which must be sent to peer for KEx.
-func (h *HerraduraKEx) ComputeFA() {
- h.fa = h.fscxRevolve(h.peerD, h.b, h.intSz-h.pubSz)
- h.fa.Xor(h.fa, h.a)
-}
-
-// FA returns the computed FA value
-func (h HerraduraKEx) FA() *big.Int {
- return h.fa
-}
-
-// Output HerraduraKEx type value as a string. Implements Stringer interface.
-func (h *HerraduraKEx) String() string {
- return fmt.Sprintf("s:%d p:%d\na:%s\nb:%s\nd:->%s\n<-peerD:%s\nfa:%s",
- h.intSz, h.pubSz,
- h.a.Text(16), h.b.Text(16),
- h.d.Text(16),
- h.peerD.Text(16),
- h.fa.Text(16))
-}
diff --git a/vendor/blitter.com/go/hopscotch/README.md b/vendor/blitter.com/go/hopscotch/README.md
deleted file mode 100644
index 735c42e..0000000
--- a/vendor/blitter.com/go/hopscotch/README.md
+++ /dev/null
@@ -1,29 +0,0 @@
-# hopscotch
-
-Experimental cipher using multiple hash algs for keystream.
-
-The cipher uses multiple trusted hash algorithms, each updated on a schedule (the security factor, 1 to 10) based initially on the secret key, then on random data from a PRNG (currently MTWIST-64, also seeded from the secret key). The XOR value used to encrypt plaintext is picked from bytes of the hash outputs (being appended together into a single pool P), the hash output byte chosen used then as a modulus value to 'hop' to the next XOR value within P (hence the name 'hopscotch').
-
-The security of the algorithm is premised on the following axioms:
-
-1. all hash algorithms used are sufficiently unpredictable in their output based on given input (most importantly the initial key as input to the PRNG and its seeded output);
-2. the PRNG is sufficiently random so as to guarantee subsequent input to all hash algorithms used as potential keystream pool bytes for XOR operations is not predictable;
-3. the keystream (being the hash outputs at a given time for all hash algorithms used) is re-keyed often enough to prevent excessive re-use of bytes ('excessive' being defined by the strength parameter, restricted from 1 to 10 inclusive by empirical analysis)
-
-Current implementation uses 2 hash algorithms, SHA512 and BLAKE2B, both giving outputs of fixed-length = 64 bytes. Empirically, using security factors ranging from 1 to 10 (count of input bytes encrypted before re-keying by feeding 32 bytes of PRNG data to derive new hash output XOR pool P) it is unlikely that 64 picks from the pool would re-use the same bytes often enough to compromise security. Tests with the ['circle' analysis tool](https://github.com/circulosmeos/circle) and the 'Tux.ppm' image test indicate ciphertext does not resemble plaintext in any obvious manner. (TODO: diehard tests or others?)
-
-The use purely of a PRNG plus two or more already-proven hash algorithms as keystream pool material offers a simply-verified security-primitive basis for confidence, plus easy extendability by adding more hash algorithms to the keystream pool P, without complexity of a full re-analysis. So long as each individual hash algorithm is considered safe, hopping between and within the output bytes of each to derive keystream XOR material should also be safe so long as hash output bytes are not re-used extensively.
-
-
-Throughput
---
-On a modest test AMD (Linux amd_x64) encryption rates of approx. 140Mbits/s are achieved (-m 4). As this is a pure Go implementation and little effort has been put into optimization it is reasonable to expect higher rates could be achieved in the future.
-
-```
-$ time ./cmd -k "SuperSecret#@11ElevenTy" blank700MBenc.bin
-
-real 0m40.096s
-user 0m38.318s
-sys 0m2.133s
-
-```
\ No newline at end of file
diff --git a/vendor/blitter.com/go/hopscotch/hopscotch.go b/vendor/blitter.com/go/hopscotch/hopscotch.go
deleted file mode 100644
index d462344..0000000
--- a/vendor/blitter.com/go/hopscotch/hopscotch.go
+++ /dev/null
@@ -1,186 +0,0 @@
-// Package hopscotch - a crypto doodle that uses multiple hash
-// algorithm outputs as dynamic sbox/pbox material
-//
-// Properties visualized using https://github.com/circulosmeos/circle
-package hopscotch
-
-// TODOs:
-// -define s-box rotation/shuffle schema
-// -devise p-box schema
-// ...
-
-import (
- "errors"
- "fmt"
- "hash"
- "io"
- "time"
-
- mtwist "blitter.com/go/mtwist" // Used to derive hash fodder after seeding w/key
-
- // hash algos must be manually imported thusly:
- // (Would be nice if the golang pkg docs were more clear
- // on this...)
- "crypto/sha512"
- _ "crypto/sha512"
-
- b2b "golang.org/x/crypto/blake2b"
- groestl "blitter.com/go/groestl"
-)
-
-const (
- maxResched = 99 // above 20 starts to show outlines in 'tuxtest' ... so 10 max
-)
-
-type Cipher struct {
- resched int // lower (1) == stronger encryption; weakest (10) == weakest
- rounds int
- prng *mtwist.MT19937_64 // used to gen initial hash fodder from key
- h []hash.Hash
- hs []byte
- r io.Reader
- w io.Writer
- idx int
- ctr int
- rekeyCtr int // must be min of len( c.h[] )
- bTmp byte
- k []byte
-}
-
-func New(r io.Reader, w io.Writer, resched int, key []byte) (c *Cipher) {
- if resched < 1 || resched > maxResched {
- resched = 4
- }
-
- c = &Cipher{}
- c.resched = resched
- c.rounds = 1
- c.prng = mtwist.New()
- c.r = r
- c.w = w
-
- if len(key) == 0 {
- c.k = []byte(fmt.Sprintf("%s", time.Now()))
- } else {
- c.k = key
- }
- c.prng.SeedFullState(c.k)
-
- // Discard first 64 bytes of MT output
- for idx := 0; idx < 64; idx++ {
- _ = c.prng.Int63()
- }
-
- // Init all the hash algs we're going to 'hop' around with initial keystream
- c.h = make([]hash.Hash, 3)
- c.h[0] = sha512.New()
- c.h[1], _ = b2b.New512(c.k)
- c.h[2] = groestl.New512()
- c.keyUpdate(c.k)
-
- c.rekeyCtr = len(c.hs) * c.resched // lower multiplier == greater security, lower speed
- //fmt.Fprintf(os.Stderr, "rekeyCtr = %v\n", c.rekeyCtr)
- return c
-}
-
-func (c *Cipher) Read(p []byte) (n int, err error) {
- n, err = c.r.Read(p)
- if err == nil {
- for idx := 0; idx < n; idx++ {
- p[idx] = c.yield(p[idx])
- }
- }
- return n, err
-}
-
-func (c *Cipher) Write(p []byte) (n int, err error) {
- n, err = c.w.Write(p)
- return n, err
-}
-
-// Mutate the session key (intended to be called as encryption proceeds)
-func (c *Cipher) keyUpdate(data []byte) {
- //fmt.Fprintln(os.Stderr, "--rekey--")
- {
- c.h[0].Write(data)
- sliceTmp := sha512.Sum512(data)
- c.hs = sliceTmp[:]
- }
- {
- c.h[1].Write(data)
- sliceTmp := b2b.Sum512(data)
- c.hs = append(c.hs, sliceTmp[:]...)
- }
- {
- c.h[2].Write(data)
- sliceTmp := groestl.Sum512(data)
- c.hs = append(c.hs, sliceTmp[:]...)
- }
-}
-
-func (c *Cipher) yield(ib byte) (ob byte) {
- c.idx = (c.ctr + c.idx + int(c.bTmp)) % len(c.hs)
- c.bTmp = c.hs[c.idx]
- c.ctr = c.ctr + 1
- //fmt.Fprintf(os.Stderr, "[c.hidx:%v c.idx:%v]\n", c.hidx, c.idx)
-
- // NOTE: using a non-prime modulus degrades CV % from ~ 0.055 to ~ 0.07
- switch c.ctr % 5 {
- case 0:
- ob = c.bTmp ^ ib ^ byte(c.ctr) ^ byte(c.idx) ^
- c.hs[len(c.hs)-19] ^ c.hs[len(c.hs)-2] ^ c.hs[len(c.hs)-3] ^ c.hs[len(c.hs)-5] ^
- c.hs[len(c.hs)-7] ^ c.hs[len(c.hs)-11] ^ c.hs[len(c.hs)-13] ^ c.hs[len(c.hs)-17] ^
- c.hs[len(c.hs)-47] ^ c.hs[len(c.hs)-43] ^ c.hs[len(c.hs)-41] ^ c.hs[len(c.hs)-39]
-
- case 1:
- ob = c.bTmp ^ ib ^ byte(c.ctr) ^ byte(c.idx) ^
- c.hs[len(c.hs)-5] ^ c.hs[len(c.hs)-7] ^ c.hs[len(c.hs)-11] ^ c.hs[len(c.hs)-13] ^
- c.hs[len(c.hs)-17] ^ c.hs[len(c.hs)-19] ^ c.hs[len(c.hs)-23] ^ c.hs[len(c.hs)-29] ^
- c.hs[len(c.hs)-43] ^ c.hs[len(c.hs)-41] ^ c.hs[len(c.hs)-39] ^ c.hs[len(c.hs)-37]
-
- case 2:
- ob = c.bTmp ^ ib ^ byte(c.ctr) ^ byte(c.idx) ^
- c.hs[len(c.hs)-13] ^ c.hs[len(c.hs)-17] ^ c.hs[len(c.hs)-23] ^ c.hs[len(c.hs)-27] ^
- c.hs[len(c.hs)-29] ^ c.hs[len(c.hs)-31] ^ c.hs[len(c.hs)-2] ^ c.hs[len(c.hs)-3] ^
- c.hs[len(c.hs)-37] ^ c.hs[len(c.hs)-41] ^ c.hs[len(c.hs)-39] ^ c.hs[len(c.hs)-47]
- case 3:
- ob = c.bTmp ^ ib ^ byte(c.ctr) ^ byte(c.idx) ^
- c.hs[len(c.hs)-13] ^ c.hs[len(c.hs)-17] ^ c.hs[len(c.hs)-23] ^ c.hs[len(c.hs)-27] ^
- c.hs[len(c.hs)-29] ^ c.hs[len(c.hs)-31] ^ c.hs[len(c.hs)-5] ^ c.hs[len(c.hs)-3] ^
- c.hs[len(c.hs)-43] ^ c.hs[len(c.hs)-41] ^ c.hs[len(c.hs)-39] ^ c.hs[len(c.hs)-37]
- case 4:
- ob = c.bTmp ^ ib ^ byte(c.ctr) ^ byte(c.idx) ^
- c.hs[len(c.hs)-13] ^ c.hs[len(c.hs)-17] ^ c.hs[len(c.hs)-23] ^ c.hs[len(c.hs)-27] ^
- c.hs[len(c.hs)-29] ^ c.hs[len(c.hs)-31] ^ c.hs[len(c.hs)-7] ^ c.hs[len(c.hs)-3] ^
- c.hs[len(c.hs)-33] ^ c.hs[len(c.hs)-41] ^ c.hs[len(c.hs)-45] ^ c.hs[len(c.hs)-43]
- }
-
- if c.ctr%c.rekeyCtr == 0 {
- bufTmp := make([]byte, 16*3)
- _, _ = c.prng.Read(bufTmp)
- c.keyUpdate(bufTmp)
- }
-
- return
-}
-
-// XORKeyStream XORs each byte in the given slice with a byte from the
-// cipher's key stream. Dst and src must overlap entirely or not at all.
-//
-// If len(dst) < len(src), XORKeyStream should panic. It is acceptable
-// to pass a dst bigger than src, and in that case, XORKeyStream will
-// only update dst[:len(src)] and will not touch the rest of dst.
-//
-// Multiple calls to XORKeyStream behave as if the concatenation of
-// the src buffers was passed in a single run. That is, Stream
-// maintains state and does not reset at each XORKeyStream call.
-func (c *Cipher) XORKeyStream(dst, src []byte) {
- //fmt.Printf("len dst:%d len src:%d\n", len(dst), len(src))
- if len(dst) < len(src) {
- panic(errors.New("len(dst) < len(src)"))
- }
-
- for idx, v := range src {
- dst[idx] = c.yield(v)
- }
-}
diff --git a/vendor/blitter.com/go/kyber/.gitignore b/vendor/blitter.com/go/kyber/.gitignore
deleted file mode 100644
index d38c149..0000000
--- a/vendor/blitter.com/go/kyber/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-*.swp
-*~
diff --git a/vendor/blitter.com/go/kyber/LICENSE b/vendor/blitter.com/go/kyber/LICENSE
deleted file mode 100644
index 6ca207e..0000000
--- a/vendor/blitter.com/go/kyber/LICENSE
+++ /dev/null
@@ -1,122 +0,0 @@
-Creative Commons Legal Code
-
-CC0 1.0 Universal
-
- CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
- LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
- ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
- INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
- REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
- PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
- THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
- HEREUNDER.
-
-Statement of Purpose
-
-The laws of most jurisdictions throughout the world automatically confer
-exclusive Copyright and Related Rights (defined below) upon the creator
-and subsequent owner(s) (each and all, an "owner") of an original work of
-authorship and/or a database (each, a "Work").
-
-Certain owners wish to permanently relinquish those rights to a Work for
-the purpose of contributing to a commons of creative, cultural and
-scientific works ("Commons") that the public can reliably and without fear
-of later claims of infringement build upon, modify, incorporate in other
-works, reuse and redistribute as freely as possible in any form whatsoever
-and for any purposes, including without limitation commercial purposes.
-These owners may contribute to the Commons to promote the ideal of a free
-culture and the further production of creative, cultural and scientific
-works, or to gain reputation or greater distribution for their Work in
-part through the use and efforts of others.
-
-For these and/or other purposes and motivations, and without any
-expectation of additional consideration or compensation, the person
-associating CC0 with a Work (the "Affirmer"), to the extent that he or she
-is an owner of Copyright and Related Rights in the Work, voluntarily
-elects to apply CC0 to the Work and publicly distribute the Work under its
-terms, with knowledge of his or her Copyright and Related Rights in the
-Work and the meaning and intended legal effect of CC0 on those rights.
-
-1. Copyright and Related Rights. A Work made available under CC0 may be
-protected by copyright and related or neighboring rights ("Copyright and
-Related Rights"). Copyright and Related Rights include, but are not
-limited to, the following:
-
- i. the right to reproduce, adapt, distribute, perform, display,
- communicate, and translate a Work;
- ii. moral rights retained by the original author(s) and/or performer(s);
-iii. publicity and privacy rights pertaining to a person's image or
- likeness depicted in a Work;
- iv. rights protecting against unfair competition in regards to a Work,
- subject to the limitations in paragraph 4(a), below;
- v. rights protecting the extraction, dissemination, use and reuse of data
- in a Work;
- vi. database rights (such as those arising under Directive 96/9/EC of the
- European Parliament and of the Council of 11 March 1996 on the legal
- protection of databases, and under any national implementation
- thereof, including any amended or successor version of such
- directive); and
-vii. other similar, equivalent or corresponding rights throughout the
- world based on applicable law or treaty, and any national
- implementations thereof.
-
-2. Waiver. To the greatest extent permitted by, but not in contravention
-of, applicable law, Affirmer hereby overtly, fully, permanently,
-irrevocably and unconditionally waives, abandons, and surrenders all of
-Affirmer's Copyright and Related Rights and associated claims and causes
-of action, whether now known or unknown (including existing as well as
-future claims and causes of action), in the Work (i) in all territories
-worldwide, (ii) for the maximum duration provided by applicable law or
-treaty (including future time extensions), (iii) in any current or future
-medium and for any number of copies, and (iv) for any purpose whatsoever,
-including without limitation commercial, advertising or promotional
-purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
-member of the public at large and to the detriment of Affirmer's heirs and
-successors, fully intending that such Waiver shall not be subject to
-revocation, rescission, cancellation, termination, or any other legal or
-equitable action to disrupt the quiet enjoyment of the Work by the public
-as contemplated by Affirmer's express Statement of Purpose.
-
-3. Public License Fallback. Should any part of the Waiver for any reason
-be judged legally invalid or ineffective under applicable law, then the
-Waiver shall be preserved to the maximum extent permitted taking into
-account Affirmer's express Statement of Purpose. In addition, to the
-extent the Waiver is so judged Affirmer hereby grants to each affected
-person a royalty-free, non transferable, non sublicensable, non exclusive,
-irrevocable and unconditional license to exercise Affirmer's Copyright and
-Related Rights in the Work (i) in all territories worldwide, (ii) for the
-maximum duration provided by applicable law or treaty (including future
-time extensions), (iii) in any current or future medium and for any number
-of copies, and (iv) for any purpose whatsoever, including without
-limitation commercial, advertising or promotional purposes (the
-"License"). The License shall be deemed effective as of the date CC0 was
-applied by Affirmer to the Work. Should any part of the License for any
-reason be judged legally invalid or ineffective under applicable law, such
-partial invalidity or ineffectiveness shall not invalidate the remainder
-of the License, and in such case Affirmer hereby affirms that he or she
-will not (i) exercise any of his or her remaining Copyright and Related
-Rights in the Work or (ii) assert any associated claims and causes of
-action with respect to the Work, in either case contrary to Affirmer's
-express Statement of Purpose.
-
-4. Limitations and Disclaimers.
-
- a. No trademark or patent rights held by Affirmer are waived, abandoned,
- surrendered, licensed or otherwise affected by this document.
- b. Affirmer offers the Work as-is and makes no representations or
- warranties of any kind concerning the Work, express, implied,
- statutory or otherwise, including without limitation warranties of
- title, merchantability, fitness for a particular purpose, non
- infringement, or the absence of latent or other defects, accuracy, or
- the present or absence of errors, whether or not discoverable, all to
- the greatest extent permissible under applicable law.
- c. Affirmer disclaims responsibility for clearing rights of other persons
- that may apply to the Work or any use thereof, including without
- limitation any person's Copyright and Related Rights in the Work.
- Further, Affirmer disclaims responsibility for obtaining any necessary
- consents, permissions or other rights required for any use of the
- Work.
- d. Affirmer understands and acknowledges that Creative Commons is not a
- party to this document and has no duty or obligation with respect to
- this CC0 or use of the Work.
-
diff --git a/vendor/blitter.com/go/kyber/README.md b/vendor/blitter.com/go/kyber/README.md
deleted file mode 100644
index 634901c..0000000
--- a/vendor/blitter.com/go/kyber/README.md
+++ /dev/null
@@ -1,24 +0,0 @@
-### Kyber - IND-CCA2-secure Key Encapsulation Mechanism
-#### Yawning Angel (yawning at schwanenlied dot me)
-
-[![GoDoc](https://godoc.org/git.schwanenlied.me/yawning/kyber.git?status.svg)](https://godoc.org/git.schwanenlied.me/yawning/kyber.git)
-
-This package implements the Kyber IND-CCA2-secure key encapsulation
-mechanism (KEM), based on the hardness of solving the learning-with-errors
-(LWE) problem over module lattices as submitted to the NIST Post-Quantum
-Cryptography project.
-
-This implementation is a port of the Public Domain reference implementation
-by Joppe Bos, Léo Ducas, Eike Kiltz , Tancrède Lepoint, Vadim Lyubashevsky,
-John Schanck, Peter Schwabe, Gregor Seiler, and Damien Stehlé.
-
-Additionally implementations of Kyber.AKE and Kyber.UAKE as presented in
-the Kyber paper are included for users that seek an authenticated key
-exchange.
-
-Note that the algorithm is not finalized yet, and may change in a backward
-incompatible manner in the future. The designers currently recommend
-combining Kyber with an established pre-quantum algorithm like ECDH, and
-using the Kyber-768 parameter set.
-
-For more information, see the [project home page](https://pq-crystals.org/kyber/index.shtml).
diff --git a/vendor/blitter.com/go/kyber/cbd.go b/vendor/blitter.com/go/kyber/cbd.go
deleted file mode 100644
index 31ca4fd..0000000
--- a/vendor/blitter.com/go/kyber/cbd.go
+++ /dev/null
@@ -1,100 +0,0 @@
-// cbd.go - Centered binomial distribution.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to the software, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
-
-package kyber
-
-// Load bytes into a 64-bit integer in little-endian order.
-func loadLittleEndian(x []byte, bytes int) uint64 {
- var r uint64
- for i, v := range x[:bytes] {
- r |= uint64(v) << (8 * uint(i))
- }
- return r
-}
-
-// Given an array of uniformly random bytes, compute polynomial with
-// coefficients distributed according to a centered binomial distribution
-// with parameter eta.
-func (p *poly) cbd(buf []byte, eta int) {
- hardwareAccelImpl.cbdFn(p, buf, eta)
-}
-
-func cbdRef(p *poly, buf []byte, eta int) {
- switch eta {
- case 3:
- var a, b [4]uint32
- for i := 0; i < kyberN/4; i++ {
- t := loadLittleEndian(buf[3*i:], 3)
- var d uint32
- for j := 0; j < 3; j++ {
- d += uint32((t >> uint(j)) & 0x249249)
- }
-
- a[0] = d & 0x7
- b[0] = (d >> 3) & 0x7
- a[1] = (d >> 6) & 0x7
- b[1] = (d >> 9) & 0x7
- a[2] = (d >> 12) & 0x7
- b[2] = (d >> 15) & 0x7
- a[3] = (d >> 18) & 0x7
- b[3] = (d >> 21)
-
- p.coeffs[4*i+0] = uint16(a[0] + kyberQ - b[0])
- p.coeffs[4*i+1] = uint16(a[1] + kyberQ - b[1])
- p.coeffs[4*i+2] = uint16(a[2] + kyberQ - b[2])
- p.coeffs[4*i+3] = uint16(a[3] + kyberQ - b[3])
- }
- case 4:
- var a, b [4]uint32
- for i := 0; i < kyberN/4; i++ {
- t := loadLittleEndian(buf[4*i:], 4)
- var d uint32
- for j := 0; j < 4; j++ {
- d += uint32((t >> uint(j)) & 0x11111111)
- }
-
- a[0] = d & 0xf
- b[0] = (d >> 4) & 0xf
- a[1] = (d >> 8) & 0xf
- b[1] = (d >> 12) & 0xf
- a[2] = (d >> 16) & 0xf
- b[2] = (d >> 20) & 0xf
- a[3] = (d >> 24) & 0xf
- b[3] = (d >> 28)
-
- p.coeffs[4*i+0] = uint16(a[0] + kyberQ - b[0])
- p.coeffs[4*i+1] = uint16(a[1] + kyberQ - b[1])
- p.coeffs[4*i+2] = uint16(a[2] + kyberQ - b[2])
- p.coeffs[4*i+3] = uint16(a[3] + kyberQ - b[3])
- }
- case 5:
- var a, b [4]uint64
- for i := 0; i < kyberN/4; i++ {
- t := loadLittleEndian(buf[5*i:], 5)
- var d uint64
- for j := 0; j < 5; j++ {
- d += (t >> uint(j)) & 0x0842108421
- }
-
- a[0] = d & 0x1f
- b[0] = (d >> 5) & 0x1f
- a[1] = (d >> 10) & 0x1f
- b[1] = (d >> 15) & 0x1f
- a[2] = (d >> 20) & 0x1f
- b[2] = (d >> 25) & 0x1f
- a[3] = (d >> 30) & 0x1f
- b[3] = (d >> 35)
-
- p.coeffs[4*i+0] = uint16(a[0] + kyberQ - b[0])
- p.coeffs[4*i+1] = uint16(a[1] + kyberQ - b[1])
- p.coeffs[4*i+2] = uint16(a[2] + kyberQ - b[2])
- p.coeffs[4*i+3] = uint16(a[3] + kyberQ - b[3])
- }
- default:
- panic("kyber: eta must be in {3,4,5}")
- }
-}
diff --git a/vendor/blitter.com/go/kyber/doc.go b/vendor/blitter.com/go/kyber/doc.go
deleted file mode 100644
index 8789ad4..0000000
--- a/vendor/blitter.com/go/kyber/doc.go
+++ /dev/null
@@ -1,27 +0,0 @@
-// doc.go - Kyber godoc extras.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to the software, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
-
-// Package kyber implements the Kyber IND-CCA2-secure key encapsulation
-// mechanism (KEM), based on the hardness of solving the learning-with-errors
-// (LWE) problem over module lattices as submitted to the NIST Post-Quantum
-// Cryptography project.
-//
-// This implementation is a port of the Public Domain reference implementation
-// by Joppe Bos, Léo Ducas, Eike Kiltz , Tancrède Lepoint, Vadim Lyubashevsky,
-// John Schanck, Peter Schwabe, Gregor Seiler, and Damien Stehlé.
-//
-// Additionally implementations of Kyber.AKE and Kyber.UAKE as presented in
-// the Kyber paper are included for users that seek an authenticated key
-// exchange.
-//
-// Note that the algorithm is not finalized yet, and may change in a backward
-// incompatible manner in the future. The designers currently recommend
-// combining Kyber with an established pre-quantum algorithm like ECDH, and
-// using the Kyber-768 parameter set.
-//
-// For more information, see https://pq-crystals.org/kyber/index.shtml.
-package kyber
diff --git a/vendor/blitter.com/go/kyber/hwaccel.go b/vendor/blitter.com/go/kyber/hwaccel.go
deleted file mode 100644
index 80ec831..0000000
--- a/vendor/blitter.com/go/kyber/hwaccel.go
+++ /dev/null
@@ -1,46 +0,0 @@
-// hwaccel.go - Hardware acceleration hooks.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to the software, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
-
-package kyber
-
-var (
- isHardwareAccelerated = false
- hardwareAccelImpl = implReference
-
- implReference = &hwaccelImpl{
- name: "Reference",
- nttFn: nttRef,
- invnttFn: invnttRef,
- pointwiseAccFn: pointwiseAccRef,
- cbdFn: cbdRef,
- }
-)
-
-type hwaccelImpl struct {
- name string
- nttFn func(*[kyberN]uint16)
- invnttFn func(*[kyberN]uint16)
- pointwiseAccFn func(*poly, *polyVec, *polyVec)
- cbdFn func(*poly, []byte, int)
-}
-
-func forceDisableHardwareAcceleration() {
- // This is for the benefit of testing, so that it's possible to test
- // all versions that are supported by the host.
- isHardwareAccelerated = false
- hardwareAccelImpl = implReference
-}
-
-// IsHardwareAccelerated returns true iff the Kyber implementation will use
-// hardware acceleration (eg: AVX2).
-func IsHardwareAccelerated() bool {
- return isHardwareAccelerated
-}
-
-func init() {
- initHardwareAcceleration()
-}
diff --git a/vendor/blitter.com/go/kyber/hwaccel_amd64.go b/vendor/blitter.com/go/kyber/hwaccel_amd64.go
deleted file mode 100644
index 6413187..0000000
--- a/vendor/blitter.com/go/kyber/hwaccel_amd64.go
+++ /dev/null
@@ -1,256 +0,0 @@
-// hwaccel_amd64.go - AMD64 optimized routines.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to the software, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
-
-// +build amd64,!gccgo,!noasm,go1.10
-
-package kyber
-
-var zetasExp = [752]uint16{
- 3777, 3777, 3777, 3777, 3777, 3777, 3777, 3777, 3777, 3777, 3777, 3777,
- 3777, 3777, 3777, 3777, 4499, 4499, 4499, 4499, 4499, 4499, 4499, 4499,
- 4499, 4499, 4499, 4499, 4499, 4499, 4499, 4499, 3625, 3625, 3625, 3625,
- 3625, 3625, 3625, 3625, 3625, 3625, 3625, 3625, 3625, 3625, 3625, 3625,
- 3985, 3985, 3985, 3985, 3985, 3985, 3985, 3985, 3985, 3985, 3985, 3985,
- 3985, 3985, 3985, 3985, 6581, 6581, 6581, 6581, 6581, 6581, 6581, 6581,
- 6581, 6581, 6581, 6581, 6581, 6581, 6581, 6581, 2456, 2456, 2456, 2456,
- 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456,
- 2194, 2194, 2194, 2194, 2194, 2194, 2194, 2194, 2194, 2194, 2194, 2194,
- 2194, 2194, 2194, 2194, 121, 121, 121, 121, 121, 121, 121, 121, 121,
- 121, 121, 121, 121, 121, 121, 121, 5431, 5431, 5431, 5431, 5431, 5431,
- 5431, 5431, 5431, 5431, 5431, 5431, 5431, 5431, 5431, 5431, 834, 834,
- 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834,
- 5186, 5186, 5186, 5186, 5186, 5186, 5186, 5186, 5186, 5186, 5186, 5186,
- 5186, 5186, 5186, 5186, 5362, 5362, 5362, 5362, 5362, 5362, 5362, 5362,
- 5362, 5362, 5362, 5362, 5362, 5362, 5362, 5362, 2876, 2876, 2876, 2876,
- 2876, 2876, 2876, 2876, 2876, 2876, 2876, 2876, 2876, 2876, 2876, 2876,
- 5980, 5980, 5980, 5980, 5980, 5980, 5980, 5980, 5980, 5980, 5980, 5980,
- 5980, 5980, 5980, 5980, 1414, 1414, 1414, 1414, 1414, 1414, 1414, 1414,
- 1414, 1414, 1414, 1414, 1414, 1414, 1414, 1414, 2816, 2816, 2816, 2816,
- 2816, 2816, 2816, 2816, 5593, 5593, 5593, 5593, 5593, 5593, 5593, 5593,
- 5444, 5444, 5444, 5444, 5444, 5444, 5444, 5444, 1986, 1986, 1986, 1986,
- 1986, 1986, 1986, 1986, 6082, 6082, 6082, 6082, 6082, 6082, 6082, 6082,
- 1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 3706, 3706, 3706, 3706,
- 3706, 3706, 3706, 3706, 5675, 5675, 5675, 5675, 5675, 5675, 5675, 5675,
- 6156, 6156, 6156, 6156, 6156, 6156, 6156, 6156, 5124, 5124, 5124, 5124,
- 5124, 5124, 5124, 5124, 1296, 1296, 1296, 1296, 1296, 1296, 1296, 1296,
- 1483, 1483, 1483, 1483, 1483, 1483, 1483, 1483, 4851, 4851, 4851, 4851,
- 4851, 4851, 4851, 4851, 3364, 3364, 3364, 3364, 3364, 3364, 3364, 3364,
- 617, 617, 617, 617, 617, 617, 617, 617, 1921, 1921, 1921, 1921, 1921,
- 1921, 1921, 1921, 3992, 3992, 3992, 3992, 5943, 5943, 5943, 5943, 3266,
- 3266, 3266, 3266, 4081, 4081, 4081, 4081, 810, 810, 810, 810, 1887,
- 1887, 1887, 1887, 7043, 7043, 7043, 7043, 7674, 7674, 7674, 7674, 7243,
- 7243, 7243, 7243, 7002, 7002, 7002, 7002, 6376, 6376, 6376, 6376, 5921,
- 5921, 5921, 5921, 396, 396, 396, 396, 4507, 4507, 4507, 4507, 4126,
- 4126, 4126, 4126, 5800, 5800, 5800, 5800, 3772, 3772, 3772, 3772, 5146,
- 5146, 5146, 5146, 5241, 5241, 5241, 5241, 5126, 5126, 5126, 5126, 1535,
- 1535, 1535, 1535, 7132, 7132, 7132, 7132, 3153, 3153, 3153, 3153, 2310,
- 2310, 2310, 2310, 6282, 6282, 6282, 6282, 1321, 1321, 1321, 1321, 514,
- 514, 514, 514, 4725, 4725, 4725, 4725, 7578, 7578, 7578, 7578, 2804,
- 2804, 2804, 2804, 5638, 5638, 5638, 5638, 6250, 6250, 6250, 6250, 6627,
- 6627, 1698, 1698, 4225, 4225, 1166, 1166, 2426, 2426, 3831, 3831, 915,
- 915, 7679, 7679, 4264, 4264, 7487, 7487, 2919, 2919, 2789, 2789, 3405,
- 3405, 2385, 2385, 5568, 5568, 4949, 4949, 2175, 2175, 373, 373, 3692,
- 3692, 6951, 6951, 5925, 5925, 3135, 3135, 5290, 5290, 660, 660, 6184,
- 6184, 2572, 2572, 4536, 4536, 1350, 1350, 5457, 5457, 4093, 4093, 6000,
- 6000, 2883, 2883, 6291, 6291, 1598, 1598, 3750, 3750, 2762, 2762, 2835,
- 2835, 2764, 2764, 5448, 5448, 3816, 3816, 6148, 6148, 1464, 1464, 6954,
- 6954, 1521, 1521, 1386, 1386, 4253, 4253, 6760, 6760, 4938, 4938, 5521,
- 5521, 2649, 2649, 6822, 6822, 2579, 2579, 1532, 1532, 1919, 1919, 7195,
- 7195, 404, 404, 6625, 6625, 783, 783, 1799, 1799, 5016, 5016, 3480,
- 3480, 2133, 2133, 4371, 4371, 6513, 6513, 7664, 3744, 2422, 2001, 1278,
- 929, 6333, 5451, 7502, 6439, 5622, 6611, 2161, 1649, 2072, 3177, 5610,
- 1121, 7245, 236, 715, 670, 7023, 6205, 5303, 2767, 3542, 7455, 1203,
- 1181, 7530, 3887, 1712, 7459, 2786, 7230, 4134, 1779, 6530, 7247, 3568,
- 3988, 3581, 6095, 1509, 2918, 2339, 6274, 3434, 4131, 2340, 2891, 2998,
- 4367, 3461, 4962, 5434, 5092, 1144, 1072, 1295, 4866, 3911, 3450, 3781,
- 5423, 796, 3163, 4473, 7092, 2963, 7557, 3214, 3334, 4315, 3936, 3723,
- 1931, 7252, 7279, 4273, 83, 6155, 826, 6343, 2345, 5378, 2515, 7039,
- 5844, 4716, 6890, 370, 293, 3312, 2083, 5992, 6904, 2070, 2262, 6788,
- 2386, 7493, 6162, 4807, 6277, 1012, 2130, 1441, 2532, 4346, 6597, 4338,
- 2937, 509, 6278, 2812, 3763, 592, 2005, 3657, 2460, 4004, 3752, 692,
- 1669, 2167, 4394,
-}
-
-var zetasInvExp = [752]uint16{
- 3287, 5514, 6012, 6989, 3929, 3677, 5221, 4024, 5676, 7089, 3918, 4869,
- 1403, 7172, 4744, 3343, 1084, 3335, 5149, 6240, 5551, 6669, 1404, 2874,
- 1519, 188, 5295, 893, 5419, 5611, 777, 1689, 5598, 4369, 7388, 7311,
- 791, 2965, 1837, 642, 5166, 2303, 5336, 1338, 6855, 1526, 7598, 3408,
- 402, 429, 5750, 3958, 3745, 3366, 4347, 4467, 124, 4718, 589, 3208,
- 4518, 6885, 2258, 3900, 4231, 3770, 2815, 6386, 6609, 6537, 2589, 2247,
- 2719, 4220, 3314, 4683, 4790, 5341, 3550, 4247, 1407, 5342, 4763, 6172,
- 1586, 4100, 3693, 4113, 434, 1151, 5902, 3547, 451, 4895, 222, 5969,
- 3794, 151, 6500, 6478, 226, 4139, 4914, 2378, 1476, 658, 7011, 6966,
- 7445, 436, 6560, 2071, 4504, 5609, 6032, 5520, 1070, 2059, 1242, 179,
- 2230, 1348, 6752, 6403, 5680, 5259, 3937, 17, 1168, 1168, 3310, 3310,
- 5548, 5548, 4201, 4201, 2665, 2665, 5882, 5882, 6898, 6898, 1056, 1056,
- 7277, 7277, 486, 486, 5762, 5762, 6149, 6149, 5102, 5102, 859, 859,
- 5032, 5032, 2160, 2160, 2743, 2743, 921, 921, 3428, 3428, 6295, 6295,
- 6160, 6160, 727, 727, 6217, 6217, 1533, 1533, 3865, 3865, 2233, 2233,
- 4917, 4917, 4846, 4846, 4919, 4919, 3931, 3931, 6083, 6083, 1390, 1390,
- 4798, 4798, 1681, 1681, 3588, 3588, 2224, 2224, 6331, 6331, 3145, 3145,
- 5109, 5109, 1497, 1497, 7021, 7021, 2391, 2391, 4546, 4546, 1756, 1756,
- 730, 730, 3989, 3989, 7308, 7308, 5506, 5506, 2732, 2732, 2113, 2113,
- 5296, 5296, 4276, 4276, 4892, 4892, 4762, 4762, 194, 194, 3417, 3417, 2,
- 2, 6766, 6766, 3850, 3850, 5255, 5255, 6515, 6515, 3456, 3456, 5983,
- 5983, 1054, 1054, 1431, 1431, 1431, 1431, 2043, 2043, 2043, 2043, 4877,
- 4877, 4877, 4877, 103, 103, 103, 103, 2956, 2956, 2956, 2956, 7167,
- 7167, 7167, 7167, 6360, 6360, 6360, 6360, 1399, 1399, 1399, 1399, 5371,
- 5371, 5371, 5371, 4528, 4528, 4528, 4528, 549, 549, 549, 549, 6146,
- 6146, 6146, 6146, 2555, 2555, 2555, 2555, 2440, 2440, 2440, 2440, 2535,
- 2535, 2535, 2535, 3909, 3909, 3909, 3909, 1881, 1881, 1881, 1881, 3555,
- 3555, 3555, 3555, 3174, 3174, 3174, 3174, 7285, 7285, 7285, 7285, 1760,
- 1760, 1760, 1760, 1305, 1305, 1305, 1305, 679, 679, 679, 679, 438, 438,
- 438, 438, 7, 7, 7, 7, 638, 638, 638, 638, 5794, 5794, 5794, 5794, 6871,
- 6871, 6871, 6871, 3600, 3600, 3600, 3600, 4415, 4415, 4415, 4415, 1738,
- 1738, 1738, 1738, 3689, 3689, 3689, 3689, 5760, 5760, 5760, 5760, 5760,
- 5760, 5760, 5760, 7064, 7064, 7064, 7064, 7064, 7064, 7064, 7064, 4317,
- 4317, 4317, 4317, 4317, 4317, 4317, 4317, 2830, 2830, 2830, 2830, 2830,
- 2830, 2830, 2830, 6198, 6198, 6198, 6198, 6198, 6198, 6198, 6198, 6385,
- 6385, 6385, 6385, 6385, 6385, 6385, 6385, 2557, 2557, 2557, 2557, 2557,
- 2557, 2557, 2557, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 2006,
- 2006, 2006, 2006, 2006, 2006, 2006, 2006, 3975, 3975, 3975, 3975, 3975,
- 3975, 3975, 3975, 5688, 5688, 5688, 5688, 5688, 5688, 5688, 5688, 1599,
- 1599, 1599, 1599, 1599, 1599, 1599, 1599, 5695, 5695, 5695, 5695, 5695,
- 5695, 5695, 5695, 2237, 2237, 2237, 2237, 2237, 2237, 2237, 2237, 2088,
- 2088, 2088, 2088, 2088, 2088, 2088, 2088, 4865, 4865, 4865, 4865, 4865,
- 4865, 4865, 4865, 6267, 6267, 6267, 6267, 6267, 6267, 6267, 6267, 6267,
- 6267, 6267, 6267, 6267, 6267, 6267, 6267, 1701, 1701, 1701, 1701, 1701,
- 1701, 1701, 1701, 1701, 1701, 1701, 1701, 1701, 1701, 1701, 1701, 4805,
- 4805, 4805, 4805, 4805, 4805, 4805, 4805, 4805, 4805, 4805, 4805, 4805,
- 4805, 4805, 4805, 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319,
- 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2495, 2495, 2495, 2495, 2495,
- 2495, 2495, 2495, 2495, 2495, 2495, 2495, 2495, 2495, 2495, 2495, 6847,
- 6847, 6847, 6847, 6847, 6847, 6847, 6847, 6847, 6847, 6847, 6847, 6847,
- 6847, 6847, 6847, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250,
- 2250, 2250, 2250, 2250, 2250, 2250, 2250, 7560, 7560, 7560, 7560, 7560,
- 7560, 7560, 7560, 7560, 7560, 7560, 7560, 7560, 7560, 7560, 7560, 5487,
- 5487, 5487, 5487, 5487, 5487, 5487, 5487, 5487, 5487, 5487, 5487, 5487,
- 5487, 5487, 5487, 5225, 5225, 5225, 5225, 5225, 5225, 5225, 5225, 5225,
- 5225, 5225, 5225, 5225, 5225, 5225, 5225, 1100, 1100, 1100, 1100, 1100,
- 1100, 1100, 1100, 1100, 1100, 1100, 1100, 1100, 1100, 1100, 1100, 3696,
- 3696, 3696, 3696, 3696, 3696, 3696, 3696, 3696, 3696, 3696, 3696, 3696,
- 3696, 3696, 3696, 4056, 4056, 4056, 4056, 4056, 4056, 4056, 4056, 4056,
- 4056, 4056, 4056, 4056, 4056, 4056, 4056, 3182, 3182, 3182, 3182, 3182,
- 3182, 3182, 3182, 3182, 3182, 3182, 3182, 3182, 3182, 3182, 3182, 5776,
- 5776, 5776, 5776, 5776, 5776, 5776, 5776, 5776, 5776, 5776, 5776, 5776,
- 5776, 5776, 5776,
-}
-
-//go:noescape
-func cpuidAmd64(cpuidParams *uint32)
-
-//go:noescape
-func xgetbv0Amd64(xcrVec *uint32)
-
-//go:noescape
-func nttAVX2(inout, zetas *uint16)
-
-//go:noescape
-func invnttAVX2(inout, omegas *uint16)
-
-//go:noescape
-func pointwiseAccK2AVX2(dst *uint16, a, b **uint16)
-
-//go:noescape
-func pointwiseAccK3AVX2(dst *uint16, a, b **uint16)
-
-//go:noescape
-func pointwiseAccK4AVX2(dst *uint16, a, b **uint16)
-
-//go:noescape
-func cbdEta4AVX2(dst *uint16, buf *byte)
-
-func supportsAVX2() bool {
- // https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
- const (
- osXsaveBit = 1 << 27
- avx2Bit = 1 << 5
- )
-
- // Check to see if CPUID actually supports the leaf that indicates AVX2.
- // CPUID.(EAX=0H, ECX=0H) >= 7
- regs := [4]uint32{0x00}
- cpuidAmd64(&regs[0])
- if regs[0] < 7 {
- return false
- }
-
- // Check to see if the OS knows how to save/restore XMM/YMM state.
- // CPUID.(EAX=01H, ECX=0H):ECX.OSXSAVE[bit 27]==1
- regs = [4]uint32{0x01}
- cpuidAmd64(&regs[0])
- if regs[2]&osXsaveBit == 0 {
- return false
- }
- xcrRegs := [2]uint32{}
- xgetbv0Amd64(&xcrRegs[0])
- if xcrRegs[0]&6 != 6 {
- return false
- }
-
- // Check for AVX2 support.
- // CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]==1
- regs = [4]uint32{0x07}
- cpuidAmd64(&regs[0])
- return regs[1]&avx2Bit != 0
-}
-
-var implAVX2 = &hwaccelImpl{
- name: "AVX2",
- nttFn: nttYMM,
- invnttFn: invnttYMM,
- pointwiseAccFn: pointwiseAccYMM,
- cbdFn: cbdYMM,
-}
-
-func nttYMM(p *[kyberN]uint16) {
- nttAVX2(&p[0], &zetasExp[0])
-}
-
-func invnttYMM(a *[kyberN]uint16) {
- invnttAVX2(&a[0], &zetasInvExp[0])
-}
-
-func pointwiseAccYMM(p *poly, a, b *polyVec) {
- // Unlike the C code, a polyVec won't have the polys in contigious
- // memory. So each assembly function takes vectors of pointers to
- // each polyvec's polys.
- //
- // Kind of ugly, but it's the price to pay for flexibility...
-
- var aVec, bVec [4]*uint16 // k is in {2,3,4}.
- for i := range a.vec {
- aVec[i] = &a.vec[i].coeffs[0]
- bVec[i] = &b.vec[i].coeffs[0]
- }
-
- switch len(a.vec) {
- case 2:
- pointwiseAccK2AVX2(&p.coeffs[0], &aVec[0], &bVec[0])
- case 3:
- pointwiseAccK3AVX2(&p.coeffs[0], &aVec[0], &bVec[0])
- case 4:
- pointwiseAccK4AVX2(&p.coeffs[0], &aVec[0], &bVec[0])
- }
-}
-
-func cbdYMM(p *poly, buf []byte, eta int) {
- switch eta {
- case 4:
- cbdEta4AVX2(&p.coeffs[0], &buf[0])
- default:
- cbdRef(p, buf, eta)
- }
-}
-
-func initHardwareAcceleration() {
- if supportsAVX2() {
- isHardwareAccelerated = true
- hardwareAccelImpl = implAVX2
- }
-}
diff --git a/vendor/blitter.com/go/kyber/hwaccel_amd64.s b/vendor/blitter.com/go/kyber/hwaccel_amd64.s
deleted file mode 100644
index a98dc72..0000000
--- a/vendor/blitter.com/go/kyber/hwaccel_amd64.s
+++ /dev/null
@@ -1,2749 +0,0 @@
-// +build !noasm,go1.10
-// hwaccel_amd64.s - AMD64 optimized routines.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to the software, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
-
-#include "textflag.h"
-
-// func cpuidAmd64(cpuidParams *uint32)
-TEXT ·cpuidAmd64(SB), NOSPLIT, $0-8
- MOVQ cpuidParams+0(FP), R15
- MOVL 0(R15), AX
- MOVL 8(R15), CX
- CPUID
- MOVL AX, 0(R15)
- MOVL BX, 4(R15)
- MOVL CX, 8(R15)
- MOVL DX, 12(R15)
- RET
-
-// func xgetbv0Amd64(xcrVec *uint32)
-TEXT ·xgetbv0Amd64(SB), NOSPLIT, $0-8
- MOVQ xcrVec+0(FP), BX
- XORL CX, CX
- XGETBV
- MOVL AX, 0(BX)
- MOVL DX, 4(BX)
- RET
-
-// Routines taken from the `avx2` implementation, converted to Go's assembly
-// dialect. I do this in lieu of cutting myself to see if I still can feel
-// pain.
-//
-// The conversion is mostly direct except:
-// * Instead of aligned loads, unaligned loads are used, as there is no
-// meaningful difference on modern Intel systems, and it's not immediately
-// obvious to me how Go will align global data.
-// * The polyvec_pointwise_acc family of routines take vectors of pointers
-// due to the different internal memory layout of a polyvec.
-// * The constants are renamed slightly.
-
-// Note:
-// * These must be kept in sync with the values in params.go.
-// Currently assumes Q = 7681, Q_INV = 57857.
-// * Caution, Little endian so things will look different from avx2/consts.c.
-DATA ·vpshufb_idx<>+0x00(SB)/8, $0x0504070601000302
-DATA ·vpshufb_idx<>+0x08(SB)/8, $0x0d0c0f0e09080b0a
-DATA ·vpshufb_idx<>+0x10(SB)/8, $0x0504070601000302
-DATA ·vpshufb_idx<>+0x18(SB)/8, $0x0d0c0f0e09080b0a
-GLOBL ·vpshufb_idx<>(SB), (NOPTR+RODATA), $32
-
-DATA ·low_mask<>+0x00(SB)/8, $0x1fff1fff1fff1fff
-DATA ·low_mask<>+0x08(SB)/8, $0x1fff1fff1fff1fff
-DATA ·low_mask<>+0x10(SB)/8, $0x1fff1fff1fff1fff
-DATA ·low_mask<>+0x18(SB)/8, $0x1fff1fff1fff1fff
-GLOBL ·low_mask<>(SB), (NOPTR+RODATA), $32
-
-DATA ·lowdword<>+0x00(SB)/8, $0x0000ffff0000ffff
-DATA ·lowdword<>+0x08(SB)/8, $0x0000ffff0000ffff
-DATA ·lowdword<>+0x10(SB)/8, $0x0000ffff0000ffff
-DATA ·lowdword<>+0x18(SB)/8, $0x0000ffff0000ffff
-GLOBL ·lowdword<>(SB), (NOPTR+RODATA), $32
-
-DATA ·q_x16<>+0x00(SB)/8, $0x1e011e011e011e01
-DATA ·q_x16<>+0x08(SB)/8, $0x1e011e011e011e01
-DATA ·q_x16<>+0x10(SB)/8, $0x1e011e011e011e01
-DATA ·q_x16<>+0x18(SB)/8, $0x1e011e011e011e01
-GLOBL ·q_x16<>(SB), (NOPTR+RODATA), $32
-
-DATA ·q2_x16<>+0x00(SB)/8, $0x3c023c023c023c02
-DATA ·q2_x16<>+0x08(SB)/8, $0x3c023c023c023c02
-DATA ·q2_x16<>+0x10(SB)/8, $0x3c023c023c023c02
-DATA ·q2_x16<>+0x18(SB)/8, $0x3c023c023c023c02
-GLOBL ·q2_x16<>(SB), (NOPTR+RODATA), $32
-
-DATA ·qinv_x16<>+0x00(SB)/8, $0xe201e201e201e201
-DATA ·qinv_x16<>+0x08(SB)/8, $0xe201e201e201e201
-DATA ·qinv_x16<>+0x10(SB)/8, $0xe201e201e201e201
-DATA ·qinv_x16<>+0x18(SB)/8, $0xe201e201e201e201
-GLOBL ·qinv_x16<>(SB), (NOPTR+RODATA), $32
-
-DATA ·f_x16<>+0x00(SB)/8, $0x0100010001000100
-DATA ·f_x16<>+0x08(SB)/8, $0x0100010001000100
-DATA ·f_x16<>+0x10(SB)/8, $0x0100010001000100
-DATA ·f_x16<>+0x18(SB)/8, $0x0100010001000100
-GLOBL ·f_x16<>(SB), (NOPTR+RODATA), $32
-
-DATA ·v_x16<>+0x00(SB)/8, $0x4442444244424442
-DATA ·v_x16<>+0x08(SB)/8, $0x4442444244424442
-DATA ·v_x16<>+0x10(SB)/8, $0x4442444244424442
-DATA ·v_x16<>+0x18(SB)/8, $0x4442444244424442
-GLOBL ·v_x16<>(SB), (NOPTR+RODATA), $32
-
-DATA ·montsq_x16<>+0x00(SB)/8, $0x15c115c115c115c1
-DATA ·montsq_x16<>+0x08(SB)/8, $0x15c115c115c115c1
-DATA ·montsq_x16<>+0x10(SB)/8, $0x15c115c115c115c1
-DATA ·montsq_x16<>+0x18(SB)/8, $0x15c115c115c115c1
-GLOBL ·montsq_x16<>(SB), (NOPTR+RODATA), $32
-
-DATA ·mask11<>+0x00(SB)/8, $0x1111111111111111
-DATA ·mask11<>+0x08(SB)/8, $0x1111111111111111
-DATA ·mask11<>+0x10(SB)/8, $0x1111111111111111
-DATA ·mask11<>+0x18(SB)/8, $0x1111111111111111
-GLOBL ·mask11<>(SB), (NOPTR+RODATA), $32
-
-DATA ·mask0f<>+0x00(SB)/8, $0x0f0f0f0f0f0f0f0f
-DATA ·mask0f<>+0x08(SB)/8, $0x0f0f0f0f0f0f0f0f
-DATA ·mask0f<>+0x10(SB)/8, $0x0f0f0f0f0f0f0f0f
-DATA ·mask0f<>+0x18(SB)/8, $0x0f0f0f0f0f0f0f0f
-GLOBL ·mask0f<>(SB), (NOPTR+RODATA), $32
-
-// func nttAVX2(inout, zetas *uint16)
-TEXT ·nttAVX2(SB), NOSPLIT, $0-16
- MOVQ inout+0(FP), DI
- MOVQ zetas+8(FP), SI
-
- VMOVDQU ·qinv_x16<>(SB), Y0
- VMOVDQU ·q_x16<>(SB), Y1
- VMOVDQU ·low_mask<>(SB), Y2
-
- // zetas
- VMOVDQU (SI), Y3
-
- // first round
- // load
- VMOVDQU (DI), Y4
- VMOVDQU 32(DI), Y5
- VMOVDQU 64(DI), Y6
- VMOVDQU 96(DI), Y7
- VMOVDQU 256(DI), Y8
- VMOVDQU 288(DI), Y9
- VMOVDQU 320(DI), Y10
- VMOVDQU 352(DI), Y11
-
- // level 0
- // mul
- VPMULLW Y3, Y8, Y12
- VPMULHW Y3, Y8, Y8
- VPMULLW Y3, Y9, Y13
- VPMULHW Y3, Y9, Y9
- VPMULLW Y3, Y10, Y14
- VPMULHW Y3, Y10, Y10
- VPMULLW Y3, Y11, Y15
- VPMULHW Y3, Y11, Y11
-
- // reduce
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULLW Y0, Y15, Y15
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPMULHW Y1, Y15, Y15
- VPSUBW Y12, Y8, Y12
- VPSUBW Y13, Y9, Y13
- VPSUBW Y14, Y10, Y14
- VPSUBW Y15, Y11, Y15
-
- // update
- VPSUBW Y12, Y4, Y8
- VPSUBW Y13, Y5, Y9
- VPSUBW Y14, Y6, Y10
- VPSUBW Y15, Y7, Y11
- VPADDW Y12, Y4, Y4
- VPADDW Y13, Y5, Y5
- VPADDW Y14, Y6, Y6
- VPADDW Y15, Y7, Y7
-
- // store
- VMOVDQU Y4, (DI)
- VMOVDQU Y5, 32(DI)
- VMOVDQU Y6, 64(DI)
- VMOVDQU Y7, 96(DI)
- VMOVDQU Y8, 256(DI)
- VMOVDQU Y9, 288(DI)
- VMOVDQU Y10, 320(DI)
- VMOVDQU Y11, 352(DI)
-
- ADDQ $128, DI
-
- // second round
- // load
- VMOVDQU (DI), Y4
- VMOVDQU 32(DI), Y5
- VMOVDQU 64(DI), Y6
- VMOVDQU 96(DI), Y7
- VMOVDQU 256(DI), Y8
- VMOVDQU 288(DI), Y9
- VMOVDQU 320(DI), Y10
- VMOVDQU 352(DI), Y11
-
- // level 0
- // mul
- VPMULLW Y3, Y8, Y12
- VPMULHW Y3, Y8, Y8
- VPMULLW Y3, Y9, Y13
- VPMULHW Y3, Y9, Y9
- VPMULLW Y3, Y10, Y14
- VPMULHW Y3, Y10, Y10
- VPMULLW Y3, Y11, Y15
- VPMULHW Y3, Y11, Y11
-
- // reduce
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULLW Y0, Y15, Y15
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPMULHW Y1, Y15, Y15
- VPSUBW Y12, Y8, Y12
- VPSUBW Y13, Y9, Y13
- VPSUBW Y14, Y10, Y14
- VPSUBW Y15, Y11, Y15
-
- // update
- VPSUBW Y12, Y4, Y8
- VPSUBW Y13, Y5, Y9
- VPSUBW Y14, Y6, Y10
- VPSUBW Y15, Y7, Y11
- VPADDW Y12, Y4, Y4
- VPADDW Y13, Y5, Y5
- VPADDW Y14, Y6, Y6
- VPADDW Y15, Y7, Y7
-
- // store
- VMOVDQU Y4, (DI)
- VMOVDQU Y5, 32(DI)
- VMOVDQU Y6, 64(DI)
- VMOVDQU Y7, 96(DI)
- VMOVDQU Y8, 256(DI)
- VMOVDQU Y9, 288(DI)
- VMOVDQU Y10, 320(DI)
- VMOVDQU Y11, 352(DI)
-
- SUBQ $128, DI
-
- // first round
- // zetas
- VMOVDQU 32(SI), Y3
-
- // load
- VMOVDQU (DI), Y4
- VMOVDQU 32(DI), Y5
- VMOVDQU 64(DI), Y6
- VMOVDQU 96(DI), Y7
- VMOVDQU 128(DI), Y8
- VMOVDQU 160(DI), Y9
- VMOVDQU 192(DI), Y10
- VMOVDQU 224(DI), Y11
-
- // level 1
- // mul
- VPMULLW Y3, Y8, Y12
- VPMULHW Y3, Y8, Y8
- VPMULLW Y3, Y9, Y13
- VPMULHW Y3, Y9, Y9
- VPMULLW Y3, Y10, Y14
- VPMULHW Y3, Y10, Y10
- VPMULLW Y3, Y11, Y15
- VPMULHW Y3, Y11, Y11
-
- // reduce
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULLW Y0, Y15, Y15
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPMULHW Y1, Y15, Y15
- VPSUBW Y12, Y8, Y12
- VPSUBW Y13, Y9, Y13
- VPSUBW Y14, Y10, Y14
- VPSUBW Y15, Y11, Y15
-
- // update
- VPSUBW Y12, Y4, Y8
- VPSUBW Y13, Y5, Y9
- VPSUBW Y14, Y6, Y10
- VPSUBW Y15, Y7, Y11
- VPADDW Y12, Y4, Y4
- VPADDW Y13, Y5, Y5
- VPADDW Y14, Y6, Y6
- VPADDW Y15, Y7, Y7
-
- // level 2
- // zetas
- VMOVDQU 96(SI), Y15
- VMOVDQU 128(SI), Y3
-
- // mul
- VPMULLW Y15, Y6, Y12
- VPMULHW Y15, Y6, Y6
- VPMULLW Y15, Y7, Y13
- VPMULHW Y15, Y7, Y7
- VPMULLW Y3, Y10, Y14
- VPMULHW Y3, Y10, Y10
- VPMULLW Y3, Y11, Y15
- VPMULHW Y3, Y11, Y11
-
- // reduce
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULLW Y0, Y15, Y15
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPMULHW Y1, Y15, Y15
- VPSUBW Y12, Y6, Y12
- VPSUBW Y13, Y7, Y13
- VPSUBW Y14, Y10, Y14
- VPSUBW Y15, Y11, Y15
-
- // update
- VPSUBW Y12, Y4, Y6
- VPSUBW Y13, Y5, Y7
- VPSUBW Y14, Y8, Y10
- VPSUBW Y15, Y9, Y11
- VPADDW Y12, Y4, Y4
- VPADDW Y13, Y5, Y5
- VPADDW Y14, Y8, Y8
- VPADDW Y15, Y9, Y9
-
- // level 3
- // zetas
- VMOVDQU 224(SI), Y13
- VMOVDQU 256(SI), Y14
- VMOVDQU 288(SI), Y15
- VMOVDQU 320(SI), Y3
-
- // mul
- VPMULLW Y13, Y5, Y12
- VPMULHW Y13, Y5, Y5
- VPMULLW Y14, Y7, Y13
- VPMULHW Y14, Y7, Y7
- VPMULLW Y15, Y9, Y14
- VPMULHW Y15, Y9, Y9
- VPMULLW Y3, Y11, Y15
- VPMULHW Y3, Y11, Y11
-
- // reduce
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULLW Y0, Y15, Y15
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPMULHW Y1, Y15, Y15
- VPSUBW Y12, Y5, Y12
- VPSUBW Y13, Y7, Y13
- VPSUBW Y14, Y9, Y14
- VPSUBW Y15, Y11, Y15
-
- // reduce 2
- VPSRAW $13, Y4, Y5
- VPSRAW $13, Y6, Y7
- VPSRAW $13, Y8, Y9
- VPSRAW $13, Y10, Y11
- VPAND Y2, Y4, Y4
- VPAND Y2, Y6, Y6
- VPAND Y2, Y8, Y8
- VPAND Y2, Y10, Y10
- VPSUBW Y5, Y4, Y4
- VPSUBW Y7, Y6, Y6
- VPSUBW Y9, Y8, Y8
- VPSUBW Y11, Y10, Y10
- VPSLLW $9, Y5, Y5
- VPSLLW $9, Y7, Y7
- VPSLLW $9, Y9, Y9
- VPSLLW $9, Y11, Y11
- VPADDW Y5, Y4, Y4
- VPADDW Y7, Y6, Y6
- VPADDW Y9, Y8, Y8
- VPADDW Y11, Y10, Y10
-
- // update
- VPSUBW Y12, Y4, Y5
- VPSUBW Y13, Y6, Y7
- VPSUBW Y14, Y8, Y9
- VPSUBW Y15, Y10, Y11
- VPADDW Y12, Y4, Y4
- VPADDW Y13, Y6, Y6
- VPADDW Y14, Y8, Y8
- VPADDW Y15, Y10, Y10
-
- // level 4
- // zetas
- VMOVDQU 480(SI), Y12
- VMOVDQU 512(SI), Y13
- VMOVDQU 544(SI), Y14
- VMOVDQU 576(SI), Y15
-
- // shuffle
- VPERM2I128 $0x02, Y4, Y5, Y3
- VPERM2I128 $0x13, Y4, Y5, Y4
- VPERM2I128 $0x02, Y6, Y7, Y5
- VPERM2I128 $0x13, Y6, Y7, Y6
- VPERM2I128 $0x02, Y8, Y9, Y7
- VPERM2I128 $0x13, Y8, Y9, Y8
- VPERM2I128 $0x02, Y10, Y11, Y9
- VPERM2I128 $0x13, Y10, Y11, Y10
-
- // mul
- VPMULLW Y12, Y4, Y11
- VPMULHW Y12, Y4, Y4
- VPMULLW Y13, Y6, Y12
- VPMULHW Y13, Y6, Y6
- VPMULLW Y14, Y8, Y13
- VPMULHW Y14, Y8, Y8
- VPMULLW Y15, Y10, Y14
- VPMULHW Y15, Y10, Y10
-
- // reduce
- VPMULLW Y0, Y11, Y11
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULHW Y1, Y11, Y11
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPSUBW Y11, Y4, Y11
- VPSUBW Y12, Y6, Y12
- VPSUBW Y13, Y8, Y13
- VPSUBW Y14, Y10, Y14
-
- // update
- VPSUBW Y11, Y3, Y4
- VPSUBW Y12, Y5, Y6
- VPSUBW Y13, Y7, Y8
- VPSUBW Y14, Y9, Y10
- VPADDW Y11, Y3, Y3
- VPADDW Y12, Y5, Y5
- VPADDW Y13, Y7, Y7
- VPADDW Y14, Y9, Y9
-
- // level 5
- // zetas
- VMOVDQU 736(SI), Y12
- VMOVDQU 768(SI), Y13
- VMOVDQU 800(SI), Y14
- VMOVDQU 832(SI), Y15
-
- // shuffle
- VSHUFPD $0x00, Y4, Y3, Y11
- VSHUFPD $0x0F, Y4, Y3, Y3
- VSHUFPD $0x00, Y6, Y5, Y4
- VSHUFPD $0x0F, Y6, Y5, Y5
- VSHUFPD $0x00, Y8, Y7, Y6
- VSHUFPD $0x0F, Y8, Y7, Y7
- VSHUFPD $0x00, Y10, Y9, Y8
- VSHUFPD $0x0F, Y10, Y9, Y9
-
- // mul
- VPMULLW Y12, Y3, Y10
- VPMULHW Y12, Y3, Y3
- VPMULLW Y13, Y5, Y12
- VPMULHW Y13, Y5, Y5
- VPMULLW Y14, Y7, Y13
- VPMULHW Y14, Y7, Y7
- VPMULLW Y15, Y9, Y14
- VPMULHW Y15, Y9, Y9
-
- // reduce
- VPMULLW Y0, Y10, Y10
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULHW Y1, Y10, Y10
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPSUBW Y10, Y3, Y10
- VPSUBW Y12, Y5, Y12
- VPSUBW Y13, Y7, Y13
- VPSUBW Y14, Y9, Y14
-
- // update
- VPSUBW Y10, Y11, Y3
- VPSUBW Y12, Y4, Y5
- VPSUBW Y13, Y6, Y7
- VPSUBW Y14, Y8, Y9
- VPADDW Y10, Y11, Y10
- VPADDW Y12, Y4, Y4
- VPADDW Y13, Y6, Y6
- VPADDW Y14, Y8, Y8
-
- // level 6
- // shuffle
- VPSHUFD $0xB1, Y10, Y12
- VPSHUFD $0xB1, Y3, Y13
- VPSHUFD $0xB1, Y4, Y14
- VPSHUFD $0xB1, Y5, Y15
- VPBLENDD $0x55, Y10, Y13, Y10
- VPBLENDD $0xAA, Y3, Y12, Y3
- VPBLENDD $0x55, Y4, Y15, Y4
- VPBLENDD $0xAA, Y5, Y14, Y5
- VPSHUFD $0xB1, Y6, Y12
- VPSHUFD $0xB1, Y7, Y13
- VPSHUFD $0xB1, Y8, Y14
- VPSHUFD $0xB1, Y9, Y15
- VPBLENDD $0x55, Y6, Y13, Y6
- VPBLENDD $0xAA, Y7, Y12, Y7
- VPBLENDD $0x55, Y8, Y15, Y8
- VPBLENDD $0xAA, Y9, Y14, Y9
-
- // zetas
- VMOVDQU 992(SI), Y12
- VMOVDQU 1024(SI), Y13
- VMOVDQU 1056(SI), Y14
- VMOVDQU 1088(SI), Y15
-
- // mul
- VPMULLW Y12, Y3, Y11
- VPMULHW Y12, Y3, Y3
- VPMULLW Y13, Y5, Y12
- VPMULHW Y13, Y5, Y5
- VPMULLW Y14, Y7, Y13
- VPMULHW Y14, Y7, Y7
- VPMULLW Y15, Y9, Y14
- VPMULHW Y15, Y9, Y9
-
- // reduce
- VPMULLW Y0, Y11, Y11
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULHW Y1, Y11, Y11
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPSUBW Y11, Y3, Y11
- VPSUBW Y12, Y5, Y12
- VPSUBW Y13, Y7, Y13
- VPSUBW Y14, Y9, Y14
-
- // reduce 2
- VPSRAW $13, Y10, Y3
- VPSRAW $13, Y4, Y5
- VPSRAW $13, Y6, Y7
- VPSRAW $13, Y8, Y9
- VPAND Y2, Y10, Y10
- VPAND Y2, Y4, Y4
- VPAND Y2, Y6, Y6
- VPAND Y2, Y8, Y8
- VPSUBW Y3, Y10, Y10
- VPSUBW Y5, Y4, Y4
- VPSUBW Y7, Y6, Y6
- VPSUBW Y9, Y8, Y8
- VPSLLW $9, Y3, Y3
- VPSLLW $9, Y5, Y5
- VPSLLW $9, Y7, Y7
- VPSLLW $9, Y9, Y9
- VPADDW Y3, Y10, Y10
- VPADDW Y5, Y4, Y4
- VPADDW Y7, Y6, Y6
- VPADDW Y9, Y8, Y8
-
- // update
- VPSUBW Y11, Y10, Y3
- VPSUBW Y12, Y4, Y5
- VPSUBW Y13, Y6, Y7
- VPSUBW Y14, Y8, Y9
- VPADDW Y11, Y10, Y10
- VPADDW Y12, Y4, Y4
- VPADDW Y13, Y6, Y6
- VPADDW Y14, Y8, Y8
-
- // level 7
- // shuffle
- VMOVDQU ·vpshufb_idx<>(SB), Y15
- VPSHUFB Y15, Y10, Y11
- VPSHUFB Y15, Y3, Y12
- VPSHUFB Y15, Y4, Y13
- VPSHUFB Y15, Y5, Y14
- VPBLENDW $0x55, Y10, Y12, Y10
- VPBLENDW $0xAA, Y3, Y11, Y3
- VPBLENDW $0x55, Y4, Y14, Y4
- VPBLENDW $0xAA, Y5, Y13, Y5
- VPSHUFB Y15, Y6, Y11
- VPSHUFB Y15, Y7, Y12
- VPSHUFB Y15, Y8, Y13
- VPSHUFB Y15, Y9, Y14
- VPBLENDW $0x55, Y6, Y12, Y6
- VPBLENDW $0xAA, Y7, Y11, Y7
- VPBLENDW $0x55, Y8, Y14, Y8
- VPBLENDW $0xAA, Y9, Y13, Y9
-
- // zetas
- VMOVDQU 1248(SI), Y12
- VMOVDQU 1280(SI), Y13
- VMOVDQU 1312(SI), Y14
- VMOVDQU 1344(SI), Y15
-
- // mul
- VPMULLW Y12, Y3, Y11
- VPMULHW Y12, Y3, Y3
- VPMULLW Y13, Y5, Y12
- VPMULHW Y13, Y5, Y5
- VPMULLW Y14, Y7, Y13
- VPMULHW Y14, Y7, Y7
- VPMULLW Y15, Y9, Y14
- VPMULHW Y15, Y9, Y9
-
- // reduce
- VPMULLW Y0, Y11, Y11
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULHW Y1, Y11, Y11
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPSUBW Y11, Y3, Y11
- VPSUBW Y12, Y5, Y12
- VPSUBW Y13, Y7, Y13
- VPSUBW Y14, Y9, Y14
-
- // reduce 3
- VMOVDQU ·q2_x16<>(SB), Y15
- VPSRAW $15, Y10, Y3
- VPSRAW $15, Y4, Y5
- VPSRAW $15, Y6, Y7
- VPSRAW $15, Y8, Y9
- VPAND Y15, Y3, Y3
- VPAND Y15, Y5, Y5
- VPAND Y15, Y7, Y7
- VPAND Y15, Y9, Y9
- VPADDW Y1, Y10, Y10
- VPADDW Y1, Y4, Y4
- VPADDW Y1, Y6, Y6
- VPADDW Y1, Y8, Y8
- VPADDW Y3, Y10, Y10
- VPADDW Y5, Y4, Y4
- VPADDW Y7, Y6, Y6
- VPADDW Y9, Y8, Y8
-
- // update
- VPSUBW Y11, Y10, Y3
- VPSUBW Y12, Y4, Y5
- VPSUBW Y13, Y6, Y7
- VPSUBW Y14, Y8, Y9
- VPADDW Y11, Y10, Y10
- VPADDW Y12, Y4, Y4
- VPADDW Y13, Y6, Y6
- VPADDW Y14, Y8, Y8
-
- // reorder
- VPUNPCKLWD Y3, Y10, Y12
- VPUNPCKHWD Y3, Y10, Y13
- VPUNPCKLWD Y5, Y4, Y14
- VPUNPCKHWD Y5, Y4, Y15
- VPUNPCKLWD Y7, Y6, Y3
- VPUNPCKHWD Y7, Y6, Y4
- VPUNPCKLWD Y9, Y8, Y5
- VPUNPCKHWD Y9, Y8, Y6
- VPERM2I128 $0x20, Y13, Y12, Y11
- VPERM2I128 $0x31, Y13, Y12, Y12
- VPERM2I128 $0x20, Y15, Y14, Y13
- VPERM2I128 $0x31, Y15, Y14, Y14
- VPERM2I128 $0x20, Y4, Y3, Y15
- VPERM2I128 $0x31, Y4, Y3, Y3
- VPERM2I128 $0x20, Y6, Y5, Y4
- VPERM2I128 $0x31, Y6, Y5, Y5
-
- // store
- VMOVDQU Y11, (DI)
- VMOVDQU Y12, 32(DI)
- VMOVDQU Y13, 64(DI)
- VMOVDQU Y14, 96(DI)
- VMOVDQU Y15, 128(DI)
- VMOVDQU Y3, 160(DI)
- VMOVDQU Y4, 192(DI)
- VMOVDQU Y5, 224(DI)
-
- ADDQ $256, DI
-
- // second round
- // zetas
- VMOVDQU 64(SI), Y3
-
- // load
- VMOVDQU (DI), Y4
- VMOVDQU 32(DI), Y5
- VMOVDQU 64(DI), Y6
- VMOVDQU 96(DI), Y7
- VMOVDQU 128(DI), Y8
- VMOVDQU 160(DI), Y9
- VMOVDQU 192(DI), Y10
- VMOVDQU 224(DI), Y11
-
- // level 1
- // mul
- VPMULLW Y3, Y8, Y12
- VPMULHW Y3, Y8, Y8
- VPMULLW Y3, Y9, Y13
- VPMULHW Y3, Y9, Y9
- VPMULLW Y3, Y10, Y14
- VPMULHW Y3, Y10, Y10
- VPMULLW Y3, Y11, Y15
- VPMULHW Y3, Y11, Y11
-
- // reduce
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULLW Y0, Y15, Y15
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPMULHW Y1, Y15, Y15
- VPSUBW Y12, Y8, Y12
- VPSUBW Y13, Y9, Y13
- VPSUBW Y14, Y10, Y14
- VPSUBW Y15, Y11, Y15
-
- // update
- VPSUBW Y12, Y4, Y8
- VPSUBW Y13, Y5, Y9
- VPSUBW Y14, Y6, Y10
- VPSUBW Y15, Y7, Y11
- VPADDW Y12, Y4, Y4
- VPADDW Y13, Y5, Y5
- VPADDW Y14, Y6, Y6
- VPADDW Y15, Y7, Y7
-
- // level 2
- // zetas
- VMOVDQU 160(SI), Y15
- VMOVDQU 192(SI), Y3
-
- // mul
- VPMULLW Y15, Y6, Y12
- VPMULHW Y15, Y6, Y6
- VPMULLW Y15, Y7, Y13
- VPMULHW Y15, Y7, Y7
- VPMULLW Y3, Y10, Y14
- VPMULHW Y3, Y10, Y10
- VPMULLW Y3, Y11, Y15
- VPMULHW Y3, Y11, Y11
-
- // reduce
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULLW Y0, Y15, Y15
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPMULHW Y1, Y15, Y15
- VPSUBW Y12, Y6, Y12
- VPSUBW Y13, Y7, Y13
- VPSUBW Y14, Y10, Y14
- VPSUBW Y15, Y11, Y15
-
- // update
- VPSUBW Y12, Y4, Y6
- VPSUBW Y13, Y5, Y7
- VPSUBW Y14, Y8, Y10
- VPSUBW Y15, Y9, Y11
- VPADDW Y12, Y4, Y4
- VPADDW Y13, Y5, Y5
- VPADDW Y14, Y8, Y8
- VPADDW Y15, Y9, Y9
-
- // level 3
- // zetas
- VMOVDQU 352(SI), Y13
- VMOVDQU 384(SI), Y14
- VMOVDQU 416(SI), Y15
- VMOVDQU 448(SI), Y3
-
- // mul
- VPMULLW Y13, Y5, Y12
- VPMULHW Y13, Y5, Y5
- VPMULLW Y14, Y7, Y13
- VPMULHW Y14, Y7, Y7
- VPMULLW Y15, Y9, Y14
- VPMULHW Y15, Y9, Y9
- VPMULLW Y3, Y11, Y15
- VPMULHW Y3, Y11, Y11
-
- // reduce
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULLW Y0, Y15, Y15
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPMULHW Y1, Y15, Y15
- VPSUBW Y12, Y5, Y12
- VPSUBW Y13, Y7, Y13
- VPSUBW Y14, Y9, Y14
- VPSUBW Y15, Y11, Y15
-
- // reduce 2
- VPSRAW $13, Y4, Y5
- VPSRAW $13, Y6, Y7
- VPSRAW $13, Y8, Y9
- VPSRAW $13, Y10, Y11
- VPAND Y2, Y4, Y4
- VPAND Y2, Y6, Y6
- VPAND Y2, Y8, Y8
- VPAND Y2, Y10, Y10
- VPSUBW Y5, Y4, Y4
- VPSUBW Y7, Y6, Y6
- VPSUBW Y9, Y8, Y8
- VPSUBW Y11, Y10, Y10
- VPSLLW $9, Y5, Y5
- VPSLLW $9, Y7, Y7
- VPSLLW $9, Y9, Y9
- VPSLLW $9, Y11, Y11
- VPADDW Y5, Y4, Y4
- VPADDW Y7, Y6, Y6
- VPADDW Y9, Y8, Y8
- VPADDW Y11, Y10, Y10
-
- // update
- VPSUBW Y12, Y4, Y5
- VPSUBW Y13, Y6, Y7
- VPSUBW Y14, Y8, Y9
- VPSUBW Y15, Y10, Y11
- VPADDW Y12, Y4, Y4
- VPADDW Y13, Y6, Y6
- VPADDW Y14, Y8, Y8
- VPADDW Y15, Y10, Y10
-
- // level 4
- // zetas
- VMOVDQU 608(SI), Y12
- VMOVDQU 640(SI), Y13
- VMOVDQU 672(SI), Y14
- VMOVDQU 704(SI), Y15
-
- // shuffle
- VPERM2I128 $0x02, Y4, Y5, Y3
- VPERM2I128 $0x13, Y4, Y5, Y4
- VPERM2I128 $0x02, Y6, Y7, Y5
- VPERM2I128 $0x13, Y6, Y7, Y6
- VPERM2I128 $0x02, Y8, Y9, Y7
- VPERM2I128 $0x13, Y8, Y9, Y8
- VPERM2I128 $0x02, Y10, Y11, Y9
- VPERM2I128 $0x13, Y10, Y11, Y10
-
- // mul
- VPMULLW Y12, Y4, Y11
- VPMULHW Y12, Y4, Y4
- VPMULLW Y13, Y6, Y12
- VPMULHW Y13, Y6, Y6
- VPMULLW Y14, Y8, Y13
- VPMULHW Y14, Y8, Y8
- VPMULLW Y15, Y10, Y14
- VPMULHW Y15, Y10, Y10
-
- // reduce
- VPMULLW Y0, Y11, Y11
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULHW Y1, Y11, Y11
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPSUBW Y11, Y4, Y11
- VPSUBW Y12, Y6, Y12
- VPSUBW Y13, Y8, Y13
- VPSUBW Y14, Y10, Y14
-
- // update
- VPSUBW Y11, Y3, Y4
- VPSUBW Y12, Y5, Y6
- VPSUBW Y13, Y7, Y8
- VPSUBW Y14, Y9, Y10
- VPADDW Y11, Y3, Y3
- VPADDW Y12, Y5, Y5
- VPADDW Y13, Y7, Y7
- VPADDW Y14, Y9, Y9
-
- // level 5
- // zetas
- VMOVDQU 864(SI), Y12
- VMOVDQU 896(SI), Y13
- VMOVDQU 928(SI), Y14
- VMOVDQU 960(SI), Y15
-
- // shuffle
- VSHUFPD $0x00, Y4, Y3, Y11
- VSHUFPD $0x0F, Y4, Y3, Y3
- VSHUFPD $0x00, Y6, Y5, Y4
- VSHUFPD $0x0F, Y6, Y5, Y5
- VSHUFPD $0x00, Y8, Y7, Y6
- VSHUFPD $0x0F, Y8, Y7, Y7
- VSHUFPD $0x00, Y10, Y9, Y8
- VSHUFPD $0x0F, Y10, Y9, Y9
-
- // mul
- VPMULLW Y12, Y3, Y10
- VPMULHW Y12, Y3, Y3
- VPMULLW Y13, Y5, Y12
- VPMULHW Y13, Y5, Y5
- VPMULLW Y14, Y7, Y13
- VPMULHW Y14, Y7, Y7
- VPMULLW Y15, Y9, Y14
- VPMULHW Y15, Y9, Y9
-
- // reduce
- VPMULLW Y0, Y10, Y10
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULHW Y1, Y10, Y10
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPSUBW Y10, Y3, Y10
- VPSUBW Y12, Y5, Y12
- VPSUBW Y13, Y7, Y13
- VPSUBW Y14, Y9, Y14
-
- // update
- VPSUBW Y10, Y11, Y3
- VPSUBW Y12, Y4, Y5
- VPSUBW Y13, Y6, Y7
- VPSUBW Y14, Y8, Y9
- VPADDW Y10, Y11, Y10
- VPADDW Y12, Y4, Y4
- VPADDW Y13, Y6, Y6
- VPADDW Y14, Y8, Y8
-
- // level 6
- // shuffle
- VPSHUFD $0xB1, Y10, Y12
- VPSHUFD $0xB1, Y3, Y13
- VPSHUFD $0xB1, Y4, Y14
- VPSHUFD $0xB1, Y5, Y15
- VPBLENDD $0x55, Y10, Y13, Y10
- VPBLENDD $0xAA, Y3, Y12, Y3
- VPBLENDD $0x55, Y4, Y15, Y4
- VPBLENDD $0xAA, Y5, Y14, Y5
- VPSHUFD $0xB1, Y6, Y12
- VPSHUFD $0xB1, Y7, Y13
- VPSHUFD $0xB1, Y8, Y14
- VPSHUFD $0xB1, Y9, Y15
- VPBLENDD $0x55, Y6, Y13, Y6
- VPBLENDD $0xAA, Y7, Y12, Y7
- VPBLENDD $0x55, Y8, Y15, Y8
- VPBLENDD $0xAA, Y9, Y14, Y9
-
- // zetas
- VMOVDQU 1120(SI), Y12
- VMOVDQU 1152(SI), Y13
- VMOVDQU 1184(SI), Y14
- VMOVDQU 1216(SI), Y15
-
- // mul
- VPMULLW Y12, Y3, Y11
- VPMULHW Y12, Y3, Y3
- VPMULLW Y13, Y5, Y12
- VPMULHW Y13, Y5, Y5
- VPMULLW Y14, Y7, Y13
- VPMULHW Y14, Y7, Y7
- VPMULLW Y15, Y9, Y14
- VPMULHW Y15, Y9, Y9
-
- // reduce
- VPMULLW Y0, Y11, Y11
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULHW Y1, Y11, Y11
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPSUBW Y11, Y3, Y11
- VPSUBW Y12, Y5, Y12
- VPSUBW Y13, Y7, Y13
- VPSUBW Y14, Y9, Y14
-
- // reduce 2
- VPSRAW $13, Y10, Y3
- VPSRAW $13, Y4, Y5
- VPSRAW $13, Y6, Y7
- VPSRAW $13, Y8, Y9
- VPAND Y2, Y10, Y10
- VPAND Y2, Y4, Y4
- VPAND Y2, Y6, Y6
- VPAND Y2, Y8, Y8
- VPSUBW Y3, Y10, Y10
- VPSUBW Y5, Y4, Y4
- VPSUBW Y7, Y6, Y6
- VPSUBW Y9, Y8, Y8
- VPSLLW $9, Y3, Y3
- VPSLLW $9, Y5, Y5
- VPSLLW $9, Y7, Y7
- VPSLLW $9, Y9, Y9
- VPADDW Y3, Y10, Y10
- VPADDW Y5, Y4, Y4
- VPADDW Y7, Y6, Y6
- VPADDW Y9, Y8, Y8
-
- // update
- VPSUBW Y11, Y10, Y3
- VPSUBW Y12, Y4, Y5
- VPSUBW Y13, Y6, Y7
- VPSUBW Y14, Y8, Y9
- VPADDW Y11, Y10, Y10
- VPADDW Y12, Y4, Y4
- VPADDW Y13, Y6, Y6
- VPADDW Y14, Y8, Y8
-
- // level 7
- // shuffle
- VMOVDQU ·vpshufb_idx<>(SB), Y15
- VPSHUFB Y15, Y10, Y11
- VPSHUFB Y15, Y3, Y12
- VPSHUFB Y15, Y4, Y13
- VPSHUFB Y15, Y5, Y14
- VPBLENDW $0x55, Y10, Y12, Y10
- VPBLENDW $0xAA, Y3, Y11, Y3
- VPBLENDW $0x55, Y4, Y14, Y4
- VPBLENDW $0xAA, Y5, Y13, Y5
- VPSHUFB Y15, Y6, Y11
- VPSHUFB Y15, Y7, Y12
- VPSHUFB Y15, Y8, Y13
- VPSHUFB Y15, Y9, Y14
- VPBLENDW $0x55, Y6, Y12, Y6
- VPBLENDW $0xAA, Y7, Y11, Y7
- VPBLENDW $0x55, Y8, Y14, Y8
- VPBLENDW $0xAA, Y9, Y13, Y9
-
- // zetas
- VMOVDQU 1376(SI), Y12
- VMOVDQU 1408(SI), Y13
- VMOVDQU 1440(SI), Y14
- VMOVDQU 1472(SI), Y15
-
- // mul
- VPMULLW Y12, Y3, Y11
- VPMULHW Y12, Y3, Y3
- VPMULLW Y13, Y5, Y12
- VPMULHW Y13, Y5, Y5
- VPMULLW Y14, Y7, Y13
- VPMULHW Y14, Y7, Y7
- VPMULLW Y15, Y9, Y14
- VPMULHW Y15, Y9, Y9
-
- // reduce
- VPMULLW Y0, Y11, Y11
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULHW Y1, Y11, Y11
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPSUBW Y11, Y3, Y11
- VPSUBW Y12, Y5, Y12
- VPSUBW Y13, Y7, Y13
- VPSUBW Y14, Y9, Y14
-
- // reduce 3
- VMOVDQU ·q2_x16<>(SB), Y15
- VPSRAW $15, Y10, Y3
- VPSRAW $15, Y4, Y5
- VPSRAW $15, Y6, Y7
- VPSRAW $15, Y8, Y9
- VPAND Y15, Y3, Y3
- VPAND Y15, Y5, Y5
- VPAND Y15, Y7, Y7
- VPAND Y15, Y9, Y9
- VPADDW Y1, Y10, Y10
- VPADDW Y1, Y4, Y4
- VPADDW Y1, Y6, Y6
- VPADDW Y1, Y8, Y8
- VPADDW Y3, Y10, Y10
- VPADDW Y5, Y4, Y4
- VPADDW Y7, Y6, Y6
- VPADDW Y9, Y8, Y8
-
- // update
- VPSUBW Y11, Y10, Y3
- VPSUBW Y12, Y4, Y5
- VPSUBW Y13, Y6, Y7
- VPSUBW Y14, Y8, Y9
- VPADDW Y11, Y10, Y10
- VPADDW Y12, Y4, Y4
- VPADDW Y13, Y6, Y6
- VPADDW Y14, Y8, Y8
-
- // reorder
- VPUNPCKLWD Y3, Y10, Y12
- VPUNPCKHWD Y3, Y10, Y13
- VPUNPCKLWD Y5, Y4, Y14
- VPUNPCKHWD Y5, Y4, Y15
- VPUNPCKLWD Y7, Y6, Y3
- VPUNPCKHWD Y7, Y6, Y4
- VPUNPCKLWD Y9, Y8, Y5
- VPUNPCKHWD Y9, Y8, Y6
- VPERM2I128 $0x20, Y13, Y12, Y11
- VPERM2I128 $0x31, Y13, Y12, Y12
- VPERM2I128 $0x20, Y15, Y14, Y13
- VPERM2I128 $0x31, Y15, Y14, Y14
- VPERM2I128 $0x20, Y4, Y3, Y15
- VPERM2I128 $0x31, Y4, Y3, Y3
- VPERM2I128 $0x20, Y6, Y5, Y4
- VPERM2I128 $0x31, Y6, Y5, Y5
-
- // store
- VMOVDQU Y11, (DI)
- VMOVDQU Y12, 32(DI)
- VMOVDQU Y13, 64(DI)
- VMOVDQU Y14, 96(DI)
- VMOVDQU Y15, 128(DI)
- VMOVDQU Y3, 160(DI)
- VMOVDQU Y4, 192(DI)
- VMOVDQU Y5, 224(DI)
-
- VZEROUPPER
- RET
-
-// Go 1.10's VPERMQ support expects the imm8 to be a `int8`, instead of a
-// `uint8`. While this is fixed in master, use the signed representation
-// for now till it's reasonable to expect versions with the fix to be widely
-// available.
-//
-// See: https://github.com/golang/go/issues/24378
-#define invntt_VPERMQ_IDX $-40 // $0xd8
-
-// func invnttAVX2(inout, omegas *uint16)
-TEXT ·invnttAVX2(SB), NOSPLIT, $0-16
- MOVQ inout+0(FP), DI
- MOVQ omegas+8(FP), SI
-
- VMOVDQU ·qinv_x16<>(SB), Y0
- VMOVDQU ·q_x16<>(SB), Y1
- VMOVDQU ·v_x16<>(SB), Y2
-
- MOVQ SI, R8
-
- // first round
- // load
- VMOVDQU (DI), Y4
- VMOVDQU 32(DI), Y5
- VMOVDQU 64(DI), Y6
- VMOVDQU 96(DI), Y7
- VMOVDQU 128(DI), Y8
- VMOVDQU 160(DI), Y9
- VMOVDQU 192(DI), Y10
- VMOVDQU 224(DI), Y11
-
- // reorder
- VMOVDQU ·lowdword<>(SB), Y3
- VPAND Y3, Y4, Y12
- VPAND Y3, Y5, Y13
- VPAND Y3, Y6, Y14
- VPAND Y3, Y7, Y15
- VPSRLD $16, Y4, Y4
- VPSRLD $16, Y5, Y5
- VPSRLD $16, Y6, Y6
- VPSRLD $16, Y7, Y7
- VPACKUSDW Y5, Y4, Y5
- VPACKUSDW Y13, Y12, Y4
- VPACKUSDW Y7, Y6, Y7
- VPACKUSDW Y15, Y14, Y6
- VPERMQ invntt_VPERMQ_IDX, Y4, Y4
- VPERMQ invntt_VPERMQ_IDX, Y5, Y5
- VPERMQ invntt_VPERMQ_IDX, Y6, Y6
- VPERMQ invntt_VPERMQ_IDX, Y7, Y7
- VPAND Y3, Y8, Y12
- VPAND Y3, Y9, Y13
- VPAND Y3, Y10, Y14
- VPAND Y3, Y11, Y15
- VPSRLD $16, Y8, Y8
- VPSRLD $16, Y9, Y9
- VPSRLD $16, Y10, Y10
- VPSRLD $16, Y11, Y11
- VPACKUSDW Y9, Y8, Y9
- VPACKUSDW Y13, Y12, Y8
- VPACKUSDW Y11, Y10, Y11
- VPACKUSDW Y15, Y14, Y10
- VPERMQ invntt_VPERMQ_IDX, Y8, Y8
- VPERMQ invntt_VPERMQ_IDX, Y9, Y9
- VPERMQ invntt_VPERMQ_IDX, Y10, Y10
- VPERMQ invntt_VPERMQ_IDX, Y11, Y11
-
- // level 0
- // update
- VPSUBW Y5, Y4, Y12
- VPSUBW Y7, Y6, Y13
- VPSUBW Y9, Y8, Y14
- VPSUBW Y11, Y10, Y15
- VPADDW Y4, Y5, Y4
- VPADDW Y6, Y7, Y6
- VPADDW Y8, Y9, Y8
- VPADDW Y10, Y11, Y10
-
- // zetas
- VMOVDQU (R8), Y7
- VMOVDQU 32(R8), Y9
- VMOVDQU 64(R8), Y11
- VMOVDQU 96(R8), Y3
-
- // mul
- VPMULLW Y7, Y12, Y5
- VPMULHW Y7, Y12, Y12
- VPMULLW Y9, Y13, Y7
- VPMULHW Y9, Y13, Y13
- VPMULLW Y11, Y14, Y9
- VPMULHW Y11, Y14, Y14
- VPMULLW Y3, Y15, Y11
- VPMULHW Y3, Y15, Y15
-
- // reduce
- VPMULLW Y0, Y5, Y5
- VPMULLW Y0, Y7, Y7
- VPMULLW Y0, Y9, Y9
- VPMULLW Y0, Y11, Y11
- VPMULHW Y1, Y5, Y5
- VPMULHW Y1, Y7, Y7
- VPMULHW Y1, Y9, Y9
- VPMULHW Y1, Y11, Y11
- VPSUBW Y5, Y12, Y5
- VPSUBW Y7, Y13, Y7
- VPSUBW Y9, Y14, Y9
- VPSUBW Y11, Y15, Y11
-
- // level 1
- // shuffle
- VMOVDQU ·vpshufb_idx<>(SB), Y3
- VPSHUFB Y3, Y4, Y12
- VPSHUFB Y3, Y5, Y13
- VPSHUFB Y3, Y6, Y14
- VPSHUFB Y3, Y7, Y15
- VPBLENDW $0x55, Y4, Y13, Y4
- VPBLENDW $0xAA, Y5, Y12, Y5
- VPBLENDW $0x55, Y6, Y15, Y6
- VPBLENDW $0xAA, Y7, Y14, Y7
- VPSHUFB Y3, Y8, Y12
- VPSHUFB Y3, Y9, Y13
- VPSHUFB Y3, Y10, Y14
- VPSHUFB Y3, Y11, Y15
- VPBLENDW $0x55, Y8, Y13, Y8
- VPBLENDW $0xAA, Y9, Y12, Y9
- VPBLENDW $0x55, Y10, Y15, Y10
- VPBLENDW $0xAA, Y11, Y14, Y11
-
- // update
- VPSUBW Y5, Y4, Y12
- VPSUBW Y7, Y6, Y13
- VPSUBW Y9, Y8, Y14
- VPSUBW Y11, Y10, Y15
- VPADDW Y4, Y5, Y4
- VPADDW Y6, Y7, Y6
- VPADDW Y8, Y9, Y8
- VPADDW Y10, Y11, Y10
-
- // zetas
- VMOVDQU 256(R8), Y7
- VMOVDQU 288(R8), Y9
- VMOVDQU 320(R8), Y11
- VMOVDQU 352(R8), Y3
-
- // mul
- VPMULLW Y7, Y12, Y5
- VPMULHW Y7, Y12, Y12
- VPMULLW Y9, Y13, Y7
- VPMULHW Y9, Y13, Y13
- VPMULLW Y11, Y14, Y9
- VPMULHW Y11, Y14, Y14
- VPMULLW Y3, Y15, Y11
- VPMULHW Y3, Y15, Y15
-
- // reduce
- VPMULLW Y0, Y5, Y5
- VPMULLW Y0, Y7, Y7
- VPMULLW Y0, Y9, Y9
- VPMULLW Y0, Y11, Y11
- VPMULHW Y1, Y5, Y5
- VPMULHW Y1, Y7, Y7
- VPMULHW Y1, Y9, Y9
- VPMULHW Y1, Y11, Y11
- VPSUBW Y5, Y12, Y5
- VPSUBW Y7, Y13, Y7
- VPSUBW Y9, Y14, Y9
- VPSUBW Y11, Y15, Y11
-
- // reduce 2
- VPMULHW Y2, Y4, Y12
- VPMULHW Y2, Y6, Y13
- VPMULHW Y2, Y8, Y14
- VPMULHW Y2, Y10, Y15
- VPSRAW $11, Y12, Y12
- VPSRAW $11, Y13, Y13
- VPSRAW $11, Y14, Y14
- VPSRAW $11, Y15, Y15
- VPMULLW Y1, Y12, Y12
- VPMULLW Y1, Y13, Y13
- VPMULLW Y1, Y14, Y14
- VPMULLW Y1, Y15, Y15
- VPSUBW Y12, Y4, Y4
- VPSUBW Y13, Y6, Y6
- VPSUBW Y14, Y8, Y8
- VPSUBW Y15, Y10, Y10
-
- // level 2
- // shuffle
- VPSHUFD $0xB1, Y4, Y12
- VPSHUFD $0xB1, Y5, Y13
- VPSHUFD $0xB1, Y6, Y14
- VPSHUFD $0xB1, Y7, Y15
- VPBLENDD $0x55, Y4, Y13, Y4
- VPBLENDD $0xAA, Y5, Y12, Y5
- VPBLENDD $0x55, Y6, Y15, Y6
- VPBLENDD $0xAA, Y7, Y14, Y7
- VPSHUFD $0xB1, Y8, Y12
- VPSHUFD $0xB1, Y9, Y13
- VPSHUFD $0xB1, Y10, Y14
- VPSHUFD $0xB1, Y11, Y15
- VPBLENDD $0x55, Y8, Y13, Y8
- VPBLENDD $0xAA, Y9, Y12, Y9
- VPBLENDD $0x55, Y10, Y15, Y10
- VPBLENDD $0xAA, Y11, Y14, Y11
-
- // update
- VPSUBW Y5, Y4, Y12
- VPSUBW Y7, Y6, Y13
- VPSUBW Y9, Y8, Y14
- VPSUBW Y11, Y10, Y15
- VPADDW Y4, Y5, Y4
- VPADDW Y6, Y7, Y6
- VPADDW Y8, Y9, Y8
- VPADDW Y10, Y11, Y10
-
- // zetas
- VMOVDQU 512(R8), Y7
- VMOVDQU 544(R8), Y9
- VMOVDQU 576(R8), Y11
- VMOVDQU 608(R8), Y3
-
- // mul
- VPMULLW Y7, Y12, Y5
- VPMULHW Y7, Y12, Y12
- VPMULLW Y9, Y13, Y7
- VPMULHW Y9, Y13, Y13
- VPMULLW Y11, Y14, Y9
- VPMULHW Y11, Y14, Y14
- VPMULLW Y3, Y15, Y11
- VPMULHW Y3, Y15, Y15
-
- // reduce
- VPMULLW Y0, Y5, Y5
- VPMULLW Y0, Y7, Y7
- VPMULLW Y0, Y9, Y9
- VPMULLW Y0, Y11, Y11
- VPMULHW Y1, Y5, Y5
- VPMULHW Y1, Y7, Y7
- VPMULHW Y1, Y9, Y9
- VPMULHW Y1, Y11, Y11
- VPSUBW Y5, Y12, Y5
- VPSUBW Y7, Y13, Y7
- VPSUBW Y9, Y14, Y9
- VPSUBW Y11, Y15, Y11
-
- // level 3
- // shuffle
- VSHUFPD $0x00, Y5, Y4, Y3
- VSHUFPD $0x0F, Y5, Y4, Y4
- VSHUFPD $0x00, Y7, Y6, Y5
- VSHUFPD $0x0F, Y7, Y6, Y6
- VSHUFPD $0x00, Y9, Y8, Y7
- VSHUFPD $0x0F, Y9, Y8, Y8
- VSHUFPD $0x00, Y11, Y10, Y9
- VSHUFPD $0x0F, Y11, Y10, Y10
-
- // update
- VPSUBW Y4, Y3, Y12
- VPSUBW Y6, Y5, Y13
- VPSUBW Y8, Y7, Y14
- VPSUBW Y10, Y9, Y15
- VPADDW Y3, Y4, Y3
- VPADDW Y5, Y6, Y5
- VPADDW Y7, Y8, Y7
- VPADDW Y9, Y10, Y9
-
- // zetas
- VMOVDQU 768(R8), Y6
- VMOVDQU 800(R8), Y8
- VMOVDQU 832(R8), Y10
- VMOVDQU 864(R8), Y11
-
- // mul
- VPMULLW Y6, Y12, Y4
- VPMULHW Y6, Y12, Y12
- VPMULLW Y8, Y13, Y6
- VPMULHW Y8, Y13, Y13
- VPMULLW Y10, Y14, Y8
- VPMULHW Y10, Y14, Y14
- VPMULLW Y11, Y15, Y10
- VPMULHW Y11, Y15, Y15
-
- // reduce
- VPMULLW Y0, Y4, Y4
- VPMULLW Y0, Y6, Y6
- VPMULLW Y0, Y8, Y8
- VPMULLW Y0, Y10, Y10
- VPMULHW Y1, Y4, Y4
- VPMULHW Y1, Y6, Y6
- VPMULHW Y1, Y8, Y8
- VPMULHW Y1, Y10, Y10
- VPSUBW Y4, Y12, Y4
- VPSUBW Y6, Y13, Y6
- VPSUBW Y8, Y14, Y8
- VPSUBW Y10, Y15, Y10
-
- // reduce 2
- VPMULHW Y2, Y3, Y12
- VPMULHW Y2, Y5, Y13
- VPMULHW Y2, Y7, Y14
- VPMULHW Y2, Y9, Y15
- VPSRAW $11, Y12, Y12
- VPSRAW $11, Y13, Y13
- VPSRAW $11, Y14, Y14
- VPSRAW $11, Y15, Y15
- VPMULLW Y1, Y12, Y12
- VPMULLW Y1, Y13, Y13
- VPMULLW Y1, Y14, Y14
- VPMULLW Y1, Y15, Y15
- VPSUBW Y12, Y3, Y3
- VPSUBW Y13, Y5, Y5
- VPSUBW Y14, Y7, Y7
- VPSUBW Y15, Y9, Y9
-
- // level 4
- // shuffle
- VPERM2I128 $0x02, Y3, Y4, Y11
- VPERM2I128 $0x13, Y3, Y4, Y3
- VPERM2I128 $0x02, Y5, Y6, Y4
- VPERM2I128 $0x13, Y5, Y6, Y5
- VPERM2I128 $0x02, Y7, Y8, Y6
- VPERM2I128 $0x13, Y7, Y8, Y7
- VPERM2I128 $0x02, Y9, Y10, Y8
- VPERM2I128 $0x13, Y9, Y10, Y9
-
- // update
- VMOVDQA Y11, Y12
- VMOVDQA Y4, Y13
- VMOVDQA Y6, Y14
- VMOVDQA Y8, Y15
- VPADDW Y11, Y3, Y10
- VPADDW Y4, Y5, Y4
- VPADDW Y6, Y7, Y6
- VPADDW Y8, Y9, Y8
- VPSUBW Y3, Y12, Y3
- VPSUBW Y5, Y13, Y5
- VPSUBW Y7, Y14, Y7
- VPSUBW Y9, Y15, Y9
-
- // zetas
- VMOVDQU 1024(R8), Y12
- VMOVDQU 1056(R8), Y13
- VMOVDQU 1088(R8), Y14
- VMOVDQU 1120(R8), Y15
-
- // mul
- VPMULLW Y12, Y3, Y11
- VPMULHW Y12, Y3, Y3
- VPMULLW Y13, Y5, Y12
- VPMULHW Y13, Y5, Y5
- VPMULLW Y14, Y7, Y13
- VPMULHW Y14, Y7, Y7
- VPMULLW Y15, Y9, Y14
- VPMULHW Y15, Y9, Y9
-
- // reduce
- VPMULLW Y0, Y11, Y11
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULHW Y1, Y11, Y11
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPSUBW Y11, Y3, Y3
- VPSUBW Y12, Y5, Y5
- VPSUBW Y13, Y7, Y7
- VPSUBW Y14, Y9, Y9
-
- // level 5
- // update
- VMOVDQA Y10, Y12
- VMOVDQA Y3, Y13
- VMOVDQA Y6, Y14
- VMOVDQA Y7, Y15
- VPADDW Y10, Y4, Y10
- VPADDW Y3, Y5, Y3
- VPADDW Y6, Y8, Y6
- VPADDW Y7, Y9, Y7
- VPSUBW Y4, Y12, Y4
- VPSUBW Y5, Y13, Y5
- VPSUBW Y8, Y14, Y8
- VPSUBW Y9, Y15, Y9
-
- // zetas
- VMOVDQU 1280(SI), Y14
- VMOVDQU 1312(SI), Y15
-
- // mul
- VPMULLW Y14, Y4, Y11
- VPMULLW Y14, Y5, Y12
- VPMULLW Y15, Y8, Y13
- VPMULHW Y14, Y4, Y4
- VPMULHW Y14, Y5, Y5
- VPMULHW Y15, Y8, Y8
- VPMULLW Y15, Y9, Y14
- VPMULHW Y15, Y9, Y9
-
- // reduce
- VPMULLW Y0, Y11, Y11
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULHW Y1, Y11, Y11
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPSUBW Y11, Y4, Y4
- VPSUBW Y12, Y5, Y5
- VPSUBW Y13, Y8, Y8
- VPSUBW Y14, Y9, Y9
-
- // reduce 2
- VPMULHW Y2, Y10, Y12
- VPMULHW Y2, Y6, Y13
- VPSRAW $11, Y12, Y12
- VPSRAW $11, Y13, Y13
- VPMULLW Y1, Y12, Y12
- VPMULLW Y1, Y13, Y13
- VPSUBW Y12, Y10, Y10
- VPSUBW Y13, Y6, Y6
-
- // level 6
- // update
- VMOVDQA Y10, Y12
- VMOVDQA Y3, Y13
- VMOVDQA Y4, Y14
- VMOVDQA Y5, Y15
- VPADDW Y10, Y6, Y10
- VPADDW Y3, Y7, Y3
- VPADDW Y4, Y8, Y4
- VPADDW Y5, Y9, Y5
- VPSUBW Y6, Y12, Y6
- VPSUBW Y7, Y13, Y7
- VPSUBW Y8, Y14, Y8
- VPSUBW Y9, Y15, Y9
-
- // zetas
- VMOVDQU 1408(SI), Y15
-
- // mul
- VPMULLW Y15, Y6, Y11
- VPMULLW Y15, Y7, Y12
- VPMULLW Y15, Y8, Y13
- VPMULLW Y15, Y9, Y14
- VPMULHW Y15, Y6, Y6
- VPMULHW Y15, Y7, Y7
- VPMULHW Y15, Y8, Y8
- VPMULHW Y15, Y9, Y9
-
- // reduce
- VPMULLW Y0, Y11, Y11
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULHW Y1, Y11, Y11
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPSUBW Y11, Y6, Y6
- VPSUBW Y12, Y7, Y7
- VPSUBW Y13, Y8, Y8
- VPSUBW Y14, Y9, Y9
-
- // reduce 2
- VPMULHW Y2, Y3, Y12
- VPSRAW $11, Y12, Y12
- VPMULLW Y1, Y12, Y12
- VPSUBW Y12, Y3, Y3
-
- // store
- VMOVDQU Y10, (DI)
- VMOVDQU Y3, 32(DI)
- VMOVDQU Y4, 64(DI)
- VMOVDQU Y5, 96(DI)
- VMOVDQU Y6, 128(DI)
- VMOVDQU Y7, 160(DI)
- VMOVDQU Y8, 192(DI)
- VMOVDQU Y9, 224(DI)
-
- ADDQ $256, DI
- ADDQ $128, R8
-
- // second round
- // load
- VMOVDQU (DI), Y4
- VMOVDQU 32(DI), Y5
- VMOVDQU 64(DI), Y6
- VMOVDQU 96(DI), Y7
- VMOVDQU 128(DI), Y8
- VMOVDQU 160(DI), Y9
- VMOVDQU 192(DI), Y10
- VMOVDQU 224(DI), Y11
-
- // reorder
- VMOVDQU ·lowdword<>(SB), Y3
- VPAND Y3, Y4, Y12
- VPAND Y3, Y5, Y13
- VPAND Y3, Y6, Y14
- VPAND Y3, Y7, Y15
- VPSRLD $16, Y4, Y4
- VPSRLD $16, Y5, Y5
- VPSRLD $16, Y6, Y6
- VPSRLD $16, Y7, Y7
- VPACKUSDW Y5, Y4, Y5
- VPACKUSDW Y13, Y12, Y4
- VPACKUSDW Y7, Y6, Y7
- VPACKUSDW Y15, Y14, Y6
- VPERMQ invntt_VPERMQ_IDX, Y4, Y4
- VPERMQ invntt_VPERMQ_IDX, Y5, Y5
- VPERMQ invntt_VPERMQ_IDX, Y6, Y6
- VPERMQ invntt_VPERMQ_IDX, Y7, Y7
- VPAND Y3, Y8, Y12
- VPAND Y3, Y9, Y13
- VPAND Y3, Y10, Y14
- VPAND Y3, Y11, Y15
- VPSRLD $16, Y8, Y8
- VPSRLD $16, Y9, Y9
- VPSRLD $16, Y10, Y10
- VPSRLD $16, Y11, Y11
- VPACKUSDW Y9, Y8, Y9
- VPACKUSDW Y13, Y12, Y8
- VPACKUSDW Y11, Y10, Y11
- VPACKUSDW Y15, Y14, Y10
- VPERMQ invntt_VPERMQ_IDX, Y8, Y8
- VPERMQ invntt_VPERMQ_IDX, Y9, Y9
- VPERMQ invntt_VPERMQ_IDX, Y10, Y10
- VPERMQ invntt_VPERMQ_IDX, Y11, Y11
-
- // level 0
- // update
- VMOVDQA Y4, Y12
- VMOVDQA Y6, Y13
- VMOVDQA Y8, Y14
- VMOVDQA Y10, Y15
- VPADDW Y4, Y5, Y4
- VPADDW Y6, Y7, Y6
- VPADDW Y8, Y9, Y8
- VPADDW Y10, Y11, Y10
- VPSUBW Y5, Y12, Y5
- VPSUBW Y7, Y13, Y7
- VPSUBW Y9, Y14, Y9
- VPSUBW Y11, Y15, Y11
-
- // zetas
- VMOVDQU (R8), Y13
- VMOVDQU 32(R8), Y14
- VMOVDQU 64(R8), Y15
- VMOVDQU 96(R8), Y3
-
- // mul
- VPMULLW Y13, Y5, Y12
- VPMULHW Y13, Y5, Y5
- VPMULLW Y14, Y7, Y13
- VPMULHW Y14, Y7, Y7
- VPMULLW Y15, Y9, Y14
- VPMULHW Y15, Y9, Y9
- VPMULLW Y3, Y11, Y15
- VPMULHW Y3, Y11, Y11
-
- // reduce
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULLW Y0, Y15, Y15
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPMULHW Y1, Y15, Y15
- VPSUBW Y12, Y5, Y5
- VPSUBW Y13, Y7, Y7
- VPSUBW Y14, Y9, Y9
- VPSUBW Y15, Y11, Y11
-
- // level 1
- // shuffle
- VMOVDQU ·vpshufb_idx<>(SB), Y3
- VPSHUFB Y3, Y4, Y12
- VPSHUFB Y3, Y5, Y13
- VPSHUFB Y3, Y6, Y14
- VPSHUFB Y3, Y7, Y15
- VPBLENDW $0x55, Y4, Y13, Y4
- VPBLENDW $0xAA, Y5, Y12, Y5
- VPBLENDW $0x55, Y6, Y15, Y6
- VPBLENDW $0xAA, Y7, Y14, Y7
- VPSHUFB Y3, Y8, Y12
- VPSHUFB Y3, Y9, Y13
- VPSHUFB Y3, Y10, Y14
- VPSHUFB Y3, Y11, Y15
- VPBLENDW $0x55, Y8, Y13, Y8
- VPBLENDW $0xAA, Y9, Y12, Y9
- VPBLENDW $0x55, Y10, Y15, Y10
- VPBLENDW $0xAA, Y11, Y14, Y11
-
- // update
- VMOVDQA Y4, Y12
- VMOVDQA Y6, Y13
- VMOVDQA Y8, Y14
- VMOVDQA Y10, Y15
- VPADDW Y4, Y5, Y4
- VPADDW Y6, Y7, Y6
- VPADDW Y8, Y9, Y8
- VPADDW Y10, Y11, Y10
- VPSUBW Y5, Y12, Y5
- VPSUBW Y7, Y13, Y7
- VPSUBW Y9, Y14, Y9
- VPSUBW Y11, Y15, Y11
-
- // zetas
- VMOVDQU 256(R8), Y13
- VMOVDQU 288(R8), Y14
- VMOVDQU 320(R8), Y15
- VMOVDQU 352(R8), Y3
-
- // mul
- VPMULLW Y13, Y5, Y12
- VPMULHW Y13, Y5, Y5
- VPMULLW Y14, Y7, Y13
- VPMULHW Y14, Y7, Y7
- VPMULLW Y15, Y9, Y14
- VPMULHW Y15, Y9, Y9
- VPMULLW Y3, Y11, Y15
- VPMULHW Y3, Y11, Y11
-
- // reduce
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULLW Y0, Y15, Y15
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPMULHW Y1, Y15, Y15
- VPSUBW Y12, Y5, Y5
- VPSUBW Y13, Y7, Y7
- VPSUBW Y14, Y9, Y9
- VPSUBW Y15, Y11, Y11
-
- // reduce 2
- VPMULHW Y2, Y4, Y12
- VPMULHW Y2, Y6, Y13
- VPMULHW Y2, Y8, Y14
- VPMULHW Y2, Y10, Y15
- VPSRAW $11, Y12, Y12
- VPSRAW $11, Y13, Y13
- VPSRAW $11, Y14, Y14
- VPSRAW $11, Y15, Y15
- VPMULLW Y1, Y12, Y12
- VPMULLW Y1, Y13, Y13
- VPMULLW Y1, Y14, Y14
- VPMULLW Y1, Y15, Y15
- VPSUBW Y12, Y4, Y4
- VPSUBW Y13, Y6, Y6
- VPSUBW Y14, Y8, Y8
- VPSUBW Y15, Y10, Y10
-
- // level 2
- // shuffle
- VPSHUFD $0xB1, Y4, Y12
- VPSHUFD $0xB1, Y5, Y13
- VPSHUFD $0xB1, Y6, Y14
- VPSHUFD $0xB1, Y7, Y15
- VPBLENDD $0x55, Y4, Y13, Y4
- VPBLENDD $0xAA, Y5, Y12, Y5
- VPBLENDD $0x55, Y6, Y15, Y6
- VPBLENDD $0xAA, Y7, Y14, Y7
- VPSHUFD $0xB1, Y8, Y12
- VPSHUFD $0xB1, Y9, Y13
- VPSHUFD $0xB1, Y10, Y14
- VPSHUFD $0xB1, Y11, Y15
- VPBLENDD $0x55, Y8, Y13, Y8
- VPBLENDD $0xAA, Y9, Y12, Y9
- VPBLENDD $0x55, Y10, Y15, Y10
- VPBLENDD $0xAA, Y11, Y14, Y11
-
- // update
- VMOVDQA Y4, Y12
- VMOVDQA Y6, Y13
- VMOVDQA Y8, Y14
- VMOVDQA Y10, Y15
- VPADDW Y4, Y5, Y4
- VPADDW Y6, Y7, Y6
- VPADDW Y8, Y9, Y8
- VPADDW Y10, Y11, Y10
- VPSUBW Y5, Y12, Y5
- VPSUBW Y7, Y13, Y7
- VPSUBW Y9, Y14, Y9
- VPSUBW Y11, Y15, Y11
-
- // zetas
- VMOVDQU 512(R8), Y13
- VMOVDQU 544(R8), Y14
- VMOVDQU 576(R8), Y15
- VMOVDQU 608(R8), Y3
-
- // mul
- VPMULLW Y13, Y5, Y12
- VPMULHW Y13, Y5, Y5
- VPMULLW Y14, Y7, Y13
- VPMULHW Y14, Y7, Y7
- VPMULLW Y15, Y9, Y14
- VPMULHW Y15, Y9, Y9
- VPMULLW Y3, Y11, Y15
- VPMULHW Y3, Y11, Y11
-
- // reduce
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULLW Y0, Y15, Y15
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPMULHW Y1, Y15, Y15
- VPSUBW Y12, Y5, Y5
- VPSUBW Y13, Y7, Y7
- VPSUBW Y14, Y9, Y9
- VPSUBW Y15, Y11, Y11
-
- // level 3
- // shuffle
- VSHUFPD $0x00, Y5, Y4, Y3
- VSHUFPD $0x0F, Y5, Y4, Y4
- VSHUFPD $0x00, Y7, Y6, Y5
- VSHUFPD $0x0F, Y7, Y6, Y6
- VSHUFPD $0x00, Y9, Y8, Y7
- VSHUFPD $0x0F, Y9, Y8, Y8
- VSHUFPD $0x00, Y11, Y10, Y9
- VSHUFPD $0x0F, Y11, Y10, Y10
-
- // update
- VMOVDQA Y3, Y12
- VMOVDQA Y5, Y13
- VMOVDQA Y7, Y14
- VMOVDQA Y9, Y15
- VPADDW Y3, Y4, Y3
- VPADDW Y5, Y6, Y5
- VPADDW Y7, Y8, Y7
- VPADDW Y9, Y10, Y9
- VPSUBW Y4, Y12, Y4
- VPSUBW Y6, Y13, Y6
- VPSUBW Y8, Y14, Y8
- VPSUBW Y10, Y15, Y10
-
- // zetas
- VMOVDQU 768(R8), Y12
- VMOVDQU 800(R8), Y13
- VMOVDQU 832(R8), Y14
- VMOVDQU 864(R8), Y15
-
- // mul
- VPMULLW Y12, Y4, Y11
- VPMULHW Y12, Y4, Y4
- VPMULLW Y13, Y6, Y12
- VPMULHW Y13, Y6, Y6
- VPMULLW Y14, Y8, Y13
- VPMULHW Y14, Y8, Y8
- VPMULLW Y15, Y10, Y14
- VPMULHW Y15, Y10, Y10
-
- // reduce
- VPMULLW Y0, Y11, Y11
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULHW Y1, Y11, Y11
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPSUBW Y11, Y4, Y4
- VPSUBW Y12, Y6, Y6
- VPSUBW Y13, Y8, Y8
- VPSUBW Y14, Y10, Y10
-
- // reduce 2
- VPMULHW Y2, Y3, Y12
- VPMULHW Y2, Y5, Y13
- VPMULHW Y2, Y7, Y14
- VPMULHW Y2, Y9, Y15
- VPSRAW $11, Y12, Y12
- VPSRAW $11, Y13, Y13
- VPSRAW $11, Y14, Y14
- VPSRAW $11, Y15, Y15
- VPMULLW Y1, Y12, Y12
- VPMULLW Y1, Y13, Y13
- VPMULLW Y1, Y14, Y14
- VPMULLW Y1, Y15, Y15
- VPSUBW Y12, Y3, Y3
- VPSUBW Y13, Y5, Y5
- VPSUBW Y14, Y7, Y7
- VPSUBW Y15, Y9, Y9
-
- // level 4
- // shuffle
- VPERM2I128 $0x02, Y3, Y4, Y11
- VPERM2I128 $0x13, Y3, Y4, Y3
- VPERM2I128 $0x02, Y5, Y6, Y4
- VPERM2I128 $0x13, Y5, Y6, Y5
- VPERM2I128 $0x02, Y7, Y8, Y6
- VPERM2I128 $0x13, Y7, Y8, Y7
- VPERM2I128 $0x02, Y9, Y10, Y8
- VPERM2I128 $0x13, Y9, Y10, Y9
-
- // update
- VMOVDQA Y11, Y12
- VMOVDQA Y4, Y13
- VMOVDQA Y6, Y14
- VMOVDQA Y8, Y15
- VPADDW Y11, Y3, Y10
- VPADDW Y4, Y5, Y4
- VPADDW Y6, Y7, Y6
- VPADDW Y8, Y9, Y8
- VPSUBW Y3, Y12, Y3
- VPSUBW Y5, Y13, Y5
- VPSUBW Y7, Y14, Y7
- VPSUBW Y9, Y15, Y9
-
- // zetas
- VMOVDQU 1024(R8), Y12
- VMOVDQU 1056(R8), Y13
- VMOVDQU 1088(R8), Y14
- VMOVDQU 1120(R8), Y15
-
- // mul
- VPMULLW Y12, Y3, Y11
- VPMULHW Y12, Y3, Y3
- VPMULLW Y13, Y5, Y12
- VPMULHW Y13, Y5, Y5
- VPMULLW Y14, Y7, Y13
- VPMULHW Y14, Y7, Y7
- VPMULLW Y15, Y9, Y14
- VPMULHW Y15, Y9, Y9
-
- // reduce
- VPMULLW Y0, Y11, Y11
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULHW Y1, Y11, Y11
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPSUBW Y11, Y3, Y3
- VPSUBW Y12, Y5, Y5
- VPSUBW Y13, Y7, Y7
- VPSUBW Y14, Y9, Y9
-
- // level 5
- // update
- VMOVDQA Y10, Y12
- VMOVDQA Y3, Y13
- VMOVDQA Y6, Y14
- VMOVDQA Y7, Y15
- VPADDW Y10, Y4, Y10
- VPADDW Y3, Y5, Y3
- VPADDW Y6, Y8, Y6
- VPADDW Y7, Y9, Y7
- VPSUBW Y4, Y12, Y4
- VPSUBW Y5, Y13, Y5
- VPSUBW Y8, Y14, Y8
- VPSUBW Y9, Y15, Y9
-
- // zetas
- VMOVDQU 1344(SI), Y14
- VMOVDQU 1376(SI), Y15
-
- // mul
- VPMULLW Y14, Y4, Y11
- VPMULLW Y14, Y5, Y12
- VPMULLW Y15, Y8, Y13
- VPMULHW Y14, Y4, Y4
- VPMULHW Y14, Y5, Y5
- VPMULHW Y15, Y8, Y8
- VPMULLW Y15, Y9, Y14
- VPMULHW Y15, Y9, Y9
-
- // reduce
- VPMULLW Y0, Y11, Y11
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULHW Y1, Y11, Y11
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPSUBW Y11, Y4, Y4
- VPSUBW Y12, Y5, Y5
- VPSUBW Y13, Y8, Y8
- VPSUBW Y14, Y9, Y9
-
- // reduce 2
- VPMULHW Y2, Y10, Y12
- VPMULHW Y2, Y6, Y13
- VPSRAW $11, Y12, Y12
- VPSRAW $11, Y13, Y13
- VPMULLW Y1, Y12, Y12
- VPMULLW Y1, Y13, Y13
- VPSUBW Y12, Y10, Y10
- VPSUBW Y13, Y6, Y6
-
- // level 6
- // update
- VMOVDQA Y10, Y12
- VMOVDQA Y3, Y13
- VMOVDQA Y4, Y14
- VMOVDQA Y5, Y15
- VPADDW Y10, Y6, Y10
- VPADDW Y3, Y7, Y3
- VPADDW Y4, Y8, Y4
- VPADDW Y5, Y9, Y5
- VPSUBW Y6, Y12, Y6
- VPSUBW Y7, Y13, Y7
- VPSUBW Y8, Y14, Y8
- VPSUBW Y9, Y15, Y9
-
- // zetas
- VMOVDQU 1440(SI), Y15
-
- // mul
- VPMULLW Y15, Y6, Y11
- VPMULLW Y15, Y7, Y12
- VPMULLW Y15, Y8, Y13
- VPMULLW Y15, Y9, Y14
- VPMULHW Y15, Y6, Y6
- VPMULHW Y15, Y7, Y7
- VPMULHW Y15, Y8, Y8
- VPMULHW Y15, Y9, Y9
-
- // reduce
- VPMULLW Y0, Y11, Y11
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULHW Y1, Y11, Y11
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPSUBW Y11, Y6, Y6
- VPSUBW Y12, Y7, Y7
- VPSUBW Y13, Y8, Y8
- VPSUBW Y14, Y9, Y9
-
- // reduce 2
- VPMULHW Y2, Y3, Y12
- VPSRAW $11, Y12, Y12
- VPMULLW Y1, Y12, Y12
- VPSUBW Y12, Y3, Y3
-
- // store
- VMOVDQU Y10, (DI)
- VMOVDQU Y3, 32(DI)
- VMOVDQU Y4, 64(DI)
- VMOVDQU Y5, 96(DI)
- VMOVDQU Y6, 128(DI)
- VMOVDQU Y7, 160(DI)
- VMOVDQU Y8, 192(DI)
- VMOVDQU Y9, 224(DI)
-
- SUBQ $256, DI
-
- // f
- VMOVDQU ·f_x16<>(SB), Y2
-
- // first round
- // load
- VMOVDQU (DI), Y4
- VMOVDQU 32(DI), Y5
- VMOVDQU 64(DI), Y6
- VMOVDQU 96(DI), Y7
- VMOVDQU 256(DI), Y8
- VMOVDQU 288(DI), Y9
- VMOVDQU 320(DI), Y10
- VMOVDQU 352(DI), Y11
-
- // level 7
- // update
- VMOVDQA Y4, Y12
- VMOVDQA Y5, Y13
- VMOVDQA Y6, Y14
- VMOVDQA Y7, Y15
- VPADDW Y4, Y8, Y4
- VPADDW Y5, Y9, Y5
- VPADDW Y6, Y10, Y6
- VPADDW Y7, Y11, Y7
- VPSUBW Y8, Y12, Y8
- VPSUBW Y9, Y13, Y9
- VPSUBW Y10, Y14, Y10
- VPSUBW Y11, Y15, Y11
-
- // zeta
- VMOVDQU 1472(SI), Y3
-
- // mul
- VPMULLW Y3, Y8, Y12
- VPMULLW Y3, Y9, Y13
- VPMULLW Y3, Y10, Y14
- VPMULLW Y3, Y11, Y15
- VPMULHW Y3, Y8, Y8
- VPMULHW Y3, Y9, Y9
- VPMULHW Y3, Y10, Y10
- VPMULHW Y3, Y11, Y11
-
- // reduce
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULLW Y0, Y15, Y15
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPMULHW Y1, Y15, Y15
- VPSUBW Y12, Y8, Y8
- VPSUBW Y13, Y9, Y9
- VPSUBW Y14, Y10, Y10
- VPSUBW Y15, Y11, Y11
- VPADDW Y1, Y8, Y8
- VPADDW Y1, Y9, Y9
- VPADDW Y1, Y10, Y10
- VPADDW Y1, Y11, Y11
-
- // mul
- VPMULLW Y2, Y4, Y12
- VPMULLW Y2, Y5, Y13
- VPMULLW Y2, Y6, Y14
- VPMULLW Y2, Y7, Y15
- VPMULHW Y2, Y4, Y4
- VPMULHW Y2, Y5, Y5
- VPMULHW Y2, Y6, Y6
- VPMULHW Y2, Y7, Y7
-
- // reduce
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULLW Y0, Y15, Y15
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPMULHW Y1, Y15, Y15
- VPSUBW Y12, Y4, Y4
- VPSUBW Y13, Y5, Y5
- VPSUBW Y14, Y6, Y6
- VPSUBW Y15, Y7, Y7
- VPADDW Y1, Y4, Y4
- VPADDW Y1, Y5, Y5
- VPADDW Y1, Y6, Y6
- VPADDW Y1, Y7, Y7
-
- // store
- VMOVDQU Y4, (DI)
- VMOVDQU Y5, 32(DI)
- VMOVDQU Y6, 64(DI)
- VMOVDQU Y7, 96(DI)
- VMOVDQU Y8, 256(DI)
- VMOVDQU Y9, 288(DI)
- VMOVDQU Y10, 320(DI)
- VMOVDQU Y11, 352(DI)
-
- ADDQ $128, DI
-
- // second round
- // load
- VMOVDQU (DI), Y4
- VMOVDQU 32(DI), Y5
- VMOVDQU 64(DI), Y6
- VMOVDQU 96(DI), Y7
- VMOVDQU 256(DI), Y8
- VMOVDQU 288(DI), Y9
- VMOVDQU 320(DI), Y10
- VMOVDQU 352(DI), Y11
-
- // zeta
- VMOVDQU 1472(SI), Y3
-
- // level 7
- // update
- VMOVDQA Y4, Y12
- VMOVDQA Y5, Y13
- VMOVDQA Y6, Y14
- VMOVDQA Y7, Y15
- VPADDW Y4, Y8, Y4
- VPADDW Y5, Y9, Y5
- VPADDW Y6, Y10, Y6
- VPADDW Y7, Y11, Y7
- VPSUBW Y8, Y12, Y8
- VPSUBW Y9, Y13, Y9
- VPSUBW Y10, Y14, Y10
- VPSUBW Y11, Y15, Y11
-
- // mul
- VPMULLW Y3, Y8, Y12
- VPMULLW Y3, Y9, Y13
- VPMULLW Y3, Y10, Y14
- VPMULLW Y3, Y11, Y15
- VPMULHW Y3, Y8, Y8
- VPMULHW Y3, Y9, Y9
- VPMULHW Y3, Y10, Y10
- VPMULHW Y3, Y11, Y11
-
- // reduce
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULLW Y0, Y15, Y15
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPMULHW Y1, Y15, Y15
- VPSUBW Y12, Y8, Y8
- VPSUBW Y13, Y9, Y9
- VPSUBW Y14, Y10, Y10
- VPSUBW Y15, Y11, Y11
- VPADDW Y1, Y8, Y8
- VPADDW Y1, Y9, Y9
- VPADDW Y1, Y10, Y10
- VPADDW Y1, Y11, Y11
-
- // mul
- VPMULLW Y2, Y4, Y12
- VPMULLW Y2, Y5, Y13
- VPMULLW Y2, Y6, Y14
- VPMULLW Y2, Y7, Y15
- VPMULHW Y2, Y4, Y4
- VPMULHW Y2, Y5, Y5
- VPMULHW Y2, Y6, Y6
- VPMULHW Y2, Y7, Y7
-
- // reduce
- VPMULLW Y0, Y12, Y12
- VPMULLW Y0, Y13, Y13
- VPMULLW Y0, Y14, Y14
- VPMULLW Y0, Y15, Y15
- VPMULHW Y1, Y12, Y12
- VPMULHW Y1, Y13, Y13
- VPMULHW Y1, Y14, Y14
- VPMULHW Y1, Y15, Y15
- VPSUBW Y12, Y4, Y4
- VPSUBW Y13, Y5, Y5
- VPSUBW Y14, Y6, Y6
- VPSUBW Y15, Y7, Y7
- VPADDW Y1, Y4, Y4
- VPADDW Y1, Y5, Y5
- VPADDW Y1, Y6, Y6
- VPADDW Y1, Y7, Y7
-
- // store
- VMOVDQU Y4, (DI)
- VMOVDQU Y5, 32(DI)
- VMOVDQU Y6, 64(DI)
- VMOVDQU Y7, 96(DI)
- VMOVDQU Y8, 256(DI)
- VMOVDQU Y9, 288(DI)
- VMOVDQU Y10, 320(DI)
- VMOVDQU Y11, 352(DI)
-
- VZEROUPPER
- RET
-
-// func pointwiseAccK2AVX2(dst *uint16, a, b **uint16)
-TEXT ·pointwiseAccK2AVX2(SB), NOSPLIT, $0-24
- MOVQ dst+0(FP), DI
- MOVQ a+8(FP), SI
- MOVQ b+16(FP), DX
-
- VMOVDQU ·qinv_x16<>(SB), Y0
- VMOVDQU ·q_x16<>(SB), Y1
- VMOVDQU ·montsq_x16<>(SB), Y2
-
- XORQ AX, AX
- XORQ BX, BX
-
- MOVQ 8(SI), R8 // a[1]
- MOVQ (SI), SI // a[0]
- MOVQ 8(DX), R11 // b[1]
- MOVQ (DX), DX // b[0]
-
-looptop2:
- // load a
- VMOVDQU (SI)(BX*1), Y4
- VMOVDQU 32(SI)(BX*1), Y5
- VMOVDQU 64(SI)(BX*1), Y6
- VMOVDQU (R8)(BX*1), Y7
- VMOVDQU 32(R8)(BX*1), Y8
- VMOVDQU 64(R8)(BX*1), Y9
-
- // mul montsq
- VPMULLW Y2, Y4, Y3
- VPMULHW Y2, Y4, Y10
- VPMULLW Y2, Y5, Y4
- VPMULHW Y2, Y5, Y11
- VPMULLW Y2, Y6, Y5
- VPMULHW Y2, Y6, Y12
- VPMULLW Y2, Y7, Y6
- VPMULHW Y2, Y7, Y13
- VPMULLW Y2, Y8, Y7
- VPMULHW Y2, Y8, Y14
- VPMULLW Y2, Y9, Y8
- VPMULHW Y2, Y9, Y15
-
- // reduce
- VPMULLW Y0, Y3, Y3
- VPMULLW Y0, Y4, Y4
- VPMULLW Y0, Y5, Y5
- VPMULLW Y0, Y6, Y6
- VPMULLW Y0, Y7, Y7
- VPMULLW Y0, Y8, Y8
- VPMULHW Y1, Y3, Y3
- VPMULHW Y1, Y4, Y4
- VPMULHW Y1, Y5, Y5
- VPMULHW Y1, Y6, Y6
- VPMULHW Y1, Y7, Y7
- VPMULHW Y1, Y8, Y8
- VPSUBW Y3, Y10, Y3
- VPSUBW Y4, Y11, Y4
- VPSUBW Y5, Y12, Y5
- VPSUBW Y6, Y13, Y6
- VPSUBW Y7, Y14, Y7
- VPSUBW Y8, Y15, Y8
-
- // load b
- VMOVDQU (DX)(BX*1), Y9
- VMOVDQU 32(DX)(BX*1), Y10
- VMOVDQU 64(DX)(BX*1), Y11
- VMOVDQU (R11)(BX*1), Y12
- VMOVDQU 32(R11)(BX*1), Y13
- VMOVDQU 64(R11)(BX*1), Y14
-
- // mul
- VPMULLW Y3, Y9, Y15
- VPMULHW Y3, Y9, Y9
- VPMULLW Y4, Y10, Y3
- VPMULHW Y4, Y10, Y10
- VPMULLW Y5, Y11, Y4
- VPMULHW Y5, Y11, Y11
- VPMULLW Y6, Y12, Y5
- VPMULHW Y6, Y12, Y12
- VPMULLW Y7, Y13, Y6
- VPMULHW Y7, Y13, Y13
- VPMULLW Y8, Y14, Y7
- VPMULHW Y8, Y14, Y14
-
- // reduce
- VPMULLW Y0, Y15, Y15
- VPMULLW Y0, Y3, Y3
- VPMULLW Y0, Y4, Y4
- VPMULLW Y0, Y5, Y5
- VPMULLW Y0, Y6, Y6
- VPMULLW Y0, Y7, Y7
- VPMULHW Y1, Y15, Y15
- VPMULHW Y1, Y3, Y3
- VPMULHW Y1, Y4, Y4
- VPMULHW Y1, Y5, Y5
- VPMULHW Y1, Y6, Y6
- VPMULHW Y1, Y7, Y7
- VPSUBW Y15, Y9, Y15
- VPSUBW Y3, Y10, Y3
- VPSUBW Y4, Y11, Y4
- VPSUBW Y5, Y12, Y5
- VPSUBW Y6, Y13, Y6
- VPSUBW Y7, Y14, Y7
-
- // add
- VPADDW Y15, Y5, Y5
- VPADDW Y3, Y6, Y6
- VPADDW Y4, Y7, Y7
-
- // reduce 2
- VMOVDQU ·v_x16<>(SB), Y3
- VPMULHW Y3, Y5, Y8
- VPMULHW Y3, Y6, Y9
- VPMULHW Y3, Y7, Y10
- VPSRAW $11, Y8, Y8
- VPSRAW $11, Y9, Y9
- VPSRAW $11, Y10, Y10
- VPMULLW Y1, Y8, Y8
- VPMULLW Y1, Y9, Y9
- VPMULLW Y1, Y10, Y10
- VPSUBW Y8, Y5, Y5
- VPSUBW Y9, Y6, Y6
- VPSUBW Y10, Y7, Y7
-
- // store
- VMOVDQU Y5, (DI)(BX*1)
- VMOVDQU Y6, 32(DI)(BX*1)
- VMOVDQU Y7, 64(DI)(BX*1)
-
- ADDQ $1, AX
- ADDQ $96, BX
- CMPQ AX, $5
- JB looptop2
-
- // load
- VMOVDQU (SI)(BX*1), Y4
- VMOVDQU (R8)(BX*1), Y7
- VMOVDQU (DX)(BX*1), Y9
- VMOVDQU (R11)(BX*1), Y12
-
- // mul montsq
- VPMULLW Y2, Y4, Y3
- VPMULHW Y2, Y4, Y10
- VPMULLW Y2, Y7, Y6
- VPMULHW Y2, Y7, Y13
-
- // reduce
- VPMULLW Y0, Y3, Y3
- VPMULLW Y0, Y6, Y6
- VPMULHW Y1, Y3, Y3
- VPMULHW Y1, Y6, Y6
- VPSUBW Y3, Y10, Y3
- VPSUBW Y6, Y13, Y6
-
- // mul
- VPMULLW Y3, Y9, Y15
- VPMULHW Y3, Y9, Y9
- VPMULLW Y6, Y12, Y5
- VPMULHW Y6, Y12, Y12
-
- // reduce
- VPMULLW Y0, Y15, Y15
- VPMULLW Y0, Y5, Y5
- VPMULHW Y1, Y15, Y15
- VPMULHW Y1, Y5, Y5
- VPSUBW Y15, Y9, Y15
- VPSUBW Y5, Y12, Y5
-
- // add
- VPADDW Y15, Y5, Y5
-
- // reduce 2
- VMOVDQU ·v_x16<>(SB), Y3
- VPMULHW Y3, Y5, Y8
- VPSRAW $11, Y8, Y8
- VPMULLW Y1, Y8, Y8
- VPSUBW Y8, Y5, Y5
-
- // store
- VMOVDQU Y5, (DI)(BX*1)
-
- VZEROUPPER
- RET
-
-// func pointwiseAccK2AVX2(dst *uint16, a, b **uint16)
-TEXT ·pointwiseAccK3AVX2(SB), NOSPLIT, $0-24
- MOVQ dst+0(FP), DI
- MOVQ a+8(FP), SI
- MOVQ b+16(FP), DX
-
- VMOVDQU ·qinv_x16<>(SB), Y0
- VMOVDQU ·q_x16<>(SB), Y1
- VMOVDQU ·montsq_x16<>(SB), Y2
-
- XORQ AX, AX
- XORQ BX, BX
-
- MOVQ (16)(SI), R9 // a[2]
- MOVQ 8(SI), R8 // a[1]
- MOVQ (SI), SI // a[0]
- MOVQ 16(DX), R12 // b[2]
- MOVQ 8(DX), R11 // b[1]
- MOVQ (DX), DX // b[0]
-
-looptop3:
- // load a
- VMOVDQU (SI)(BX*1), Y4
- VMOVDQU 32(SI)(BX*1), Y5
- VMOVDQU (R8)(BX*1), Y6
- VMOVDQU 32(R8)(BX*1), Y7
- VMOVDQU (R9)(BX*1), Y8
- VMOVDQU 32(R9)(BX*1), Y9
-
- // mul montsq
- VPMULLW Y2, Y4, Y3
- VPMULHW Y2, Y4, Y10
- VPMULLW Y2, Y5, Y4
- VPMULHW Y2, Y5, Y11
- VPMULLW Y2, Y6, Y5
- VPMULHW Y2, Y6, Y12
- VPMULLW Y2, Y7, Y6
- VPMULHW Y2, Y7, Y13
- VPMULLW Y2, Y8, Y7
- VPMULHW Y2, Y8, Y14
- VPMULLW Y2, Y9, Y8
- VPMULHW Y2, Y9, Y15
-
- // reduce
- VPMULLW Y0, Y3, Y3
- VPMULLW Y0, Y4, Y4
- VPMULLW Y0, Y5, Y5
- VPMULLW Y0, Y6, Y6
- VPMULLW Y0, Y7, Y7
- VPMULLW Y0, Y8, Y8
- VPMULHW Y1, Y3, Y3
- VPMULHW Y1, Y4, Y4
- VPMULHW Y1, Y5, Y5
- VPMULHW Y1, Y6, Y6
- VPMULHW Y1, Y7, Y7
- VPMULHW Y1, Y8, Y8
- VPSUBW Y3, Y10, Y3
- VPSUBW Y4, Y11, Y4
- VPSUBW Y5, Y12, Y5
- VPSUBW Y6, Y13, Y6
- VPSUBW Y7, Y14, Y7
- VPSUBW Y8, Y15, Y8
-
- // load b
- VMOVDQU (DX)(BX*1), Y9
- VMOVDQU 32(DX)(BX*1), Y10
- VMOVDQU (R11)(BX*1), Y11
- VMOVDQU 32(R11)(BX*1), Y12
- VMOVDQU (R12)(BX*1), Y13
- VMOVDQU 32(R12)(BX*1), Y14
-
- // mul
- VPMULLW Y3, Y9, Y15
- VPMULHW Y3, Y9, Y9
- VPMULLW Y4, Y10, Y3
- VPMULHW Y4, Y10, Y10
- VPMULLW Y5, Y11, Y4
- VPMULHW Y5, Y11, Y11
- VPMULLW Y6, Y12, Y5
- VPMULHW Y6, Y12, Y12
- VPMULLW Y7, Y13, Y6
- VPMULHW Y7, Y13, Y13
- VPMULLW Y8, Y14, Y7
- VPMULHW Y8, Y14, Y14
-
- // reduce
- VPMULLW Y0, Y15, Y15
- VPMULLW Y0, Y3, Y3
- VPMULLW Y0, Y4, Y4
- VPMULLW Y0, Y5, Y5
- VPMULLW Y0, Y6, Y6
- VPMULLW Y0, Y7, Y7
- VPMULHW Y1, Y15, Y15
- VPMULHW Y1, Y3, Y3
- VPMULHW Y1, Y4, Y4
- VPMULHW Y1, Y5, Y5
- VPMULHW Y1, Y6, Y6
- VPMULHW Y1, Y7, Y7
- VPSUBW Y15, Y9, Y15
- VPSUBW Y3, Y10, Y3
- VPSUBW Y4, Y11, Y4
- VPSUBW Y5, Y12, Y5
- VPSUBW Y6, Y13, Y6
- VPSUBW Y7, Y14, Y7
-
- // add
- VPADDW Y15, Y4, Y4
- VPADDW Y3, Y5, Y5
- VPADDW Y4, Y6, Y6
- VPADDW Y5, Y7, Y7
-
- // reduce 2
- VMOVDQU ·v_x16<>(SB), Y3
- VPMULHW Y3, Y6, Y8
- VPMULHW Y3, Y7, Y9
- VPSRAW $11, Y8, Y8
- VPSRAW $11, Y9, Y9
- VPMULLW Y1, Y8, Y8
- VPMULLW Y1, Y9, Y9
- VPSUBW Y8, Y6, Y6
- VPSUBW Y9, Y7, Y7
-
- // store
- VMOVDQU Y6, (DI)(BX*1)
- VMOVDQU Y7, 32(DI)(BX*1)
-
- ADDQ $1, AX
- ADDQ $64, BX
- CMPQ AX, $8
- JB looptop3
-
- VZEROUPPER
- RET
-
-// func pointwiseAccK2AVX2(dst *uint16, a, b **uint16)
-TEXT ·pointwiseAccK4AVX2(SB), NOSPLIT, $0-24
- MOVQ dst+0(FP), DI
- MOVQ a+8(FP), SI
- MOVQ b+16(FP), DX
-
- VMOVDQU ·qinv_x16<>(SB), Y0
- VMOVDQU ·q_x16<>(SB), Y1
- VMOVDQU ·montsq_x16<>(SB), Y2
- VMOVDQU ·v_x16<>(SB), Y3
-
- XORQ AX, AX
- XORQ BX, BX
-
- MOVQ 24(SI), R10 // a[3]
- MOVQ 16(SI), R9 // a[2]
- MOVQ 8(SI), R8 // a[1]
- MOVQ (SI), SI // a[0]
- MOVQ 24(DX), R13 // b[3]
- MOVQ 16(DX), R12 // b[2]
- MOVQ 8(DX), R11 // b[1]
- MOVQ (DX), DX // b[0]
-
-looptop4:
- // load a
- VMOVDQU (SI)(BX*1), Y6
- VMOVDQU (R8)(BX*1), Y7
- VMOVDQU (R9)(BX*1), Y8
- VMOVDQU (R10)(BX*1), Y9
-
- // mul montsq
- VPMULLW Y2, Y6, Y5
- VPMULHW Y2, Y6, Y10
- VPMULLW Y2, Y7, Y6
- VPMULHW Y2, Y7, Y11
- VPMULLW Y2, Y8, Y7
- VPMULHW Y2, Y8, Y12
- VPMULLW Y2, Y9, Y8
- VPMULHW Y2, Y9, Y13
-
- // reduce
- VPMULLW Y0, Y5, Y5
- VPMULLW Y0, Y6, Y6
- VPMULLW Y0, Y7, Y7
- VPMULLW Y0, Y8, Y8
- VPMULHW Y1, Y5, Y5
- VPMULHW Y1, Y6, Y6
- VPMULHW Y1, Y7, Y7
- VPMULHW Y1, Y8, Y8
- VPSUBW Y5, Y10, Y5
- VPSUBW Y6, Y11, Y6
- VPSUBW Y7, Y12, Y7
- VPSUBW Y8, Y13, Y8
-
- // load b
- VMOVDQU (DX)(BX*1), Y9
- VMOVDQU (R11)(BX*1), Y10
- VMOVDQU (R12)(BX*1), Y11
- VMOVDQU (R13)(BX*1), Y12
-
- // mul
- VPMULLW Y5, Y9, Y4
- VPMULHW Y5, Y9, Y9
- VPMULLW Y6, Y10, Y5
- VPMULHW Y6, Y10, Y10
- VPMULLW Y7, Y11, Y6
- VPMULHW Y7, Y11, Y11
- VPMULLW Y8, Y12, Y7
- VPMULHW Y8, Y12, Y12
-
- // reduce
- VPMULLW Y0, Y4, Y4
- VPMULLW Y0, Y5, Y5
- VPMULLW Y0, Y6, Y6
- VPMULLW Y0, Y7, Y7
- VPMULHW Y1, Y4, Y4
- VPMULHW Y1, Y5, Y5
- VPMULHW Y1, Y6, Y6
- VPMULHW Y1, Y7, Y7
- VPSUBW Y4, Y9, Y4
- VPSUBW Y5, Y10, Y5
- VPSUBW Y6, Y11, Y6
- VPSUBW Y7, Y12, Y7
-
- // add
- VPADDW Y4, Y5, Y5
- VPADDW Y5, Y6, Y6
- VPADDW Y6, Y7, Y7
-
- // reduce 2
- VPMULHW Y3, Y7, Y8
- VPSRAW $11, Y8, Y8
- VPMULLW Y1, Y8, Y8
- VPSUBW Y8, Y7, Y8
-
- // store
- VMOVDQU Y8, (DI)(BX*1)
-
- ADDQ $1, AX
- ADDQ $32, BX
- CMPQ AX, $16
- JB looptop4
-
- VZEROUPPER
- RET
-
-// func cbdEta4AVX2(dst *uint16, b *byte)
-TEXT ·cbdEta4AVX2(SB), NOSPLIT, $0-16
- MOVQ dst+0(FP), DI
- MOVQ b+8(FP), SI
-
- VMOVDQU ·mask11<>(SB), Y0
- VMOVDQU ·mask0f<>(SB), Y1
- VMOVDQU ·q_x16<>(SB), Y2
-
- MOVQ $256, DX
-
-looptop:
- VMOVUPD 0(SI), Y3
- VPAND Y3, Y0, Y4
- VPSRLW $1, Y3, Y3
- VPAND Y3, Y0, Y5
- VPADDB Y5, Y4, Y4
- VPSRLW $1, Y3, Y3
- VPAND Y3, Y0, Y5
- VPADDB Y5, Y4, Y4
- VPSRLW $1, Y3, Y3
- VPAND Y3, Y0, Y3
- VPADDB Y3, Y4, Y3
- VPSRLW $4, Y3, Y4
- VPAND Y3, Y1, Y3
- VPAND Y4, Y1, Y4
- VPSUBB Y4, Y3, Y3
- VPMOVSXBW X3, Y4
- VPADDW Y2, Y4, Y4
- VMOVUPD Y4, 0(DI)
- VPERM2F128 $0x21, Y3, Y3, Y3
- VPMOVSXBW X3, Y4
- VPADDW Y2, Y4, Y4
- VMOVUPD Y4, 32(DI)
-
- ADDQ $64, DI
- ADDQ $32, SI
- SUBQ $32, DX
- JA looptop
-
- VZEROUPPER
- RET
diff --git a/vendor/blitter.com/go/kyber/hwaccel_ref.go b/vendor/blitter.com/go/kyber/hwaccel_ref.go
deleted file mode 100644
index 4b58010..0000000
--- a/vendor/blitter.com/go/kyber/hwaccel_ref.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// hwaccel_ref.go - Unaccelerated stubs.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to the software, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-// +build !amd64 gccgo noasm !go1.10
-
-package kyber
-
-func initHardwareAcceleration() {
- forceDisableHardwareAcceleration()
-}
diff --git a/vendor/blitter.com/go/kyber/indcpa.go b/vendor/blitter.com/go/kyber/indcpa.go
deleted file mode 100644
index 5c4186f..0000000
--- a/vendor/blitter.com/go/kyber/indcpa.go
+++ /dev/null
@@ -1,279 +0,0 @@
-// indcpa.go - Kyber IND-CPA encryption.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to the software, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package kyber
-
-import (
- "io"
-
- "golang.org/x/crypto/sha3"
-)
-
-// Serialize the public key as concatenation of the compressed and serialized
-// vector of polynomials pk and the public seed used to generate the matrix A.
-func packPublicKey(r []byte, pk *polyVec, seed []byte) {
- pk.compress(r)
- copy(r[pk.compressedSize():], seed[:SymSize])
-}
-
-// De-serialize and decompress public key from a byte array; approximate
-// inverse of packPublicKey.
-func unpackPublicKey(pk *polyVec, seed, packedPk []byte) {
- pk.decompress(packedPk)
-
- off := pk.compressedSize()
- copy(seed, packedPk[off:off+SymSize])
-}
-
-// Serialize the ciphertext as concatenation of the compressed and serialized
-// vector of polynomials b and the compressed and serialized polynomial v.
-func packCiphertext(r []byte, b *polyVec, v *poly) {
- b.compress(r)
- v.compress(r[b.compressedSize():])
-}
-
-// De-serialize and decompress ciphertext from a byte array; approximate
-// inverse of packCiphertext.
-func unpackCiphertext(b *polyVec, v *poly, c []byte) {
- b.decompress(c)
- v.decompress(c[b.compressedSize():])
-}
-
-// Serialize the secret key.
-func packSecretKey(r []byte, sk *polyVec) {
- sk.toBytes(r)
-}
-
-// De-serialize the secret key; inverse of packSecretKey.
-func unpackSecretKey(sk *polyVec, packedSk []byte) {
- sk.fromBytes(packedSk)
-}
-
-// Deterministically generate matrix A (or the transpose of A) from a seed.
-// Entries of the matrix are polynomials that look uniformly random. Performs
-// rejection sampling on output of SHAKE-128.
-func genMatrix(a []polyVec, seed []byte, transposed bool) {
- const (
- shake128Rate = 168 // xof.BlockSize() is not a constant.
- maxBlocks = 4
- )
- var buf [shake128Rate * maxBlocks]byte
-
- var extSeed [SymSize + 2]byte
- copy(extSeed[:SymSize], seed)
-
- xof := sha3.NewShake128()
-
- for i, v := range a {
- for j, p := range v.vec {
- if transposed {
- extSeed[SymSize] = byte(i)
- extSeed[SymSize+1] = byte(j)
- } else {
- extSeed[SymSize] = byte(j)
- extSeed[SymSize+1] = byte(i)
- }
-
- xof.Write(extSeed[:])
- xof.Read(buf[:])
-
- for ctr, pos, maxPos := 0, 0, len(buf); ctr < kyberN; {
- val := (uint16(buf[pos]) | (uint16(buf[pos+1]) << 8)) & 0x1fff
- if val < kyberQ {
- p.coeffs[ctr] = val
- ctr++
- }
- if pos += 2; pos == maxPos {
- // On the unlikely chance 4 blocks is insufficient,
- // incrementally squeeze out 1 block at a time.
- xof.Read(buf[:shake128Rate])
- pos, maxPos = 0, shake128Rate
- }
- }
-
- xof.Reset()
- }
- }
-}
-
-type indcpaPublicKey struct {
- packed []byte
- h [32]byte
-}
-
-func (pk *indcpaPublicKey) toBytes() []byte {
- return pk.packed
-}
-
-func (pk *indcpaPublicKey) fromBytes(p *ParameterSet, b []byte) error {
- if len(b) != p.indcpaPublicKeySize {
- return ErrInvalidKeySize
- }
-
- pk.packed = make([]byte, len(b))
- copy(pk.packed, b)
- pk.h = sha3.Sum256(b)
-
- return nil
-}
-
-type indcpaSecretKey struct {
- packed []byte
-}
-
-func (sk *indcpaSecretKey) fromBytes(p *ParameterSet, b []byte) error {
- if len(b) != p.indcpaSecretKeySize {
- return ErrInvalidKeySize
- }
-
- sk.packed = make([]byte, len(b))
- copy(sk.packed, b)
-
- return nil
-}
-
-// Generates public and private key for the CPA-secure public-key encryption
-// scheme underlying Kyber.
-func (p *ParameterSet) indcpaKeyPair(rng io.Reader) (*indcpaPublicKey, *indcpaSecretKey, error) {
- buf := make([]byte, SymSize+SymSize)
- if _, err := io.ReadFull(rng, buf[:SymSize]); err != nil {
- return nil, nil, err
- }
-
- sk := &indcpaSecretKey{
- packed: make([]byte, p.indcpaSecretKeySize),
- }
- pk := &indcpaPublicKey{
- packed: make([]byte, p.indcpaPublicKeySize),
- }
-
- h := sha3.New512()
- h.Write(buf[:SymSize])
- buf = buf[:0] // Reuse the backing store.
- buf = h.Sum(buf)
- publicSeed, noiseSeed := buf[:SymSize], buf[SymSize:]
-
- a := p.allocMatrix()
- genMatrix(a, publicSeed, false)
-
- var nonce byte
- skpv := p.allocPolyVec()
- for _, pv := range skpv.vec {
- pv.getNoise(noiseSeed, nonce, p.eta)
- nonce++
- }
-
- skpv.ntt()
-
- e := p.allocPolyVec()
- for _, pv := range e.vec {
- pv.getNoise(noiseSeed, nonce, p.eta)
- nonce++
- }
-
- // matrix-vector multiplication
- pkpv := p.allocPolyVec()
- for i, pv := range pkpv.vec {
- pv.pointwiseAcc(&skpv, &a[i])
- }
-
- pkpv.invntt()
- pkpv.add(&pkpv, &e)
-
- packSecretKey(sk.packed, &skpv)
- packPublicKey(pk.packed, &pkpv, publicSeed)
- pk.h = sha3.Sum256(pk.packed)
-
- return pk, sk, nil
-}
-
-// Encryption function of the CPA-secure public-key encryption scheme
-// underlying Kyber.
-func (p *ParameterSet) indcpaEncrypt(c, m []byte, pk *indcpaPublicKey, coins []byte) {
- var k, v, epp poly
- var seed [SymSize]byte
-
- pkpv := p.allocPolyVec()
- unpackPublicKey(&pkpv, seed[:], pk.packed)
-
- k.fromMsg(m)
-
- pkpv.ntt()
-
- at := p.allocMatrix()
- genMatrix(at, seed[:], true)
-
- var nonce byte
- sp := p.allocPolyVec()
- for _, pv := range sp.vec {
- pv.getNoise(coins, nonce, p.eta)
- nonce++
- }
-
- sp.ntt()
-
- ep := p.allocPolyVec()
- for _, pv := range ep.vec {
- pv.getNoise(coins, nonce, p.eta)
- nonce++
- }
-
- // matrix-vector multiplication
- bp := p.allocPolyVec()
- for i, pv := range bp.vec {
- pv.pointwiseAcc(&sp, &at[i])
- }
-
- bp.invntt()
- bp.add(&bp, &ep)
-
- v.pointwiseAcc(&pkpv, &sp)
- v.invntt()
-
- epp.getNoise(coins, nonce, p.eta) // Don't need to increment nonce.
-
- v.add(&v, &epp)
- v.add(&v, &k)
-
- packCiphertext(c, &bp, &v)
-}
-
-// Decryption function of the CPA-secure public-key encryption scheme
-// underlying Kyber.
-func (p *ParameterSet) indcpaDecrypt(m, c []byte, sk *indcpaSecretKey) {
- var v, mp poly
-
- skpv, bp := p.allocPolyVec(), p.allocPolyVec()
- unpackCiphertext(&bp, &v, c)
- unpackSecretKey(&skpv, sk.packed)
-
- bp.ntt()
-
- mp.pointwiseAcc(&skpv, &bp)
- mp.invntt()
-
- mp.sub(&mp, &v)
-
- mp.toMsg(m)
-}
-
-func (p *ParameterSet) allocMatrix() []polyVec {
- m := make([]polyVec, 0, p.k)
- for i := 0; i < p.k; i++ {
- m = append(m, p.allocPolyVec())
- }
- return m
-}
-
-func (p *ParameterSet) allocPolyVec() polyVec {
- vec := make([]*poly, 0, p.k)
- for i := 0; i < p.k; i++ {
- vec = append(vec, new(poly))
- }
-
- return polyVec{vec}
-}
diff --git a/vendor/blitter.com/go/kyber/kem.go b/vendor/blitter.com/go/kyber/kem.go
deleted file mode 100644
index 363040c..0000000
--- a/vendor/blitter.com/go/kyber/kem.go
+++ /dev/null
@@ -1,187 +0,0 @@
-// kem.go - Kyber key encapsulation mechanism.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to the software, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package kyber
-
-import (
- "bytes"
- "crypto/subtle"
- "errors"
- "io"
-
- "golang.org/x/crypto/sha3"
-)
-
-var (
- // ErrInvalidKeySize is the error returned when a byte serailized key is
- // an invalid size.
- ErrInvalidKeySize = errors.New("kyber: invalid key size")
-
- // ErrInvalidCipherTextSize is the error thrown via a panic when a byte
- // serialized ciphertext is an invalid size.
- ErrInvalidCipherTextSize = errors.New("kyber: invalid ciphertext size")
-
- // ErrInvalidPrivateKey is the error returned when a byte serialized
- // private key is malformed.
- ErrInvalidPrivateKey = errors.New("kyber: invalid private key")
-)
-
-// PrivateKey is a Kyber private key.
-type PrivateKey struct {
- PublicKey
- sk *indcpaSecretKey
- z []byte
-}
-
-// Bytes returns the byte serialization of a PrivateKey.
-func (sk *PrivateKey) Bytes() []byte {
- p := sk.PublicKey.p
-
- b := make([]byte, 0, p.secretKeySize)
- b = append(b, sk.sk.packed...)
- b = append(b, sk.PublicKey.pk.packed...)
- b = append(b, sk.PublicKey.pk.h[:]...)
- b = append(b, sk.z...)
-
- return b
-}
-
-// PrivateKeyFromBytes deserializes a byte serialized PrivateKey.
-func (p *ParameterSet) PrivateKeyFromBytes(b []byte) (*PrivateKey, error) {
- if len(b) != p.secretKeySize {
- return nil, ErrInvalidKeySize
- }
-
- sk := new(PrivateKey)
- sk.sk = new(indcpaSecretKey)
- sk.z = make([]byte, SymSize)
- sk.PublicKey.pk = new(indcpaPublicKey)
- sk.PublicKey.p = p
-
- // De-serialize the public key first.
- off := p.indcpaSecretKeySize
- if err := sk.PublicKey.pk.fromBytes(p, b[off:off+p.publicKeySize]); err != nil {
- return nil, err
- }
- off += p.publicKeySize
- if !bytes.Equal(sk.PublicKey.pk.h[:], b[off:off+SymSize]) {
- return nil, ErrInvalidPrivateKey
- }
- off += SymSize
- copy(sk.z, b[off:])
-
- // Then go back to de-serialize the private key.
- if err := sk.sk.fromBytes(p, b[:p.indcpaSecretKeySize]); err != nil {
- return nil, err
- }
-
- return sk, nil
-}
-
-// PublicKey is a Kyber public key.
-type PublicKey struct {
- pk *indcpaPublicKey
- p *ParameterSet
-}
-
-// Bytes returns the byte serialization of a PublicKey.
-func (pk *PublicKey) Bytes() []byte {
- return pk.pk.toBytes()
-}
-
-// PublicKeyFromBytes deserializes a byte serialized PublicKey.
-func (p *ParameterSet) PublicKeyFromBytes(b []byte) (*PublicKey, error) {
- pk := &PublicKey{
- pk: new(indcpaPublicKey),
- p: p,
- }
-
- if err := pk.pk.fromBytes(p, b); err != nil {
- return nil, err
- }
-
- return pk, nil
-}
-
-// GenerateKeyPair generates a private and public key parameterized with the
-// given ParameterSet.
-func (p *ParameterSet) GenerateKeyPair(rng io.Reader) (*PublicKey, *PrivateKey, error) {
- kp := new(PrivateKey)
-
- var err error
- if kp.PublicKey.pk, kp.sk, err = p.indcpaKeyPair(rng); err != nil {
- return nil, nil, err
- }
-
- kp.PublicKey.p = p
- kp.z = make([]byte, SymSize)
- if _, err := io.ReadFull(rng, kp.z); err != nil {
- return nil, nil, err
- }
-
- return &kp.PublicKey, kp, nil
-}
-
-// KEMEncrypt generates cipher text and shared secret via the CCA-secure Kyber
-// key encapsulation mechanism.
-func (pk *PublicKey) KEMEncrypt(rng io.Reader) (cipherText []byte, sharedSecret []byte, err error) {
- var buf [SymSize]byte
- if _, err = io.ReadFull(rng, buf[:]); err != nil {
- return nil, nil, err
- }
- buf = sha3.Sum256(buf[:]) // Don't release system RNG output
-
- hKr := sha3.New512()
- hKr.Write(buf[:])
- hKr.Write(pk.pk.h[:]) // Multitarget countermeasures for coins + contributory KEM
- kr := hKr.Sum(nil)
-
- cipherText = make([]byte, pk.p.cipherTextSize)
- pk.p.indcpaEncrypt(cipherText, buf[:], pk.pk, kr[SymSize:]) // coins are in kr[SymSize:]
-
- hc := sha3.Sum256(cipherText)
- copy(kr[SymSize:], hc[:]) // overwrite coins in kr with H(c)
- hSs := sha3.New256()
- hSs.Write(kr)
- sharedSecret = hSs.Sum(nil) // hash concatenation of pre-k and H(c) to k
-
- return
-}
-
-// KEMDecrypt generates shared secret for given cipher text via the CCA-secure
-// Kyber key encapsulation mechanism.
-//
-// On failures, sharedSecret will contain a randomized value. Providing a
-// cipher text that is obviously malformed (too large/small) will result in a
-// panic.
-func (sk *PrivateKey) KEMDecrypt(cipherText []byte) (sharedSecret []byte) {
- var buf [2 * SymSize]byte
-
- p := sk.PublicKey.p
- if len(cipherText) != p.CipherTextSize() {
- panic(ErrInvalidCipherTextSize)
- }
- p.indcpaDecrypt(buf[:SymSize], cipherText, sk.sk)
-
- copy(buf[SymSize:], sk.PublicKey.pk.h[:]) // Multitarget countermeasure for coins + contributory KEM
- kr := sha3.Sum512(buf[:])
-
- cmp := make([]byte, p.cipherTextSize)
- p.indcpaEncrypt(cmp, buf[:SymSize], sk.PublicKey.pk, kr[SymSize:]) // coins are in kr[SymSize:]
-
- hc := sha3.Sum256(cipherText)
- copy(kr[SymSize:], hc[:]) // overwrite coins in kr with H(c)
-
- fail := subtle.ConstantTimeSelect(subtle.ConstantTimeCompare(cipherText, cmp), 0, 1)
- subtle.ConstantTimeCopy(fail, kr[SymSize:], sk.z) // Overwrite pre-k with z on re-encryption failure
-
- h := sha3.New256()
- h.Write(kr[:])
- sharedSecret = h.Sum(nil)
-
- return
-}
diff --git a/vendor/blitter.com/go/kyber/kex.go b/vendor/blitter.com/go/kyber/kex.go
deleted file mode 100644
index f6b56c1..0000000
--- a/vendor/blitter.com/go/kyber/kex.go
+++ /dev/null
@@ -1,249 +0,0 @@
-// kex.go - Kyber key exchange.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to the software, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package kyber
-
-import (
- "errors"
- "io"
-
- "golang.org/x/crypto/sha3"
-)
-
-var (
- // ErrInvalidMessageSize is the error thrown via a panic when a initator
- // or responder message is an invalid size.
- ErrInvalidMessageSize = errors.New("kyber: invalid message size")
-
- // ErrParameterSetMismatch is the error thrown via a panic when there
- // is a mismatch between parameter sets.
- ErrParameterSetMismatch = errors.New("kyber: parameter set mismatch")
-)
-
-// UAKEInitiatorMessageSize returns the size of the initiator UAKE message
-// in bytes.
-func (p *ParameterSet) UAKEInitiatorMessageSize() int {
- return p.PublicKeySize() + p.CipherTextSize()
-}
-
-// UAKEResponderMessageSize returns the size of the responder UAKE message
-// in bytes.
-func (p *ParameterSet) UAKEResponderMessageSize() int {
- return p.CipherTextSize()
-}
-
-// UAKEInitiatorState is a initiator UAKE instance. Each instance MUST only
-// be used for one key exchange and never reused.
-type UAKEInitiatorState struct {
- // Message is the UAKE message to send to the responder.
- Message []byte
-
- eSk *PrivateKey
- tk []byte
-}
-
-// Shared generates a shared secret for the given UAKE instance and responder
-// message.
-//
-// On failures, sharedSecret will contain a randomized value. Providing a
-// cipher text that is obviously malformed (too large/small) will result in a
-// panic.
-func (s *UAKEInitiatorState) Shared(recv []byte) (sharedSecret []byte) {
- xof := sha3.NewShake256()
- var tk []byte
-
- tk = s.eSk.KEMDecrypt(recv)
- xof.Write(tk)
- xof.Write(s.tk)
- sharedSecret = make([]byte, SymSize)
- xof.Read(sharedSecret)
-
- return
-}
-
-// NewUAKEInitiatorState creates a new initiator UAKE instance.
-func (pk *PublicKey) NewUAKEInitiatorState(rng io.Reader) (*UAKEInitiatorState, error) {
- s := new(UAKEInitiatorState)
- s.Message = make([]byte, 0, pk.p.UAKEInitiatorMessageSize())
-
- var err error
- _, s.eSk, err = pk.p.GenerateKeyPair(rng)
- if err != nil {
- return nil, err
- }
- s.Message = append(s.Message, s.eSk.PublicKey.Bytes()...)
-
- var ct []byte
- ct, s.tk, err = pk.KEMEncrypt(rng)
- if err != nil {
- return nil, err
- }
-
- s.Message = append(s.Message, ct...)
-
- return s, nil
-}
-
-// UAKEResponderShared generates a responder message and shared secret given
-// a initiator UAKE message.
-//
-// On failures, sharedSecret will contain a randomized value. Providing a
-// cipher text that is obviously malformed (too large/small) will result in a
-// panic.
-func (sk *PrivateKey) UAKEResponderShared(rng io.Reader, recv []byte) (message, sharedSecret []byte) {
- p := sk.PublicKey.p
- pkLen := p.PublicKeySize()
-
- // Deserialize the peer's ephemeral public key.
- if len(recv) != p.UAKEInitiatorMessageSize() {
- panic(ErrInvalidMessageSize)
- }
- rawPk, ct := recv[:pkLen], recv[pkLen:]
- pk, err := p.PublicKeyFromBytes(rawPk)
- if err != nil {
- panic(err)
- }
-
- xof := sha3.NewShake256()
- var tk []byte
-
- message, tk, err = pk.KEMEncrypt(rng)
- if err != nil {
- panic(err)
- }
- xof.Write(tk)
-
- tk = sk.KEMDecrypt(ct)
- xof.Write(tk)
- sharedSecret = make([]byte, SymSize)
- xof.Read(sharedSecret)
-
- return
-}
-
-// AKEInitiatorMessageSize returns the size of the initiator AKE message
-// in bytes.
-func (p *ParameterSet) AKEInitiatorMessageSize() int {
- return p.PublicKeySize() + p.CipherTextSize()
-}
-
-// AKEResponderMessageSize returns the size of the responder AKE message
-// in bytes.
-func (p *ParameterSet) AKEResponderMessageSize() int {
- return 2 * p.CipherTextSize()
-}
-
-// AKEInitiatorState is a initiator AKE instance. Each instance MUST only be
-// used for one key exchange and never reused.
-type AKEInitiatorState struct {
- // Message is the AKE message to send to the responder.
- Message []byte
-
- eSk *PrivateKey
- tk []byte
-}
-
-// Shared generates a shared secret for the given AKE instance, responder
-// message, and long term initiator private key.
-//
-// On failures sharedSecret will contain a randomized value. Providing a
-// malformed responder message, or a private key that uses a different
-// ParamterSet than the AKEInitiatorState will result in a panic.
-func (s *AKEInitiatorState) Shared(recv []byte, initiatorPrivateKey *PrivateKey) (sharedSecret []byte) {
- p := s.eSk.PublicKey.p
-
- if initiatorPrivateKey.PublicKey.p != p {
- panic(ErrParameterSetMismatch)
- }
- if len(recv) != p.AKEResponderMessageSize() {
- panic(ErrInvalidMessageSize)
- }
- ctLen := p.CipherTextSize()
-
- xof := sha3.NewShake256()
- var tk []byte
-
- tk = s.eSk.KEMDecrypt(recv[:ctLen])
- xof.Write(tk)
-
- tk = initiatorPrivateKey.KEMDecrypt(recv[ctLen:])
- xof.Write(tk)
-
- xof.Write(s.tk)
- sharedSecret = make([]byte, SymSize)
- xof.Read(sharedSecret)
-
- return
-}
-
-// NewAKEInitiatorState creates a new initiator AKE instance.
-func (pk *PublicKey) NewAKEInitiatorState(rng io.Reader) (*AKEInitiatorState, error) {
- s := new(AKEInitiatorState)
-
- // This is identical to the UAKE case, so just reuse the code.
- us, err := pk.NewUAKEInitiatorState(rng)
- if err != nil {
- return nil, err
- }
-
- s.Message = us.Message
- s.eSk = us.eSk
- s.tk = us.tk
-
- return s, nil
-}
-
-// AKEResponderShared generates a responder message and shared secret given
-// a initiator AKE message and long term initiator public key.
-//
-// On failures sharedSecret will contain a randomized value. Providing a
-// malformed responder message, or a private key that uses a different
-// ParamterSet than the AKEInitiatorState will result in a panic.
-func (sk *PrivateKey) AKEResponderShared(rng io.Reader, recv []byte, peerPublicKey *PublicKey) (message, sharedSecret []byte) {
- p := sk.PublicKey.p
- pkLen := p.PublicKeySize()
-
- if peerPublicKey.p != p {
- panic(ErrParameterSetMismatch)
- }
-
- // Deserialize the peer's ephemeral public key.
- if len(recv) != p.AKEInitiatorMessageSize() {
- panic(ErrInvalidMessageSize)
- }
- rawPk, ct := recv[:pkLen], recv[pkLen:]
- pk, err := p.PublicKeyFromBytes(rawPk)
- if err != nil {
- panic(err)
- }
-
- message = make([]byte, 0, p.AKEResponderMessageSize())
-
- xof := sha3.NewShake256()
- var tk, tmp []byte
-
- tmp, tk, err = pk.KEMEncrypt(rng)
- if err != nil {
- panic(err)
- }
- xof.Write(tk)
- message = append(message, tmp...)
-
- tmp, tk, err = peerPublicKey.KEMEncrypt(rng)
- if err != nil {
- panic(err)
- }
- xof.Write(tk)
- message = append(message, tmp...)
-
- tk = sk.KEMDecrypt(ct)
- xof.Write(tk)
- sharedSecret = make([]byte, SymSize)
- xof.Read(sharedSecret)
-
- return
-}
diff --git a/vendor/blitter.com/go/kyber/ntt.go b/vendor/blitter.com/go/kyber/ntt.go
deleted file mode 100644
index 5fb9580..0000000
--- a/vendor/blitter.com/go/kyber/ntt.go
+++ /dev/null
@@ -1,65 +0,0 @@
-// ntt.go - Number-Theoretic Transform.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to the software, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package kyber
-
-// Computes negacyclic number-theoretic transform (NTT) of a polynomial (vector
-// of 256 coefficients) in place; inputs assumed to be in normal order, output
-// in bitreversed order.
-func nttRef(p *[kyberN]uint16) {
- var j int
- k := 1
- for level := 7; level >= 0; level-- {
- distance := 1 << uint(level)
- for start := 0; start < kyberN; start = j + distance {
- zeta := zetas[k]
- k++
- for j = start; j < start+distance; j++ {
- t := montgomeryReduce(uint32(zeta) * uint32(p[j+distance]))
- p[j+distance] = barrettReduce(p[j] + 4*kyberQ - t)
-
- if level&1 == 1 { // odd level
- p[j] = p[j] + t // Omit reduction (be lazy)
- } else {
- p[j] = barrettReduce(p[j] + t)
- }
- }
- }
- }
-}
-
-// Computes inverse of negacyclic number-theoretic transform (NTT) of a
-// polynomial (vector of 256 coefficients) in place; inputs assumed to be in
-// bitreversed order, output in normal order.
-func invnttRef(a *[kyberN]uint16) {
- for level := 0; level < 8; level++ {
- distance := 1 << uint(level)
- for start := 0; start < distance; start++ {
- var jTwiddle int
- for j := start; j < kyberN-1; j += 2 * distance {
- w := uint32(omegasInvBitrevMontgomery[jTwiddle])
- jTwiddle++
-
- temp := a[j]
-
- if level&1 == 1 { // odd level
- a[j] = barrettReduce(temp + a[j+distance])
- } else {
- a[j] = temp + a[j+distance] // Omit reduction (be lazy)
- }
-
- t := w * (uint32(temp) + 4*kyberQ - uint32(a[j+distance]))
-
- a[j+distance] = montgomeryReduce(t)
- }
- }
- }
-
- for i, v := range psisInvMontgomery {
- a[i] = montgomeryReduce(uint32(a[i]) * uint32(v))
- }
-}
diff --git a/vendor/blitter.com/go/kyber/params.go b/vendor/blitter.com/go/kyber/params.go
deleted file mode 100644
index 32bcabd..0000000
--- a/vendor/blitter.com/go/kyber/params.go
+++ /dev/null
@@ -1,116 +0,0 @@
-// params.go - Kyber parameterization.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to the software, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package kyber
-
-const (
- // SymSize is the size of the shared key (and certain internal parameters
- // such as hashes and seeds) in bytes.
- SymSize = 32
-
- kyberN = 256
- kyberQ = 7681
-
- polySize = 416
- polyCompressedSize = 96
-
- compressedCoeffSize = 352
-)
-
-var (
- // Kyber512 is the Kyber-512 parameter set, which aims to provide security
- // equivalent to AES-128.
- //
- // This parameter set has a 1632 byte private key, 736 byte public key,
- // and a 800 byte cipher text.
- Kyber512 = newParameterSet("Kyber-512", 2)
-
- // Kyber768 is the Kyber-768 parameter set, which aims to provide security
- // equivalent to AES-192.
- //
- // This parameter set has a 2400 byte private key, 1088 byte public key,
- // and a 1152 byte cipher text.
- Kyber768 = newParameterSet("Kyber-768", 3)
-
- // Kyber1024 is the Kyber-1024 parameter set, which aims to provide
- // security equivalent to AES-256.
- //
- // This parameter set has a 3168 byte private key, 1440 byte public key,
- // and a 1504 byte cipher text.
- Kyber1024 = newParameterSet("Kyber-1024", 4)
-)
-
-// ParameterSet is a Kyber parameter set.
-type ParameterSet struct {
- name string
-
- k int
- eta int
-
- polyVecSize int
- polyVecCompressedSize int
-
- indcpaMsgSize int
- indcpaPublicKeySize int
- indcpaSecretKeySize int
- indcpaSize int
-
- publicKeySize int
- secretKeySize int
- cipherTextSize int
-}
-
-// Name returns the name of a given ParameterSet.
-func (p *ParameterSet) Name() string {
- return p.name
-}
-
-// PublicKeySize returns the size of a public key in bytes.
-func (p *ParameterSet) PublicKeySize() int {
- return p.publicKeySize
-}
-
-// PrivateKeySize returns the size of a private key in bytes.
-func (p *ParameterSet) PrivateKeySize() int {
- return p.secretKeySize
-}
-
-// CipherTextSize returns the size of a cipher text in bytes.
-func (p *ParameterSet) CipherTextSize() int {
- return p.cipherTextSize
-}
-
-func newParameterSet(name string, k int) *ParameterSet {
- var p ParameterSet
-
- p.name = name
- p.k = k
- switch k {
- case 2:
- p.eta = 5
- case 3:
- p.eta = 4
- case 4:
- p.eta = 3
- default:
- panic("kyber: k must be in {2,3,4}")
- }
-
- p.polyVecSize = k * polySize
- p.polyVecCompressedSize = k * compressedCoeffSize
-
- p.indcpaMsgSize = SymSize
- p.indcpaPublicKeySize = p.polyVecCompressedSize + SymSize
- p.indcpaSecretKeySize = p.polyVecSize
- p.indcpaSize = p.polyVecCompressedSize + polyCompressedSize
-
- p.publicKeySize = p.indcpaPublicKeySize
- p.secretKeySize = p.indcpaSecretKeySize + p.indcpaPublicKeySize + 2*SymSize // 32 bytes of additional space to save H(pk)
- p.cipherTextSize = p.indcpaSize
-
- return &p
-}
diff --git a/vendor/blitter.com/go/kyber/poly.go b/vendor/blitter.com/go/kyber/poly.go
deleted file mode 100644
index 85f27b1..0000000
--- a/vendor/blitter.com/go/kyber/poly.go
+++ /dev/null
@@ -1,146 +0,0 @@
-// poly.go - Kyber polynomial.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to the software, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package kyber
-
-import "golang.org/x/crypto/sha3"
-
-// Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial coeffs[0] +
-// X*coeffs[1] + X^2*xoeffs[2] + ... + X^{n-1}*coeffs[n-1].
-type poly struct {
- coeffs [kyberN]uint16
-}
-
-// Compression and subsequent serialization of a polynomial.
-func (p *poly) compress(r []byte) {
- var t [8]uint32
-
- for i, k := 0, 0; i < kyberN; i, k = i+8, k+3 {
- for j := 0; j < 8; j++ {
- t[j] = uint32((((freeze(p.coeffs[i+j]) << 3) + kyberQ/2) / kyberQ) & 7)
- }
-
- r[k] = byte(t[0] | (t[1] << 3) | (t[2] << 6))
- r[k+1] = byte((t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7))
- r[k+2] = byte((t[5] >> 1) | (t[6] << 2) | (t[7] << 5))
- }
-}
-
-// De-serialization and subsequent decompression of a polynomial; approximate
-// inverse of poly.compress().
-func (p *poly) decompress(a []byte) {
- for i, off := 0, 0; i < kyberN; i, off = i+8, off+3 {
- p.coeffs[i+0] = ((uint16(a[off]&7) * kyberQ) + 4) >> 3
- p.coeffs[i+1] = (((uint16(a[off]>>3) & 7) * kyberQ) + 4) >> 3
- p.coeffs[i+2] = (((uint16(a[off]>>6) | (uint16(a[off+1]<<2) & 4)) * kyberQ) + 4) >> 3
- p.coeffs[i+3] = (((uint16(a[off+1]>>1) & 7) * kyberQ) + 4) >> 3
- p.coeffs[i+4] = (((uint16(a[off+1]>>4) & 7) * kyberQ) + 4) >> 3
- p.coeffs[i+5] = (((uint16(a[off+1]>>7) | (uint16(a[off+2]<<1) & 6)) * kyberQ) + 4) >> 3
- p.coeffs[i+6] = (((uint16(a[off+2]>>2) & 7) * kyberQ) + 4) >> 3
- p.coeffs[i+7] = (((uint16(a[off+2] >> 5)) * kyberQ) + 4) >> 3
- }
-}
-
-// Serialization of a polynomial.
-func (p *poly) toBytes(r []byte) {
- var t [8]uint16
-
- for i := 0; i < kyberN/8; i++ {
- for j := 0; j < 8; j++ {
- t[j] = freeze(p.coeffs[8*i+j])
- }
-
- r[13*i+0] = byte(t[0] & 0xff)
- r[13*i+1] = byte((t[0] >> 8) | ((t[1] & 0x07) << 5))
- r[13*i+2] = byte((t[1] >> 3) & 0xff)
- r[13*i+3] = byte((t[1] >> 11) | ((t[2] & 0x3f) << 2))
- r[13*i+4] = byte((t[2] >> 6) | ((t[3] & 0x01) << 7))
- r[13*i+5] = byte((t[3] >> 1) & 0xff)
- r[13*i+6] = byte((t[3] >> 9) | ((t[4] & 0x0f) << 4))
- r[13*i+7] = byte((t[4] >> 4) & 0xff)
- r[13*i+8] = byte((t[4] >> 12) | ((t[5] & 0x7f) << 1))
- r[13*i+9] = byte((t[5] >> 7) | ((t[6] & 0x03) << 6))
- r[13*i+10] = byte((t[6] >> 2) & 0xff)
- r[13*i+11] = byte((t[6] >> 10) | ((t[7] & 0x1f) << 3))
- r[13*i+12] = byte(t[7] >> 5)
- }
-}
-
-// De-serialization of a polynomial; inverse of poly.toBytes().
-func (p *poly) fromBytes(a []byte) {
- for i := 0; i < kyberN/8; i++ {
- p.coeffs[8*i+0] = uint16(a[13*i+0]) | ((uint16(a[13*i+1]) & 0x1f) << 8)
- p.coeffs[8*i+1] = (uint16(a[13*i+1]) >> 5) | (uint16(a[13*i+2]) << 3) | ((uint16(a[13*i+3]) & 0x03) << 11)
- p.coeffs[8*i+2] = (uint16(a[13*i+3]) >> 2) | ((uint16(a[13*i+4]) & 0x7f) << 6)
- p.coeffs[8*i+3] = (uint16(a[13*i+4]) >> 7) | (uint16(a[13*i+5]) << 1) | ((uint16(a[13*i+6]) & 0x0f) << 9)
- p.coeffs[8*i+4] = (uint16(a[13*i+6]) >> 4) | (uint16(a[13*i+7]) << 4) | ((uint16(a[13*i+8]) & 0x01) << 12)
- p.coeffs[8*i+5] = (uint16(a[13*i+8]) >> 1) | ((uint16(a[13*i+9]) & 0x3f) << 7)
- p.coeffs[8*i+6] = (uint16(a[13*i+9]) >> 6) | (uint16(a[13*i+10]) << 2) | ((uint16(a[13*i+11]) & 0x07) << 10)
- p.coeffs[8*i+7] = (uint16(a[13*i+11]) >> 3) | (uint16(a[13*i+12]) << 5)
- }
-}
-
-// Convert 32-byte message to polynomial.
-func (p *poly) fromMsg(msg []byte) {
- for i, v := range msg[:SymSize] {
- for j := 0; j < 8; j++ {
- mask := -((uint16(v) >> uint(j)) & 1)
- p.coeffs[8*i+j] = mask & ((kyberQ + 1) / 2)
- }
- }
-}
-
-// Convert polynomial to 32-byte message.
-func (p *poly) toMsg(msg []byte) {
- for i := 0; i < SymSize; i++ {
- msg[i] = 0
- for j := 0; j < 8; j++ {
- t := (((freeze(p.coeffs[8*i+j]) << 1) + kyberQ/2) / kyberQ) & 1
- msg[i] |= byte(t << uint(j))
- }
- }
-}
-
-// Sample a polynomial deterministically from a seed and a nonce, with output
-// polynomial close to centered binomial distribution with parameter eta.
-func (p *poly) getNoise(seed []byte, nonce byte, eta int) {
- extSeed := make([]byte, 0, SymSize+1)
- extSeed = append(extSeed, seed...)
- extSeed = append(extSeed, nonce)
-
- buf := make([]byte, eta*kyberN/4)
- sha3.ShakeSum256(buf, extSeed)
-
- p.cbd(buf, eta)
-}
-
-// Computes negacyclic number-theoretic transform (NTT) of a polynomial in
-// place; inputs assumed to be in normal order, output in bitreversed order.
-func (p *poly) ntt() {
- hardwareAccelImpl.nttFn(&p.coeffs)
-}
-
-// Computes inverse of negacyclic number-theoretic transform (NTT) of a
-// polynomial in place; inputs assumed to be in bitreversed order, output in
-// normal order.
-func (p *poly) invntt() {
- hardwareAccelImpl.invnttFn(&p.coeffs)
-}
-
-// Add two polynomials.
-func (p *poly) add(a, b *poly) {
- for i := range p.coeffs {
- p.coeffs[i] = barrettReduce(a.coeffs[i] + b.coeffs[i])
- }
-}
-
-// Subtract two polynomials.
-func (p *poly) sub(a, b *poly) {
- for i := range p.coeffs {
- p.coeffs[i] = barrettReduce(a.coeffs[i] + 3*kyberQ - b.coeffs[i])
- }
-}
diff --git a/vendor/blitter.com/go/kyber/polyvec.go b/vendor/blitter.com/go/kyber/polyvec.go
deleted file mode 100644
index f4c4b4a..0000000
--- a/vendor/blitter.com/go/kyber/polyvec.go
+++ /dev/null
@@ -1,115 +0,0 @@
-// polyvec.go - Vector of Kyber polynomials.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to the software, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package kyber
-
-type polyVec struct {
- vec []*poly
-}
-
-// Compress and serialize vector of polynomials.
-func (v *polyVec) compress(r []byte) {
- var off int
- for _, vec := range v.vec {
- for j := 0; j < kyberN/8; j++ {
- var t [8]uint16
- for k := 0; k < 8; k++ {
- t[k] = uint16((((uint32(freeze(vec.coeffs[8*j+k])) << 11) + kyberQ/2) / kyberQ) & 0x7ff)
- }
-
- r[off+11*j+0] = byte(t[0] & 0xff)
- r[off+11*j+1] = byte((t[0] >> 8) | ((t[1] & 0x1f) << 3))
- r[off+11*j+2] = byte((t[1] >> 5) | ((t[2] & 0x03) << 6))
- r[off+11*j+3] = byte((t[2] >> 2) & 0xff)
- r[off+11*j+4] = byte((t[2] >> 10) | ((t[3] & 0x7f) << 1))
- r[off+11*j+5] = byte((t[3] >> 7) | ((t[4] & 0x0f) << 4))
- r[off+11*j+6] = byte((t[4] >> 4) | ((t[5] & 0x01) << 7))
- r[off+11*j+7] = byte((t[5] >> 1) & 0xff)
- r[off+11*j+8] = byte((t[5] >> 9) | ((t[6] & 0x3f) << 2))
- r[off+11*j+9] = byte((t[6] >> 6) | ((t[7] & 0x07) << 5))
- r[off+11*j+10] = byte((t[7] >> 3))
- }
- off += compressedCoeffSize
- }
-}
-
-// De-serialize and decompress vector of polynomials; approximate inverse of
-// polyVec.compress().
-func (v *polyVec) decompress(a []byte) {
- var off int
- for _, vec := range v.vec {
- for j := 0; j < kyberN/8; j++ {
- vec.coeffs[8*j+0] = uint16((((uint32(a[off+11*j+0]) | ((uint32(a[off+11*j+1]) & 0x07) << 8)) * kyberQ) + 1024) >> 11)
- vec.coeffs[8*j+1] = uint16(((((uint32(a[off+11*j+1]) >> 3) | ((uint32(a[off+11*j+2]) & 0x3f) << 5)) * kyberQ) + 1024) >> 11)
- vec.coeffs[8*j+2] = uint16(((((uint32(a[off+11*j+2]) >> 6) | ((uint32(a[off+11*j+3]) & 0xff) << 2) | ((uint32(a[off+11*j+4]) & 0x01) << 10)) * kyberQ) + 1024) >> 11)
- vec.coeffs[8*j+3] = uint16(((((uint32(a[off+11*j+4]) >> 1) | ((uint32(a[off+11*j+5]) & 0x0f) << 7)) * kyberQ) + 1024) >> 11)
- vec.coeffs[8*j+4] = uint16(((((uint32(a[off+11*j+5]) >> 4) | ((uint32(a[off+11*j+6]) & 0x7f) << 4)) * kyberQ) + 1024) >> 11)
- vec.coeffs[8*j+5] = uint16(((((uint32(a[off+11*j+6]) >> 7) | ((uint32(a[off+11*j+7]) & 0xff) << 1) | ((uint32(a[off+11*j+8]) & 0x03) << 9)) * kyberQ) + 1024) >> 11)
- vec.coeffs[8*j+6] = uint16(((((uint32(a[off+11*j+8]) >> 2) | ((uint32(a[off+11*j+9]) & 0x1f) << 6)) * kyberQ) + 1024) >> 11)
- vec.coeffs[8*j+7] = uint16(((((uint32(a[off+11*j+9]) >> 5) | ((uint32(a[off+11*j+10]) & 0xff) << 3)) * kyberQ) + 1024) >> 11)
- }
- off += compressedCoeffSize
- }
-}
-
-// Serialize vector of polynomials.
-func (v *polyVec) toBytes(r []byte) {
- for i, p := range v.vec {
- p.toBytes(r[i*polySize:])
- }
-}
-
-// De-serialize vector of polynomials; inverse of polyVec.toBytes().
-func (v *polyVec) fromBytes(a []byte) {
- for i, p := range v.vec {
- p.fromBytes(a[i*polySize:])
- }
-}
-
-// Apply forward NTT to all elements of a vector of polynomials.
-func (v *polyVec) ntt() {
- for _, p := range v.vec {
- p.ntt()
- }
-}
-
-// Apply inverse NTT to all elements of a vector of polynomials.
-func (v *polyVec) invntt() {
- for _, p := range v.vec {
- p.invntt()
- }
-}
-
-// Pointwise multiply elements of a and b and accumulate into p.
-func (p *poly) pointwiseAcc(a, b *polyVec) {
- hardwareAccelImpl.pointwiseAccFn(p, a, b)
-}
-
-// Add vectors of polynomials.
-func (v *polyVec) add(a, b *polyVec) {
- for i, p := range v.vec {
- p.add(a.vec[i], b.vec[i])
- }
-}
-
-// Get compressed and serialized size in bytes.
-func (v *polyVec) compressedSize() int {
- return len(v.vec) * compressedCoeffSize
-}
-
-func pointwiseAccRef(p *poly, a, b *polyVec) {
- for j := 0; j < kyberN; j++ {
- t := montgomeryReduce(4613 * uint32(b.vec[0].coeffs[j])) // 4613 = 2^{2*18} % q
- p.coeffs[j] = montgomeryReduce(uint32(a.vec[0].coeffs[j]) * uint32(t))
- for i := 1; i < len(a.vec); i++ { // len(a.vec) == kyberK
- t = montgomeryReduce(4613 * uint32(b.vec[i].coeffs[j]))
- p.coeffs[j] += montgomeryReduce(uint32(a.vec[i].coeffs[j]) * uint32(t))
- }
-
- p.coeffs[j] = barrettReduce(p.coeffs[j])
- }
-}
diff --git a/vendor/blitter.com/go/kyber/precomp.go b/vendor/blitter.com/go/kyber/precomp.go
deleted file mode 100644
index a18cd8f..0000000
--- a/vendor/blitter.com/go/kyber/precomp.go
+++ /dev/null
@@ -1,87 +0,0 @@
-// precomp.go - Precomputed NTT constants.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to the software, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package kyber
-
-// Precomputed constants for the forward NTT and inverse NTT.
-// Computed using Pari/GP as follows:
-//
-// brv=[0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240, \
-// 8,136,72,200,40,168,104,232,24,152,88,216,56,184,120,248, \
-// 4,132,68,196,36,164,100,228,20,148,84,212,52,180,116,244, \
-// 12,140,76,204,44,172,108,236,28,156,92,220,60,188,124,252, \
-// 2,130,66,194,34,162,98,226,18,146,82,210,50,178,114,242, \
-// 10,138,74,202,42,170,106,234,26,154,90,218,58,186,122,250, \
-// 6,134,70,198,38,166,102,230,22,150,86,214,54,182,118,246, \
-// 14,142,78,206,46,174,110,238,30,158,94,222,62,190,126,254, \
-// 1,129,65,193,33,161,97,225,17,145,81,209,49,177,113,241, \
-// 9,137,73,201,41,169,105,233,25,153,89,217,57,185,121,249, \
-// 5,133,69,197,37,165,101,229,21,149,85,213,53,181,117,245, \
-// 13,141,77,205,45,173,109,237,29,157,93,221,61,189,125,253, \
-// 3,131,67,195,35,163,99,227,19,147,83,211,51,179,115,243, \
-// 11,139,75,203,43,171,107,235,27,155,91,219,59,187,123,251, \
-// 7,135,71,199,39,167,103,231,23,151,87,215,55,183,119,247, \
-// 15,143,79,207,47,175,111,239,31,159,95,223,63,191,127,255];
-//
-// q = 7681;
-// n = 256;
-// mont = Mod(2^18,q);
-//
-// g=0; for(i=2,q-1,if(znorder(Mod(i,q)) == 2*n, g=Mod(i,q); break))
-//
-// zetas = lift(vector(n, i, g^(brv[i])*mont))
-// omegas_inv_bitrev_montgomery = lift(vector(n/2, i, (g^2)^(-brv[2*(i-1)+1])*mont))
-// psis_inv_montgomery = lift(vector(n, i, g^(-(i-1))/n*mont))
-
-var zetas = [kyberN]uint16{
- 990, 7427, 2634, 6819, 578, 3281, 2143, 1095, 484, 6362, 3336, 5382, 6086, 3823, 877, 5656,
- 3583, 7010, 6414, 263, 1285, 291, 7143, 7338, 1581, 5134, 5184, 5932, 4042, 5775, 2468, 3,
- 606, 729, 5383, 962, 3240, 7548, 5129, 7653, 5929, 4965, 2461, 641, 1584, 2666, 1142, 157,
- 7407, 5222, 5602, 5142, 6140, 5485, 4931, 1559, 2085, 5284, 2056, 3538, 7269, 3535, 7190, 1957,
- 3465, 6792, 1538, 4664, 2023, 7643, 3660, 7673, 1694, 6905, 3995, 3475, 5939, 1859, 6910, 4434,
- 1019, 1492, 7087, 4761, 657, 4859, 5798, 2640, 1693, 2607, 2782, 5400, 6466, 1010, 957, 3851,
- 2121, 6392, 7319, 3367, 3659, 3375, 6430, 7583, 1549, 5856, 4773, 6084, 5544, 1650, 3997, 4390,
- 6722, 2915, 4245, 2635, 6128, 7676, 5737, 1616, 3457, 3132, 7196, 4702, 6239, 851, 2122, 3009,
- 7613, 7295, 2007, 323, 5112, 3716, 2289, 6442, 6965, 2713, 7126, 3401, 963, 6596, 607, 5027,
- 7078, 4484, 5937, 944, 2860, 2680, 5049, 1777, 5850, 3387, 6487, 6777, 4812, 4724, 7077, 186,
- 6848, 6793, 3463, 5877, 1174, 7116, 3077, 5945, 6591, 590, 6643, 1337, 6036, 3991, 1675, 2053,
- 6055, 1162, 1679, 3883, 4311, 2106, 6163, 4486, 6374, 5006, 4576, 4288, 5180, 4102, 282, 6119,
- 7443, 6330, 3184, 4971, 2530, 5325, 4171, 7185, 5175, 5655, 1898, 382, 7211, 43, 5965, 6073,
- 1730, 332, 1577, 3304, 2329, 1699, 6150, 2379, 5113, 333, 3502, 4517, 1480, 1172, 5567, 651,
- 925, 4573, 599, 1367, 4109, 1863, 6929, 1605, 3866, 2065, 4048, 839, 5764, 2447, 2022, 3345,
- 1990, 4067, 2036, 2069, 3567, 7371, 2368, 339, 6947, 2159, 654, 7327, 2768, 6676, 987, 2214,
-}
-
-var omegasInvBitrevMontgomery = [kyberN / 2]uint16{
- 990, 254, 862, 5047, 6586, 5538, 4400, 7103, 2025, 6804, 3858, 1595, 2299, 4345, 1319, 7197,
- 7678, 5213, 1906, 3639, 1749, 2497, 2547, 6100, 343, 538, 7390, 6396, 7418, 1267, 671, 4098,
- 5724, 491, 4146, 412, 4143, 5625, 2397, 5596, 6122, 2750, 2196, 1541, 2539, 2079, 2459, 274,
- 7524, 6539, 5015, 6097, 7040, 5220, 2716, 1752, 28, 2552, 133, 4441, 6719, 2298, 6952, 7075,
- 4672, 5559, 6830, 1442, 2979, 485, 4549, 4224, 6065, 1944, 5, 1553, 5046, 3436, 4766, 959,
- 3291, 3684, 6031, 2137, 1597, 2908, 1825, 6132, 98, 1251, 4306, 4022, 4314, 362, 1289, 5560,
- 3830, 6724, 6671, 1215, 2281, 4899, 5074, 5988, 5041, 1883, 2822, 7024, 2920, 594, 6189, 6662,
- 3247, 771, 5822, 1742, 4206, 3686, 776, 5987, 8, 4021, 38, 5658, 3017, 6143, 889, 4216,
-}
-
-var psisInvMontgomery = [kyberN]uint16{
- 1024, 4972, 5779, 6907, 4943, 4168, 315, 5580, 90, 497, 1123, 142, 4710, 5527, 2443, 4871,
- 698, 2489, 2394, 4003, 684, 2241, 2390, 7224, 5072, 2064, 4741, 1687, 6841, 482, 7441, 1235,
- 2126, 4742, 2802, 5744, 6287, 4933, 699, 3604, 1297, 2127, 5857, 1705, 3868, 3779, 4397, 2177,
- 159, 622, 2240, 1275, 640, 6948, 4572, 5277, 209, 2605, 1157, 7328, 5817, 3191, 1662, 2009,
- 4864, 574, 2487, 164, 6197, 4436, 7257, 3462, 4268, 4281, 3414, 4515, 3170, 1290, 2003, 5855,
- 7156, 6062, 7531, 1732, 3249, 4884, 7512, 3590, 1049, 2123, 1397, 6093, 3691, 6130, 6541, 3946,
- 6258, 3322, 1788, 4241, 4900, 2309, 1400, 1757, 400, 502, 6698, 2338, 3011, 668, 7444, 4580,
- 6516, 6795, 2959, 4136, 3040, 2279, 6355, 3943, 2913, 6613, 7416, 4084, 6508, 5556, 4054, 3782,
- 61, 6567, 2212, 779, 632, 5709, 5667, 4923, 4911, 6893, 4695, 4164, 3536, 2287, 7594, 2848,
- 3267, 1911, 3128, 546, 1991, 156, 4958, 5531, 6903, 483, 875, 138, 250, 2234, 2266, 7222,
- 2842, 4258, 812, 6703, 232, 5207, 6650, 2585, 1900, 6225, 4932, 7265, 4701, 3173, 4635, 6393,
- 227, 7313, 4454, 4284, 6759, 1224, 5223, 1447, 395, 2608, 4502, 4037, 189, 3348, 54, 6443,
- 2210, 6230, 2826, 1780, 3002, 5995, 1955, 6102, 6045, 3938, 5019, 4417, 1434, 1262, 1507, 5847,
- 5917, 7157, 7177, 6434, 7537, 741, 4348, 1309, 145, 374, 2236, 4496, 5028, 6771, 6923, 7421,
- 1978, 1023, 3857, 6876, 1102, 7451, 4704, 6518, 1344, 765, 384, 5705, 1207, 1630, 4734, 1563,
- 6839, 5933, 1954, 4987, 7142, 5814, 7527, 4953, 7637, 4707, 2182, 5734, 2818, 541, 4097, 5641,
-}
diff --git a/vendor/blitter.com/go/kyber/reduce.go b/vendor/blitter.com/go/kyber/reduce.go
deleted file mode 100644
index ada3eec..0000000
--- a/vendor/blitter.com/go/kyber/reduce.go
+++ /dev/null
@@ -1,43 +0,0 @@
-// reduce.go - Montgomery, Barret, and Full reduction.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to the software, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package kyber
-
-const (
- qinv = 7679 // -inverse_mod(q,2^18)
- rlog = 18
-)
-
-// Montgomery reduction; given a 32-bit integer a, computes 16-bit integer
-// congruent to a * R^-1 mod q, where R=2^18 (see value of rlog).
-func montgomeryReduce(a uint32) uint16 {
- u := a * qinv
- u &= (1 << rlog) - 1
- u *= kyberQ
- a += u
- return uint16(a >> rlog)
-}
-
-// Barrett reduction; given a 16-bit integer a, computes 16-bit integer
-// congruent to a mod q in {0,...,11768}.
-func barrettReduce(a uint16) uint16 {
- u := uint32(a >> 13) // ((uint32_t) a * sinv) >> 16
- u *= kyberQ
- a -= uint16(u)
- return a
-}
-
-// Full reduction; given a 16-bit integer a, computes unsigned integer a mod q.
-func freeze(x uint16) uint16 {
- r := barrettReduce(x)
-
- m := r - kyberQ
- c := int16(m)
- c >>= 15
- r = m ^ ((r ^ m) & uint16(c))
- return r
-}
diff --git a/vendor/blitter.com/go/mtwist/mtwist.go b/vendor/blitter.com/go/mtwist/mtwist.go
deleted file mode 100644
index 4527525..0000000
--- a/vendor/blitter.com/go/mtwist/mtwist.go
+++ /dev/null
@@ -1,125 +0,0 @@
-// MersenneTwister
-// From https://gist.github.com/cuixin/1b8b6bd7bfbde8fe76e8
-package MersenneTwister
-
-import (
- "crypto"
-
- _ "crypto/sha512"
-)
-
-const N = 312
-const M = 156
-const MATRIX_A = 0xB5026F5AA96619E9
-const UPPER_MASK = 0xFFFFFFFF80000000
-const LOWER_MASK = 0x7FFFFFFF
-
-type MT19937_64 struct {
- array [N]uint64 //state vector
- index uint64 // array index
-}
-
-func New() *MT19937_64 {
- return &MT19937_64{
- index: N + 1,
- }
-}
-
-func (m *MT19937_64) _initstate() {
- // Recommendations abound that mtwist should throw away 1st 10000 or so
- // of initial state
- for i := 0; i < 10000; i++ {
- _ = m.Int63()
- }
-}
-
-func (m *MT19937_64) Seed(seed int64) {
- m.array[0] = uint64(seed)
- for m.index = 1; m.index < N; m.index++ {
- m.array[m.index] = (6364136223846793005*(m.array[m.index-1]^(m.array[m.index-1]>>62)) + m.index)
- }
- m._initstate()
- //fmt.Printf("final array(s):%v\n", m.array)
-}
-
-func _bytesToUint64(b []byte) (r uint64) {
- r = uint64(b[0])<<56 +
- uint64(b[1])<<48 +
- uint64(b[2])<<40 +
- uint64(b[3])<<32 +
- uint64(b[4])<<24 +
- uint64(b[5])<<16 +
- uint64(b[6])<<8 +
- uint64(b[7])
- return
-}
-
-func (m *MT19937_64) SeedFullState(s []byte) {
- //fmt.Printf("s:%v\n", s)
- if len(s) < N*8 {
- // Expand s if shorter than mtwist array state
- ha := crypto.SHA512
- h := ha.New()
- shortfallChunks := ((N * 8) - len(s)) / h.Size()
- //shortfallRem := ((N * 8) - len(s)) % h.Size()
- //fmt.Printf("chunks, rem:%d,%d\n", shortfallChunks, shortfallRem)
- idx := 0
- for idx < shortfallChunks {
- _, _ = h.Write(s)
- s = h.Sum(s)
- idx += 1
- }
- _, _ = h.Write(s)
- s = h.Sum(s)
- //fmt.Printf("exp s:%v\n", s)
- }
-
- for idx := 0; idx < N; {
- m.array[idx] = _bytesToUint64(s[idx*8 : (idx*8)+8])
- idx += 1
- }
- //fmt.Printf("final array(xs):%v\n", m.array)
- m.index = 0
- m._initstate()
-}
-
-func (m *MT19937_64) Int63() uint64 {
- var i int
- var x uint64
- mag01 := []uint64{0, MATRIX_A}
- if m.index >= N {
- if m.index == N+1 {
- m.Seed(int64(5489))
- }
-
- for i = 0; i < N-M; i++ {
- x = (m.array[i] & UPPER_MASK) | (m.array[i+1] & LOWER_MASK)
- m.array[i] = m.array[i+(M)] ^ (x >> 1) ^ mag01[int(x&uint64(1))]
- }
- for ; i < N-1; i++ {
- x = (m.array[i] & UPPER_MASK) | (m.array[i+1] & LOWER_MASK)
- m.array[i] = m.array[i+(M-N)] ^ (x >> 1) ^ mag01[int(x&uint64(1))]
- }
- x = (m.array[N-1] & UPPER_MASK) | (m.array[0] & LOWER_MASK)
- m.array[N-1] = m.array[M-1] ^ (x >> 1) ^ mag01[int(x&uint64(1))]
- m.index = 0
- }
- x = m.array[m.index]
- m.index++
- x ^= (x >> 29) & 0x5555555555555555
- x ^= (x << 17) & 0x71D67FFFEDA60000
- x ^= (x << 37) & 0xFFF7EEE000000000
- x ^= (x >> 43)
- return x
-}
-
-func (m *MT19937_64) IntN(value uint64) uint64 {
- return m.Int63() % value
-}
-
-func (m *MT19937_64) Read(p []byte) (n int, err error) {
- for idx := 0; idx < len(p); idx++ {
- p[idx] = byte( (m.Int63()>>47) % 256)
- }
- return n, nil
-}
diff --git a/vendor/blitter.com/go/newhope/LICENSE b/vendor/blitter.com/go/newhope/LICENSE
deleted file mode 100644
index 6ca207e..0000000
--- a/vendor/blitter.com/go/newhope/LICENSE
+++ /dev/null
@@ -1,122 +0,0 @@
-Creative Commons Legal Code
-
-CC0 1.0 Universal
-
- CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
- LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
- ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
- INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
- REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
- PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
- THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
- HEREUNDER.
-
-Statement of Purpose
-
-The laws of most jurisdictions throughout the world automatically confer
-exclusive Copyright and Related Rights (defined below) upon the creator
-and subsequent owner(s) (each and all, an "owner") of an original work of
-authorship and/or a database (each, a "Work").
-
-Certain owners wish to permanently relinquish those rights to a Work for
-the purpose of contributing to a commons of creative, cultural and
-scientific works ("Commons") that the public can reliably and without fear
-of later claims of infringement build upon, modify, incorporate in other
-works, reuse and redistribute as freely as possible in any form whatsoever
-and for any purposes, including without limitation commercial purposes.
-These owners may contribute to the Commons to promote the ideal of a free
-culture and the further production of creative, cultural and scientific
-works, or to gain reputation or greater distribution for their Work in
-part through the use and efforts of others.
-
-For these and/or other purposes and motivations, and without any
-expectation of additional consideration or compensation, the person
-associating CC0 with a Work (the "Affirmer"), to the extent that he or she
-is an owner of Copyright and Related Rights in the Work, voluntarily
-elects to apply CC0 to the Work and publicly distribute the Work under its
-terms, with knowledge of his or her Copyright and Related Rights in the
-Work and the meaning and intended legal effect of CC0 on those rights.
-
-1. Copyright and Related Rights. A Work made available under CC0 may be
-protected by copyright and related or neighboring rights ("Copyright and
-Related Rights"). Copyright and Related Rights include, but are not
-limited to, the following:
-
- i. the right to reproduce, adapt, distribute, perform, display,
- communicate, and translate a Work;
- ii. moral rights retained by the original author(s) and/or performer(s);
-iii. publicity and privacy rights pertaining to a person's image or
- likeness depicted in a Work;
- iv. rights protecting against unfair competition in regards to a Work,
- subject to the limitations in paragraph 4(a), below;
- v. rights protecting the extraction, dissemination, use and reuse of data
- in a Work;
- vi. database rights (such as those arising under Directive 96/9/EC of the
- European Parliament and of the Council of 11 March 1996 on the legal
- protection of databases, and under any national implementation
- thereof, including any amended or successor version of such
- directive); and
-vii. other similar, equivalent or corresponding rights throughout the
- world based on applicable law or treaty, and any national
- implementations thereof.
-
-2. Waiver. To the greatest extent permitted by, but not in contravention
-of, applicable law, Affirmer hereby overtly, fully, permanently,
-irrevocably and unconditionally waives, abandons, and surrenders all of
-Affirmer's Copyright and Related Rights and associated claims and causes
-of action, whether now known or unknown (including existing as well as
-future claims and causes of action), in the Work (i) in all territories
-worldwide, (ii) for the maximum duration provided by applicable law or
-treaty (including future time extensions), (iii) in any current or future
-medium and for any number of copies, and (iv) for any purpose whatsoever,
-including without limitation commercial, advertising or promotional
-purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
-member of the public at large and to the detriment of Affirmer's heirs and
-successors, fully intending that such Waiver shall not be subject to
-revocation, rescission, cancellation, termination, or any other legal or
-equitable action to disrupt the quiet enjoyment of the Work by the public
-as contemplated by Affirmer's express Statement of Purpose.
-
-3. Public License Fallback. Should any part of the Waiver for any reason
-be judged legally invalid or ineffective under applicable law, then the
-Waiver shall be preserved to the maximum extent permitted taking into
-account Affirmer's express Statement of Purpose. In addition, to the
-extent the Waiver is so judged Affirmer hereby grants to each affected
-person a royalty-free, non transferable, non sublicensable, non exclusive,
-irrevocable and unconditional license to exercise Affirmer's Copyright and
-Related Rights in the Work (i) in all territories worldwide, (ii) for the
-maximum duration provided by applicable law or treaty (including future
-time extensions), (iii) in any current or future medium and for any number
-of copies, and (iv) for any purpose whatsoever, including without
-limitation commercial, advertising or promotional purposes (the
-"License"). The License shall be deemed effective as of the date CC0 was
-applied by Affirmer to the Work. Should any part of the License for any
-reason be judged legally invalid or ineffective under applicable law, such
-partial invalidity or ineffectiveness shall not invalidate the remainder
-of the License, and in such case Affirmer hereby affirms that he or she
-will not (i) exercise any of his or her remaining Copyright and Related
-Rights in the Work or (ii) assert any associated claims and causes of
-action with respect to the Work, in either case contrary to Affirmer's
-express Statement of Purpose.
-
-4. Limitations and Disclaimers.
-
- a. No trademark or patent rights held by Affirmer are waived, abandoned,
- surrendered, licensed or otherwise affected by this document.
- b. Affirmer offers the Work as-is and makes no representations or
- warranties of any kind concerning the Work, express, implied,
- statutory or otherwise, including without limitation warranties of
- title, merchantability, fitness for a particular purpose, non
- infringement, or the absence of latent or other defects, accuracy, or
- the present or absence of errors, whether or not discoverable, all to
- the greatest extent permissible under applicable law.
- c. Affirmer disclaims responsibility for clearing rights of other persons
- that may apply to the Work or any use thereof, including without
- limitation any person's Copyright and Related Rights in the Work.
- Further, Affirmer disclaims responsibility for obtaining any necessary
- consents, permissions or other rights required for any use of the
- Work.
- d. Affirmer understands and acknowledges that Creative Commons is not a
- party to this document and has no duty or obligation with respect to
- this CC0 or use of the Work.
-
diff --git a/vendor/blitter.com/go/newhope/batcher.go b/vendor/blitter.com/go/newhope/batcher.go
deleted file mode 100644
index 2f99489..0000000
--- a/vendor/blitter.com/go/newhope/batcher.go
+++ /dev/null
@@ -1,929 +0,0 @@
-// batcher.go - Constant time polynomial sampler.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to newhope, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package newhope
-
-func batcher84(x []uint16) {
- // In theory this should probably be inlined.
- compareAndSwap := func(x []uint16, i, j int) {
- const _5q = 5 * paramQ
- var c int32
- var t uint16
-
- c = _5q - 1 - int32(x[16*i])
- c >>= 31
- t = x[16*i] ^ x[16*j]
- t &= uint16(c)
- x[16*i] ^= t
- x[16*j] ^= t
- }
-
- compareAndSwap(x, 0, 1)
- compareAndSwap(x, 2, 3)
- compareAndSwap(x, 0, 2)
- compareAndSwap(x, 1, 3)
- compareAndSwap(x, 1, 2)
- compareAndSwap(x, 4, 5)
- compareAndSwap(x, 6, 7)
- compareAndSwap(x, 4, 6)
- compareAndSwap(x, 5, 7)
- compareAndSwap(x, 5, 6)
- compareAndSwap(x, 0, 4)
- compareAndSwap(x, 2, 6)
- compareAndSwap(x, 2, 4)
- compareAndSwap(x, 1, 5)
- compareAndSwap(x, 3, 7)
- compareAndSwap(x, 3, 5)
- compareAndSwap(x, 1, 2)
- compareAndSwap(x, 3, 4)
- compareAndSwap(x, 5, 6)
- compareAndSwap(x, 8, 9)
- compareAndSwap(x, 10, 11)
- compareAndSwap(x, 8, 10)
- compareAndSwap(x, 9, 11)
- compareAndSwap(x, 9, 10)
- compareAndSwap(x, 12, 13)
- compareAndSwap(x, 14, 15)
- compareAndSwap(x, 12, 14)
- compareAndSwap(x, 13, 15)
- compareAndSwap(x, 13, 14)
- compareAndSwap(x, 8, 12)
- compareAndSwap(x, 10, 14)
- compareAndSwap(x, 10, 12)
- compareAndSwap(x, 9, 13)
- compareAndSwap(x, 11, 15)
- compareAndSwap(x, 11, 13)
- compareAndSwap(x, 9, 10)
- compareAndSwap(x, 11, 12)
- compareAndSwap(x, 13, 14)
- compareAndSwap(x, 0, 8)
- compareAndSwap(x, 4, 12)
- compareAndSwap(x, 4, 8)
- compareAndSwap(x, 2, 10)
- compareAndSwap(x, 6, 14)
- compareAndSwap(x, 6, 10)
- compareAndSwap(x, 2, 4)
- compareAndSwap(x, 6, 8)
- compareAndSwap(x, 10, 12)
- compareAndSwap(x, 1, 9)
- compareAndSwap(x, 5, 13)
- compareAndSwap(x, 5, 9)
- compareAndSwap(x, 3, 11)
- compareAndSwap(x, 7, 15)
- compareAndSwap(x, 7, 11)
- compareAndSwap(x, 3, 5)
- compareAndSwap(x, 7, 9)
- compareAndSwap(x, 11, 13)
- compareAndSwap(x, 1, 2)
- compareAndSwap(x, 3, 4)
- compareAndSwap(x, 5, 6)
- compareAndSwap(x, 7, 8)
- compareAndSwap(x, 9, 10)
- compareAndSwap(x, 11, 12)
- compareAndSwap(x, 13, 14)
- compareAndSwap(x, 16, 17)
- compareAndSwap(x, 18, 19)
- compareAndSwap(x, 16, 18)
- compareAndSwap(x, 17, 19)
- compareAndSwap(x, 17, 18)
- compareAndSwap(x, 20, 21)
- compareAndSwap(x, 22, 23)
- compareAndSwap(x, 20, 22)
- compareAndSwap(x, 21, 23)
- compareAndSwap(x, 21, 22)
- compareAndSwap(x, 16, 20)
- compareAndSwap(x, 18, 22)
- compareAndSwap(x, 18, 20)
- compareAndSwap(x, 17, 21)
- compareAndSwap(x, 19, 23)
- compareAndSwap(x, 19, 21)
- compareAndSwap(x, 17, 18)
- compareAndSwap(x, 19, 20)
- compareAndSwap(x, 21, 22)
- compareAndSwap(x, 24, 25)
- compareAndSwap(x, 26, 27)
- compareAndSwap(x, 24, 26)
- compareAndSwap(x, 25, 27)
- compareAndSwap(x, 25, 26)
- compareAndSwap(x, 28, 29)
- compareAndSwap(x, 30, 31)
- compareAndSwap(x, 28, 30)
- compareAndSwap(x, 29, 31)
- compareAndSwap(x, 29, 30)
- compareAndSwap(x, 24, 28)
- compareAndSwap(x, 26, 30)
- compareAndSwap(x, 26, 28)
- compareAndSwap(x, 25, 29)
- compareAndSwap(x, 27, 31)
- compareAndSwap(x, 27, 29)
- compareAndSwap(x, 25, 26)
- compareAndSwap(x, 27, 28)
- compareAndSwap(x, 29, 30)
- compareAndSwap(x, 16, 24)
- compareAndSwap(x, 20, 28)
- compareAndSwap(x, 20, 24)
- compareAndSwap(x, 18, 26)
- compareAndSwap(x, 22, 30)
- compareAndSwap(x, 22, 26)
- compareAndSwap(x, 18, 20)
- compareAndSwap(x, 22, 24)
- compareAndSwap(x, 26, 28)
- compareAndSwap(x, 17, 25)
- compareAndSwap(x, 21, 29)
- compareAndSwap(x, 21, 25)
- compareAndSwap(x, 19, 27)
- compareAndSwap(x, 23, 31)
- compareAndSwap(x, 23, 27)
- compareAndSwap(x, 19, 21)
- compareAndSwap(x, 23, 25)
- compareAndSwap(x, 27, 29)
- compareAndSwap(x, 17, 18)
- compareAndSwap(x, 19, 20)
- compareAndSwap(x, 21, 22)
- compareAndSwap(x, 23, 24)
- compareAndSwap(x, 25, 26)
- compareAndSwap(x, 27, 28)
- compareAndSwap(x, 29, 30)
- compareAndSwap(x, 0, 16)
- compareAndSwap(x, 8, 24)
- compareAndSwap(x, 8, 16)
- compareAndSwap(x, 4, 20)
- compareAndSwap(x, 12, 28)
- compareAndSwap(x, 12, 20)
- compareAndSwap(x, 4, 8)
- compareAndSwap(x, 12, 16)
- compareAndSwap(x, 20, 24)
- compareAndSwap(x, 2, 18)
- compareAndSwap(x, 10, 26)
- compareAndSwap(x, 10, 18)
- compareAndSwap(x, 6, 22)
- compareAndSwap(x, 14, 30)
- compareAndSwap(x, 14, 22)
- compareAndSwap(x, 6, 10)
- compareAndSwap(x, 14, 18)
- compareAndSwap(x, 22, 26)
- compareAndSwap(x, 2, 4)
- compareAndSwap(x, 6, 8)
- compareAndSwap(x, 10, 12)
- compareAndSwap(x, 14, 16)
- compareAndSwap(x, 18, 20)
- compareAndSwap(x, 22, 24)
- compareAndSwap(x, 26, 28)
- compareAndSwap(x, 1, 17)
- compareAndSwap(x, 9, 25)
- compareAndSwap(x, 9, 17)
- compareAndSwap(x, 5, 21)
- compareAndSwap(x, 13, 29)
- compareAndSwap(x, 13, 21)
- compareAndSwap(x, 5, 9)
- compareAndSwap(x, 13, 17)
- compareAndSwap(x, 21, 25)
- compareAndSwap(x, 3, 19)
- compareAndSwap(x, 11, 27)
- compareAndSwap(x, 11, 19)
- compareAndSwap(x, 7, 23)
- compareAndSwap(x, 15, 31)
- compareAndSwap(x, 15, 23)
- compareAndSwap(x, 7, 11)
- compareAndSwap(x, 15, 19)
- compareAndSwap(x, 23, 27)
- compareAndSwap(x, 3, 5)
- compareAndSwap(x, 7, 9)
- compareAndSwap(x, 11, 13)
- compareAndSwap(x, 15, 17)
- compareAndSwap(x, 19, 21)
- compareAndSwap(x, 23, 25)
- compareAndSwap(x, 27, 29)
- compareAndSwap(x, 1, 2)
- compareAndSwap(x, 3, 4)
- compareAndSwap(x, 5, 6)
- compareAndSwap(x, 7, 8)
- compareAndSwap(x, 9, 10)
- compareAndSwap(x, 11, 12)
- compareAndSwap(x, 13, 14)
- compareAndSwap(x, 15, 16)
- compareAndSwap(x, 17, 18)
- compareAndSwap(x, 19, 20)
- compareAndSwap(x, 21, 22)
- compareAndSwap(x, 23, 24)
- compareAndSwap(x, 25, 26)
- compareAndSwap(x, 27, 28)
- compareAndSwap(x, 29, 30)
- compareAndSwap(x, 32, 33)
- compareAndSwap(x, 34, 35)
- compareAndSwap(x, 32, 34)
- compareAndSwap(x, 33, 35)
- compareAndSwap(x, 33, 34)
- compareAndSwap(x, 36, 37)
- compareAndSwap(x, 38, 39)
- compareAndSwap(x, 36, 38)
- compareAndSwap(x, 37, 39)
- compareAndSwap(x, 37, 38)
- compareAndSwap(x, 32, 36)
- compareAndSwap(x, 34, 38)
- compareAndSwap(x, 34, 36)
- compareAndSwap(x, 33, 37)
- compareAndSwap(x, 35, 39)
- compareAndSwap(x, 35, 37)
- compareAndSwap(x, 33, 34)
- compareAndSwap(x, 35, 36)
- compareAndSwap(x, 37, 38)
- compareAndSwap(x, 40, 41)
- compareAndSwap(x, 42, 43)
- compareAndSwap(x, 40, 42)
- compareAndSwap(x, 41, 43)
- compareAndSwap(x, 41, 42)
- compareAndSwap(x, 44, 45)
- compareAndSwap(x, 46, 47)
- compareAndSwap(x, 44, 46)
- compareAndSwap(x, 45, 47)
- compareAndSwap(x, 45, 46)
- compareAndSwap(x, 40, 44)
- compareAndSwap(x, 42, 46)
- compareAndSwap(x, 42, 44)
- compareAndSwap(x, 41, 45)
- compareAndSwap(x, 43, 47)
- compareAndSwap(x, 43, 45)
- compareAndSwap(x, 41, 42)
- compareAndSwap(x, 43, 44)
- compareAndSwap(x, 45, 46)
- compareAndSwap(x, 32, 40)
- compareAndSwap(x, 36, 44)
- compareAndSwap(x, 36, 40)
- compareAndSwap(x, 34, 42)
- compareAndSwap(x, 38, 46)
- compareAndSwap(x, 38, 42)
- compareAndSwap(x, 34, 36)
- compareAndSwap(x, 38, 40)
- compareAndSwap(x, 42, 44)
- compareAndSwap(x, 33, 41)
- compareAndSwap(x, 37, 45)
- compareAndSwap(x, 37, 41)
- compareAndSwap(x, 35, 43)
- compareAndSwap(x, 39, 47)
- compareAndSwap(x, 39, 43)
- compareAndSwap(x, 35, 37)
- compareAndSwap(x, 39, 41)
- compareAndSwap(x, 43, 45)
- compareAndSwap(x, 33, 34)
- compareAndSwap(x, 35, 36)
- compareAndSwap(x, 37, 38)
- compareAndSwap(x, 39, 40)
- compareAndSwap(x, 41, 42)
- compareAndSwap(x, 43, 44)
- compareAndSwap(x, 45, 46)
- compareAndSwap(x, 48, 49)
- compareAndSwap(x, 50, 51)
- compareAndSwap(x, 48, 50)
- compareAndSwap(x, 49, 51)
- compareAndSwap(x, 49, 50)
- compareAndSwap(x, 52, 53)
- compareAndSwap(x, 54, 55)
- compareAndSwap(x, 52, 54)
- compareAndSwap(x, 53, 55)
- compareAndSwap(x, 53, 54)
- compareAndSwap(x, 48, 52)
- compareAndSwap(x, 50, 54)
- compareAndSwap(x, 50, 52)
- compareAndSwap(x, 49, 53)
- compareAndSwap(x, 51, 55)
- compareAndSwap(x, 51, 53)
- compareAndSwap(x, 49, 50)
- compareAndSwap(x, 51, 52)
- compareAndSwap(x, 53, 54)
- compareAndSwap(x, 56, 57)
- compareAndSwap(x, 58, 59)
- compareAndSwap(x, 56, 58)
- compareAndSwap(x, 57, 59)
- compareAndSwap(x, 57, 58)
- compareAndSwap(x, 60, 61)
- compareAndSwap(x, 62, 63)
- compareAndSwap(x, 60, 62)
- compareAndSwap(x, 61, 63)
- compareAndSwap(x, 61, 62)
- compareAndSwap(x, 56, 60)
- compareAndSwap(x, 58, 62)
- compareAndSwap(x, 58, 60)
- compareAndSwap(x, 57, 61)
- compareAndSwap(x, 59, 63)
- compareAndSwap(x, 59, 61)
- compareAndSwap(x, 57, 58)
- compareAndSwap(x, 59, 60)
- compareAndSwap(x, 61, 62)
- compareAndSwap(x, 48, 56)
- compareAndSwap(x, 52, 60)
- compareAndSwap(x, 52, 56)
- compareAndSwap(x, 50, 58)
- compareAndSwap(x, 54, 62)
- compareAndSwap(x, 54, 58)
- compareAndSwap(x, 50, 52)
- compareAndSwap(x, 54, 56)
- compareAndSwap(x, 58, 60)
- compareAndSwap(x, 49, 57)
- compareAndSwap(x, 53, 61)
- compareAndSwap(x, 53, 57)
- compareAndSwap(x, 51, 59)
- compareAndSwap(x, 55, 63)
- compareAndSwap(x, 55, 59)
- compareAndSwap(x, 51, 53)
- compareAndSwap(x, 55, 57)
- compareAndSwap(x, 59, 61)
- compareAndSwap(x, 49, 50)
- compareAndSwap(x, 51, 52)
- compareAndSwap(x, 53, 54)
- compareAndSwap(x, 55, 56)
- compareAndSwap(x, 57, 58)
- compareAndSwap(x, 59, 60)
- compareAndSwap(x, 61, 62)
- compareAndSwap(x, 32, 48)
- compareAndSwap(x, 40, 56)
- compareAndSwap(x, 40, 48)
- compareAndSwap(x, 36, 52)
- compareAndSwap(x, 44, 60)
- compareAndSwap(x, 44, 52)
- compareAndSwap(x, 36, 40)
- compareAndSwap(x, 44, 48)
- compareAndSwap(x, 52, 56)
- compareAndSwap(x, 34, 50)
- compareAndSwap(x, 42, 58)
- compareAndSwap(x, 42, 50)
- compareAndSwap(x, 38, 54)
- compareAndSwap(x, 46, 62)
- compareAndSwap(x, 46, 54)
- compareAndSwap(x, 38, 42)
- compareAndSwap(x, 46, 50)
- compareAndSwap(x, 54, 58)
- compareAndSwap(x, 34, 36)
- compareAndSwap(x, 38, 40)
- compareAndSwap(x, 42, 44)
- compareAndSwap(x, 46, 48)
- compareAndSwap(x, 50, 52)
- compareAndSwap(x, 54, 56)
- compareAndSwap(x, 58, 60)
- compareAndSwap(x, 33, 49)
- compareAndSwap(x, 41, 57)
- compareAndSwap(x, 41, 49)
- compareAndSwap(x, 37, 53)
- compareAndSwap(x, 45, 61)
- compareAndSwap(x, 45, 53)
- compareAndSwap(x, 37, 41)
- compareAndSwap(x, 45, 49)
- compareAndSwap(x, 53, 57)
- compareAndSwap(x, 35, 51)
- compareAndSwap(x, 43, 59)
- compareAndSwap(x, 43, 51)
- compareAndSwap(x, 39, 55)
- compareAndSwap(x, 47, 63)
- compareAndSwap(x, 47, 55)
- compareAndSwap(x, 39, 43)
- compareAndSwap(x, 47, 51)
- compareAndSwap(x, 55, 59)
- compareAndSwap(x, 35, 37)
- compareAndSwap(x, 39, 41)
- compareAndSwap(x, 43, 45)
- compareAndSwap(x, 47, 49)
- compareAndSwap(x, 51, 53)
- compareAndSwap(x, 55, 57)
- compareAndSwap(x, 59, 61)
- compareAndSwap(x, 33, 34)
- compareAndSwap(x, 35, 36)
- compareAndSwap(x, 37, 38)
- compareAndSwap(x, 39, 40)
- compareAndSwap(x, 41, 42)
- compareAndSwap(x, 43, 44)
- compareAndSwap(x, 45, 46)
- compareAndSwap(x, 47, 48)
- compareAndSwap(x, 49, 50)
- compareAndSwap(x, 51, 52)
- compareAndSwap(x, 53, 54)
- compareAndSwap(x, 55, 56)
- compareAndSwap(x, 57, 58)
- compareAndSwap(x, 59, 60)
- compareAndSwap(x, 61, 62)
- compareAndSwap(x, 0, 32)
- compareAndSwap(x, 16, 48)
- compareAndSwap(x, 16, 32)
- compareAndSwap(x, 8, 40)
- compareAndSwap(x, 24, 56)
- compareAndSwap(x, 24, 40)
- compareAndSwap(x, 8, 16)
- compareAndSwap(x, 24, 32)
- compareAndSwap(x, 40, 48)
- compareAndSwap(x, 4, 36)
- compareAndSwap(x, 20, 52)
- compareAndSwap(x, 20, 36)
- compareAndSwap(x, 12, 44)
- compareAndSwap(x, 28, 60)
- compareAndSwap(x, 28, 44)
- compareAndSwap(x, 12, 20)
- compareAndSwap(x, 28, 36)
- compareAndSwap(x, 44, 52)
- compareAndSwap(x, 4, 8)
- compareAndSwap(x, 12, 16)
- compareAndSwap(x, 20, 24)
- compareAndSwap(x, 28, 32)
- compareAndSwap(x, 36, 40)
- compareAndSwap(x, 44, 48)
- compareAndSwap(x, 52, 56)
- compareAndSwap(x, 2, 34)
- compareAndSwap(x, 18, 50)
- compareAndSwap(x, 18, 34)
- compareAndSwap(x, 10, 42)
- compareAndSwap(x, 26, 58)
- compareAndSwap(x, 26, 42)
- compareAndSwap(x, 10, 18)
- compareAndSwap(x, 26, 34)
- compareAndSwap(x, 42, 50)
- compareAndSwap(x, 6, 38)
- compareAndSwap(x, 22, 54)
- compareAndSwap(x, 22, 38)
- compareAndSwap(x, 14, 46)
- compareAndSwap(x, 30, 62)
- compareAndSwap(x, 30, 46)
- compareAndSwap(x, 14, 22)
- compareAndSwap(x, 30, 38)
- compareAndSwap(x, 46, 54)
- compareAndSwap(x, 6, 10)
- compareAndSwap(x, 14, 18)
- compareAndSwap(x, 22, 26)
- compareAndSwap(x, 30, 34)
- compareAndSwap(x, 38, 42)
- compareAndSwap(x, 46, 50)
- compareAndSwap(x, 54, 58)
- compareAndSwap(x, 2, 4)
- compareAndSwap(x, 6, 8)
- compareAndSwap(x, 10, 12)
- compareAndSwap(x, 14, 16)
- compareAndSwap(x, 18, 20)
- compareAndSwap(x, 22, 24)
- compareAndSwap(x, 26, 28)
- compareAndSwap(x, 30, 32)
- compareAndSwap(x, 34, 36)
- compareAndSwap(x, 38, 40)
- compareAndSwap(x, 42, 44)
- compareAndSwap(x, 46, 48)
- compareAndSwap(x, 50, 52)
- compareAndSwap(x, 54, 56)
- compareAndSwap(x, 58, 60)
- compareAndSwap(x, 1, 33)
- compareAndSwap(x, 17, 49)
- compareAndSwap(x, 17, 33)
- compareAndSwap(x, 9, 41)
- compareAndSwap(x, 25, 57)
- compareAndSwap(x, 25, 41)
- compareAndSwap(x, 9, 17)
- compareAndSwap(x, 25, 33)
- compareAndSwap(x, 41, 49)
- compareAndSwap(x, 5, 37)
- compareAndSwap(x, 21, 53)
- compareAndSwap(x, 21, 37)
- compareAndSwap(x, 13, 45)
- compareAndSwap(x, 29, 61)
- compareAndSwap(x, 29, 45)
- compareAndSwap(x, 13, 21)
- compareAndSwap(x, 29, 37)
- compareAndSwap(x, 45, 53)
- compareAndSwap(x, 5, 9)
- compareAndSwap(x, 13, 17)
- compareAndSwap(x, 21, 25)
- compareAndSwap(x, 29, 33)
- compareAndSwap(x, 37, 41)
- compareAndSwap(x, 45, 49)
- compareAndSwap(x, 53, 57)
- compareAndSwap(x, 3, 35)
- compareAndSwap(x, 19, 51)
- compareAndSwap(x, 19, 35)
- compareAndSwap(x, 11, 43)
- compareAndSwap(x, 27, 59)
- compareAndSwap(x, 27, 43)
- compareAndSwap(x, 11, 19)
- compareAndSwap(x, 27, 35)
- compareAndSwap(x, 43, 51)
- compareAndSwap(x, 7, 39)
- compareAndSwap(x, 23, 55)
- compareAndSwap(x, 23, 39)
- compareAndSwap(x, 15, 47)
- compareAndSwap(x, 31, 63)
- compareAndSwap(x, 31, 47)
- compareAndSwap(x, 15, 23)
- compareAndSwap(x, 31, 39)
- compareAndSwap(x, 47, 55)
- compareAndSwap(x, 7, 11)
- compareAndSwap(x, 15, 19)
- compareAndSwap(x, 23, 27)
- compareAndSwap(x, 31, 35)
- compareAndSwap(x, 39, 43)
- compareAndSwap(x, 47, 51)
- compareAndSwap(x, 55, 59)
- compareAndSwap(x, 3, 5)
- compareAndSwap(x, 7, 9)
- compareAndSwap(x, 11, 13)
- compareAndSwap(x, 15, 17)
- compareAndSwap(x, 19, 21)
- compareAndSwap(x, 23, 25)
- compareAndSwap(x, 27, 29)
- compareAndSwap(x, 31, 33)
- compareAndSwap(x, 35, 37)
- compareAndSwap(x, 39, 41)
- compareAndSwap(x, 43, 45)
- compareAndSwap(x, 47, 49)
- compareAndSwap(x, 51, 53)
- compareAndSwap(x, 55, 57)
- compareAndSwap(x, 59, 61)
- compareAndSwap(x, 1, 2)
- compareAndSwap(x, 3, 4)
- compareAndSwap(x, 5, 6)
- compareAndSwap(x, 7, 8)
- compareAndSwap(x, 9, 10)
- compareAndSwap(x, 11, 12)
- compareAndSwap(x, 13, 14)
- compareAndSwap(x, 15, 16)
- compareAndSwap(x, 17, 18)
- compareAndSwap(x, 19, 20)
- compareAndSwap(x, 21, 22)
- compareAndSwap(x, 23, 24)
- compareAndSwap(x, 25, 26)
- compareAndSwap(x, 27, 28)
- compareAndSwap(x, 29, 30)
- compareAndSwap(x, 31, 32)
- compareAndSwap(x, 33, 34)
- compareAndSwap(x, 35, 36)
- compareAndSwap(x, 37, 38)
- compareAndSwap(x, 39, 40)
- compareAndSwap(x, 41, 42)
- compareAndSwap(x, 43, 44)
- compareAndSwap(x, 45, 46)
- compareAndSwap(x, 47, 48)
- compareAndSwap(x, 49, 50)
- compareAndSwap(x, 51, 52)
- compareAndSwap(x, 53, 54)
- compareAndSwap(x, 55, 56)
- compareAndSwap(x, 57, 58)
- compareAndSwap(x, 59, 60)
- compareAndSwap(x, 61, 62)
- compareAndSwap(x, 64, 65)
- compareAndSwap(x, 66, 67)
- compareAndSwap(x, 64, 66)
- compareAndSwap(x, 65, 67)
- compareAndSwap(x, 65, 66)
- compareAndSwap(x, 68, 69)
- compareAndSwap(x, 70, 71)
- compareAndSwap(x, 68, 70)
- compareAndSwap(x, 69, 71)
- compareAndSwap(x, 69, 70)
- compareAndSwap(x, 64, 68)
- compareAndSwap(x, 66, 70)
- compareAndSwap(x, 66, 68)
- compareAndSwap(x, 65, 69)
- compareAndSwap(x, 67, 71)
- compareAndSwap(x, 67, 69)
- compareAndSwap(x, 65, 66)
- compareAndSwap(x, 67, 68)
- compareAndSwap(x, 69, 70)
- compareAndSwap(x, 72, 73)
- compareAndSwap(x, 74, 75)
- compareAndSwap(x, 72, 74)
- compareAndSwap(x, 73, 75)
- compareAndSwap(x, 73, 74)
- compareAndSwap(x, 76, 77)
- compareAndSwap(x, 78, 79)
- compareAndSwap(x, 76, 78)
- compareAndSwap(x, 77, 79)
- compareAndSwap(x, 77, 78)
- compareAndSwap(x, 72, 76)
- compareAndSwap(x, 74, 78)
- compareAndSwap(x, 74, 76)
- compareAndSwap(x, 73, 77)
- compareAndSwap(x, 75, 79)
- compareAndSwap(x, 75, 77)
- compareAndSwap(x, 73, 74)
- compareAndSwap(x, 75, 76)
- compareAndSwap(x, 77, 78)
- compareAndSwap(x, 64, 72)
- compareAndSwap(x, 68, 76)
- compareAndSwap(x, 68, 72)
- compareAndSwap(x, 66, 74)
- compareAndSwap(x, 70, 78)
- compareAndSwap(x, 70, 74)
- compareAndSwap(x, 66, 68)
- compareAndSwap(x, 70, 72)
- compareAndSwap(x, 74, 76)
- compareAndSwap(x, 65, 73)
- compareAndSwap(x, 69, 77)
- compareAndSwap(x, 69, 73)
- compareAndSwap(x, 67, 75)
- compareAndSwap(x, 71, 79)
- compareAndSwap(x, 71, 75)
- compareAndSwap(x, 67, 69)
- compareAndSwap(x, 71, 73)
- compareAndSwap(x, 75, 77)
- compareAndSwap(x, 65, 66)
- compareAndSwap(x, 67, 68)
- compareAndSwap(x, 69, 70)
- compareAndSwap(x, 71, 72)
- compareAndSwap(x, 73, 74)
- compareAndSwap(x, 75, 76)
- compareAndSwap(x, 77, 78)
- compareAndSwap(x, 80, 81)
- compareAndSwap(x, 82, 83)
- compareAndSwap(x, 80, 82)
- compareAndSwap(x, 81, 83)
- compareAndSwap(x, 81, 82)
- compareAndSwap(x, 81, 82)
- compareAndSwap(x, 81, 82)
- compareAndSwap(x, 64, 80)
- compareAndSwap(x, 72, 80)
- compareAndSwap(x, 68, 72)
- compareAndSwap(x, 76, 80)
- compareAndSwap(x, 66, 82)
- compareAndSwap(x, 74, 82)
- compareAndSwap(x, 70, 74)
- compareAndSwap(x, 78, 82)
- compareAndSwap(x, 66, 68)
- compareAndSwap(x, 70, 72)
- compareAndSwap(x, 74, 76)
- compareAndSwap(x, 78, 80)
- compareAndSwap(x, 65, 81)
- compareAndSwap(x, 73, 81)
- compareAndSwap(x, 69, 73)
- compareAndSwap(x, 77, 81)
- compareAndSwap(x, 67, 83)
- compareAndSwap(x, 75, 83)
- compareAndSwap(x, 71, 75)
- compareAndSwap(x, 79, 83)
- compareAndSwap(x, 67, 69)
- compareAndSwap(x, 71, 73)
- compareAndSwap(x, 75, 77)
- compareAndSwap(x, 79, 81)
- compareAndSwap(x, 65, 66)
- compareAndSwap(x, 67, 68)
- compareAndSwap(x, 69, 70)
- compareAndSwap(x, 71, 72)
- compareAndSwap(x, 73, 74)
- compareAndSwap(x, 75, 76)
- compareAndSwap(x, 77, 78)
- compareAndSwap(x, 79, 80)
- compareAndSwap(x, 81, 82)
- compareAndSwap(x, 72, 80)
- compareAndSwap(x, 68, 72)
- compareAndSwap(x, 76, 80)
- compareAndSwap(x, 74, 82)
- compareAndSwap(x, 70, 74)
- compareAndSwap(x, 78, 82)
- compareAndSwap(x, 66, 68)
- compareAndSwap(x, 70, 72)
- compareAndSwap(x, 74, 76)
- compareAndSwap(x, 78, 80)
- compareAndSwap(x, 73, 81)
- compareAndSwap(x, 69, 73)
- compareAndSwap(x, 77, 81)
- compareAndSwap(x, 75, 83)
- compareAndSwap(x, 71, 75)
- compareAndSwap(x, 79, 83)
- compareAndSwap(x, 67, 69)
- compareAndSwap(x, 71, 73)
- compareAndSwap(x, 75, 77)
- compareAndSwap(x, 79, 81)
- compareAndSwap(x, 65, 66)
- compareAndSwap(x, 67, 68)
- compareAndSwap(x, 69, 70)
- compareAndSwap(x, 71, 72)
- compareAndSwap(x, 73, 74)
- compareAndSwap(x, 75, 76)
- compareAndSwap(x, 77, 78)
- compareAndSwap(x, 79, 80)
- compareAndSwap(x, 81, 82)
- compareAndSwap(x, 0, 64)
- compareAndSwap(x, 32, 64)
- compareAndSwap(x, 16, 80)
- compareAndSwap(x, 48, 80)
- compareAndSwap(x, 16, 32)
- compareAndSwap(x, 48, 64)
- compareAndSwap(x, 8, 72)
- compareAndSwap(x, 40, 72)
- compareAndSwap(x, 24, 40)
- compareAndSwap(x, 56, 72)
- compareAndSwap(x, 8, 16)
- compareAndSwap(x, 24, 32)
- compareAndSwap(x, 40, 48)
- compareAndSwap(x, 56, 64)
- compareAndSwap(x, 72, 80)
- compareAndSwap(x, 4, 68)
- compareAndSwap(x, 36, 68)
- compareAndSwap(x, 20, 36)
- compareAndSwap(x, 52, 68)
- compareAndSwap(x, 12, 76)
- compareAndSwap(x, 44, 76)
- compareAndSwap(x, 28, 44)
- compareAndSwap(x, 60, 76)
- compareAndSwap(x, 12, 20)
- compareAndSwap(x, 28, 36)
- compareAndSwap(x, 44, 52)
- compareAndSwap(x, 60, 68)
- compareAndSwap(x, 4, 8)
- compareAndSwap(x, 12, 16)
- compareAndSwap(x, 20, 24)
- compareAndSwap(x, 28, 32)
- compareAndSwap(x, 36, 40)
- compareAndSwap(x, 44, 48)
- compareAndSwap(x, 52, 56)
- compareAndSwap(x, 60, 64)
- compareAndSwap(x, 68, 72)
- compareAndSwap(x, 76, 80)
- compareAndSwap(x, 2, 66)
- compareAndSwap(x, 34, 66)
- compareAndSwap(x, 18, 82)
- compareAndSwap(x, 50, 82)
- compareAndSwap(x, 18, 34)
- compareAndSwap(x, 50, 66)
- compareAndSwap(x, 10, 74)
- compareAndSwap(x, 42, 74)
- compareAndSwap(x, 26, 42)
- compareAndSwap(x, 58, 74)
- compareAndSwap(x, 10, 18)
- compareAndSwap(x, 26, 34)
- compareAndSwap(x, 42, 50)
- compareAndSwap(x, 58, 66)
- compareAndSwap(x, 74, 82)
- compareAndSwap(x, 6, 70)
- compareAndSwap(x, 38, 70)
- compareAndSwap(x, 22, 38)
- compareAndSwap(x, 54, 70)
- compareAndSwap(x, 14, 78)
- compareAndSwap(x, 46, 78)
- compareAndSwap(x, 30, 46)
- compareAndSwap(x, 62, 78)
- compareAndSwap(x, 14, 22)
- compareAndSwap(x, 30, 38)
- compareAndSwap(x, 46, 54)
- compareAndSwap(x, 62, 70)
- compareAndSwap(x, 6, 10)
- compareAndSwap(x, 14, 18)
- compareAndSwap(x, 22, 26)
- compareAndSwap(x, 30, 34)
- compareAndSwap(x, 38, 42)
- compareAndSwap(x, 46, 50)
- compareAndSwap(x, 54, 58)
- compareAndSwap(x, 62, 66)
- compareAndSwap(x, 70, 74)
- compareAndSwap(x, 78, 82)
- compareAndSwap(x, 2, 4)
- compareAndSwap(x, 6, 8)
- compareAndSwap(x, 10, 12)
- compareAndSwap(x, 14, 16)
- compareAndSwap(x, 18, 20)
- compareAndSwap(x, 22, 24)
- compareAndSwap(x, 26, 28)
- compareAndSwap(x, 30, 32)
- compareAndSwap(x, 34, 36)
- compareAndSwap(x, 38, 40)
- compareAndSwap(x, 42, 44)
- compareAndSwap(x, 46, 48)
- compareAndSwap(x, 50, 52)
- compareAndSwap(x, 54, 56)
- compareAndSwap(x, 58, 60)
- compareAndSwap(x, 62, 64)
- compareAndSwap(x, 66, 68)
- compareAndSwap(x, 70, 72)
- compareAndSwap(x, 74, 76)
- compareAndSwap(x, 78, 80)
- compareAndSwap(x, 1, 65)
- compareAndSwap(x, 33, 65)
- compareAndSwap(x, 17, 81)
- compareAndSwap(x, 49, 81)
- compareAndSwap(x, 17, 33)
- compareAndSwap(x, 49, 65)
- compareAndSwap(x, 9, 73)
- compareAndSwap(x, 41, 73)
- compareAndSwap(x, 25, 41)
- compareAndSwap(x, 57, 73)
- compareAndSwap(x, 9, 17)
- compareAndSwap(x, 25, 33)
- compareAndSwap(x, 41, 49)
- compareAndSwap(x, 57, 65)
- compareAndSwap(x, 73, 81)
- compareAndSwap(x, 5, 69)
- compareAndSwap(x, 37, 69)
- compareAndSwap(x, 21, 37)
- compareAndSwap(x, 53, 69)
- compareAndSwap(x, 13, 77)
- compareAndSwap(x, 45, 77)
- compareAndSwap(x, 29, 45)
- compareAndSwap(x, 61, 77)
- compareAndSwap(x, 13, 21)
- compareAndSwap(x, 29, 37)
- compareAndSwap(x, 45, 53)
- compareAndSwap(x, 61, 69)
- compareAndSwap(x, 5, 9)
- compareAndSwap(x, 13, 17)
- compareAndSwap(x, 21, 25)
- compareAndSwap(x, 29, 33)
- compareAndSwap(x, 37, 41)
- compareAndSwap(x, 45, 49)
- compareAndSwap(x, 53, 57)
- compareAndSwap(x, 61, 65)
- compareAndSwap(x, 69, 73)
- compareAndSwap(x, 77, 81)
- compareAndSwap(x, 3, 67)
- compareAndSwap(x, 35, 67)
- compareAndSwap(x, 19, 83)
- compareAndSwap(x, 51, 83)
- compareAndSwap(x, 19, 35)
- compareAndSwap(x, 51, 67)
- compareAndSwap(x, 11, 75)
- compareAndSwap(x, 43, 75)
- compareAndSwap(x, 27, 43)
- compareAndSwap(x, 59, 75)
- compareAndSwap(x, 11, 19)
- compareAndSwap(x, 27, 35)
- compareAndSwap(x, 43, 51)
- compareAndSwap(x, 59, 67)
- compareAndSwap(x, 75, 83)
- compareAndSwap(x, 7, 71)
- compareAndSwap(x, 39, 71)
- compareAndSwap(x, 23, 39)
- compareAndSwap(x, 55, 71)
- compareAndSwap(x, 15, 79)
- compareAndSwap(x, 47, 79)
- compareAndSwap(x, 31, 47)
- compareAndSwap(x, 63, 79)
- compareAndSwap(x, 15, 23)
- compareAndSwap(x, 31, 39)
- compareAndSwap(x, 47, 55)
- compareAndSwap(x, 63, 71)
- compareAndSwap(x, 7, 11)
- compareAndSwap(x, 15, 19)
- compareAndSwap(x, 23, 27)
- compareAndSwap(x, 31, 35)
- compareAndSwap(x, 39, 43)
- compareAndSwap(x, 47, 51)
- compareAndSwap(x, 55, 59)
- compareAndSwap(x, 63, 67)
- compareAndSwap(x, 71, 75)
- compareAndSwap(x, 79, 83)
- compareAndSwap(x, 3, 5)
- compareAndSwap(x, 7, 9)
- compareAndSwap(x, 11, 13)
- compareAndSwap(x, 15, 17)
- compareAndSwap(x, 19, 21)
- compareAndSwap(x, 23, 25)
- compareAndSwap(x, 27, 29)
- compareAndSwap(x, 31, 33)
- compareAndSwap(x, 35, 37)
- compareAndSwap(x, 39, 41)
- compareAndSwap(x, 43, 45)
- compareAndSwap(x, 47, 49)
- compareAndSwap(x, 51, 53)
- compareAndSwap(x, 55, 57)
- compareAndSwap(x, 59, 61)
- compareAndSwap(x, 63, 65)
- compareAndSwap(x, 67, 69)
- compareAndSwap(x, 71, 73)
- compareAndSwap(x, 75, 77)
- compareAndSwap(x, 79, 81)
- compareAndSwap(x, 1, 2)
- compareAndSwap(x, 3, 4)
- compareAndSwap(x, 5, 6)
- compareAndSwap(x, 7, 8)
- compareAndSwap(x, 9, 10)
- compareAndSwap(x, 11, 12)
- compareAndSwap(x, 13, 14)
- compareAndSwap(x, 15, 16)
- compareAndSwap(x, 17, 18)
- compareAndSwap(x, 19, 20)
- compareAndSwap(x, 21, 22)
- compareAndSwap(x, 23, 24)
- compareAndSwap(x, 25, 26)
- compareAndSwap(x, 27, 28)
- compareAndSwap(x, 29, 30)
- compareAndSwap(x, 31, 32)
- compareAndSwap(x, 33, 34)
- compareAndSwap(x, 35, 36)
- compareAndSwap(x, 37, 38)
- compareAndSwap(x, 39, 40)
- compareAndSwap(x, 41, 42)
- compareAndSwap(x, 43, 44)
- compareAndSwap(x, 45, 46)
- compareAndSwap(x, 47, 48)
- compareAndSwap(x, 49, 50)
- compareAndSwap(x, 51, 52)
- compareAndSwap(x, 53, 54)
- compareAndSwap(x, 55, 56)
- compareAndSwap(x, 57, 58)
- compareAndSwap(x, 59, 60)
- compareAndSwap(x, 61, 62)
- compareAndSwap(x, 63, 64)
- compareAndSwap(x, 65, 66)
- compareAndSwap(x, 67, 68)
- compareAndSwap(x, 69, 70)
- compareAndSwap(x, 71, 72)
- compareAndSwap(x, 73, 74)
- compareAndSwap(x, 75, 76)
- compareAndSwap(x, 77, 78)
- compareAndSwap(x, 79, 80)
- compareAndSwap(x, 81, 82)
-}
diff --git a/vendor/blitter.com/go/newhope/error_correction.go b/vendor/blitter.com/go/newhope/error_correction.go
deleted file mode 100644
index 7855648..0000000
--- a/vendor/blitter.com/go/newhope/error_correction.go
+++ /dev/null
@@ -1,135 +0,0 @@
-// error_correction.go - NewHope key exchange error correction.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to newhope, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package newhope
-
-import "blitter.com/go/chacha20"
-
-func abs(v int32) int32 {
- mask := v >> 31
- return (v ^ mask) - mask
-}
-
-func f(v0, v1 *int32, x int32) int32 {
- // The`ref` code uses uint32 for x, but none of the values ever get large
- // enough for that, and that would be cast-tastic due to Go being Go.
-
- // Next 6 lines compute t = x/PARAM_Q
- b := x * 2730
- t := b >> 25
- b = x - t*paramQ
- b = (paramQ - 1) - b
- b >>= 31
- t -= b
-
- r := t & 1
- xit := t >> 1
- *v0 = xit + r // v0 = round(x/(2*PARAM_Q))
-
- t--
- r = t & 1
- *v1 = (t >> 1) + r
-
- return abs(x - ((*v0) * 2 * paramQ))
-}
-
-func g(x int32) int32 {
- // Next 6 lines compute t = x/(4 *PARAMQ)
- b := x * 2730
- t := b >> 27
- b = x - t*(paramQ*4)
- b = (paramQ * 4) - b
- b >>= 31
- t -= b
-
- c := t & 1
- t = (t >> 1) + c // t = round(x/(8*PARAM_Q))
-
- t *= 8 * paramQ
-
- return abs(t - x)
-}
-
-func llDecode(xi0, xi1, xi2, xi3 int32) int16 {
- t := g(xi0)
- t += g(xi1)
- t += g(xi2)
- t += g(xi3)
-
- t -= 8 * paramQ
- t >>= 31
- return int16(t & 1)
-}
-
-func (c *poly) helpRec(v *poly, seed *[SeedBytes]byte, nonce byte) {
- var v0, v1, vTmp [4]int32
- var k int32
- var rand [32]byte
- var n [8]byte
-
- n[7] = nonce
-
- stream, err := chacha20.New(seed[:], n[:])
- if err != nil {
- panic(err)
- }
- stream.KeyStream(rand[:])
- stream.Reset()
- defer memwipe(rand[:])
-
- for i := uint(0); i < 256; i++ {
- rBit := int32((rand[i>>3] >> (i & 7)) & 1)
-
- vTmp[0], vTmp[1], vTmp[2], vTmp[3] = int32(v.coeffs[i]), int32(v.coeffs[256+i]), int32(v.coeffs[512+i]), int32(v.coeffs[768+i])
-
- // newhope-20151209 - New version of the reconciliation.
- k = f(&v0[0], &v1[0], 8*vTmp[0]+4*rBit)
- k += f(&v0[1], &v1[1], 8*vTmp[1]+4*rBit)
- k += f(&v0[2], &v1[2], 8*vTmp[2]+4*rBit)
- k += f(&v0[3], &v1[3], 8*vTmp[3]+4*rBit)
-
- k = (2*paramQ - 1 - k) >> 31
-
- vTmp[0] = ((^k) & v0[0]) ^ (k & v1[0])
- vTmp[1] = ((^k) & v0[1]) ^ (k & v1[1])
- vTmp[2] = ((^k) & v0[2]) ^ (k & v1[2])
- vTmp[3] = ((^k) & v0[3]) ^ (k & v1[3])
-
- c.coeffs[0+i] = uint16((vTmp[0] - vTmp[3]) & 3)
- c.coeffs[256+i] = uint16((vTmp[1] - vTmp[3]) & 3)
- c.coeffs[512+i] = uint16((vTmp[2] - vTmp[3]) & 3)
- c.coeffs[768+i] = uint16((-k + 2*vTmp[3]) & 3)
- }
-
- for i := range vTmp {
- vTmp[i] = 0
- }
-}
-
-func rec(key *[32]byte, v, c *poly) {
- var tmp, vTmp, cTmp [4]int32
- for i := range key {
- key[i] = 0
- }
-
- for i := uint(0); i < 256; i++ {
- vTmp[0], vTmp[1], vTmp[2], vTmp[3] = int32(v.coeffs[i]), int32(v.coeffs[256+i]), int32(v.coeffs[512+i]), int32(v.coeffs[768+i])
- cTmp[0], cTmp[1], cTmp[2], cTmp[3] = int32(c.coeffs[i]), int32(c.coeffs[256+i]), int32(c.coeffs[512+i]), int32(c.coeffs[768+i])
- tmp[0] = 16*paramQ + 8*vTmp[0] - paramQ*(2*cTmp[0]+cTmp[3])
- tmp[1] = 16*paramQ + 8*vTmp[1] - paramQ*(2*cTmp[1]+cTmp[3])
- tmp[2] = 16*paramQ + 8*vTmp[2] - paramQ*(2*cTmp[2]+cTmp[3])
- tmp[3] = 16*paramQ + 8*vTmp[3] - paramQ*(cTmp[3])
-
- key[i>>3] |= byte(llDecode(tmp[0], tmp[1], tmp[2], tmp[3]) << (i & 7))
- }
-
- for i := 0; i < 4; i++ {
- tmp[i] = 0
- vTmp[i] = 0
- cTmp[i] = 0
- }
-}
diff --git a/vendor/blitter.com/go/newhope/newhope.go b/vendor/blitter.com/go/newhope/newhope.go
deleted file mode 100644
index 380d943..0000000
--- a/vendor/blitter.com/go/newhope/newhope.go
+++ /dev/null
@@ -1,220 +0,0 @@
-// newhope.go - NewHope interface.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to newhope, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-// Package newhope implements a key exchange based on the Ring Learning with
-// Errors Problem. It is a mechanical port of the Public Domain implementation
-// by Erdem Alkim, Léo Ducas, Thomas Pöppelmann, and Peter Schwabe.
-//
-// For more information see:
-// https://cryptojedi.org/papers/newhope-20161119.pdf
-// https://cryptojedi.org/papers/newhopesimple-20161217.pdf
-//
-package newhope
-
-import (
- "io"
-
- "golang.org/x/crypto/sha3"
-)
-
-const (
- // SharedSecretSize is the length of a Shared Secret in bytes.
- SharedSecretSize = 32
-
- // UpstreamVersion is the version of the upstream package this
- // implementation is compatible with.
- UpstreamVersion = "20160815"
-
- // RecBytes is the length of the reconciliation data in bytes.
- RecBytes = 256
-
- // SendASize is the length of Alice's public key in bytes.
- SendASize = PolyBytes + SeedBytes
-
- // SendBSize is the length of Bob's public key in bytes.
- SendBSize = PolyBytes + RecBytes
-)
-
-// TorSampling enables the constant time generation of the `a` parameter,
-// where every successful `a` generation will take the same amount of time.
-// Most users will probably not want to enable this as it does come with a
-// performance penalty. Alice and Bob *MUST* agree on the sampling method,
-// or the key exchange will fail.
-var TorSampling = false
-
-func encodeA(r []byte, pk *poly, seed *[SeedBytes]byte) {
- pk.toBytes(r)
- for i := 0; i < SeedBytes; i++ {
- r[PolyBytes+i] = seed[i]
- }
-}
-
-func decodeA(pk *poly, seed *[SeedBytes]byte, r []byte) {
- pk.fromBytes(r)
- for i := range seed {
- seed[i] = r[PolyBytes+i]
- }
-}
-
-func encodeB(r []byte, b *poly, c *poly) {
- b.toBytes(r)
- for i := 0; i < paramN/4; i++ {
- r[PolyBytes+i] = byte(c.coeffs[4*i]) | byte(c.coeffs[4*i+1]<<2) | byte(c.coeffs[4*i+2]<<4) | byte(c.coeffs[4*i+3]<<6)
- }
-}
-
-func decodeB(b *poly, c *poly, r []byte) {
- b.fromBytes(r)
- for i := 0; i < paramN/4; i++ {
- c.coeffs[4*i+0] = uint16(r[PolyBytes+i]) & 0x03
- c.coeffs[4*i+1] = uint16(r[PolyBytes+i]>>2) & 0x03
- c.coeffs[4*i+2] = uint16(r[PolyBytes+i]>>4) & 0x03
- c.coeffs[4*i+3] = uint16(r[PolyBytes+i] >> 6)
- }
-}
-
-func memwipe(b []byte) {
- for i := range b {
- b[i] = 0
- }
-}
-
-// PublicKeyAlice is Alice's NewHope public key.
-type PublicKeyAlice struct {
- Send [SendASize]byte
-}
-
-// PrivateKeyAlice is Alice's NewHope private key.
-type PrivateKeyAlice struct {
- sk poly
-}
-
-// Reset clears all sensitive information such that it no longer appears in
-// memory.
-func (k *PrivateKeyAlice) Reset() {
- k.sk.reset()
-}
-
-// GenerateKeyPairAlice returns a private/public key pair. The private key is
-// generated using the given reader, which must return random data. The
-// receiver side of the key exchange (aka "Bob") MUST use KeyExchangeBob()
-// instead of this routine.
-func GenerateKeyPairAlice(rand io.Reader) (*PrivateKeyAlice, *PublicKeyAlice, error) {
- var a, e, pk, r poly
- var seed, noiseSeed [SeedBytes]byte
-
- // seed <- Sample({0, 1}^256)
- if _, err := io.ReadFull(rand, seed[:]); err != nil {
- return nil, nil, err
- }
- seed = sha3.Sum256(seed[:]) // Don't send output of system RNG.
- // a <- Parse(SHAKE-128(seed))
- a.uniform(&seed, TorSampling)
-
- // s, e <- Sample(psi(n, 12))
- if _, err := io.ReadFull(rand, noiseSeed[:]); err != nil {
- return nil, nil, err
- }
- defer memwipe(noiseSeed[:])
- privKey := new(PrivateKeyAlice)
- privKey.sk.getNoise(&noiseSeed, 0)
- privKey.sk.ntt()
- e.getNoise(&noiseSeed, 1)
- e.ntt()
-
- // b <- as + e
- pubKey := new(PublicKeyAlice)
- r.pointwise(&privKey.sk, &a)
- pk.add(&e, &r)
- encodeA(pubKey.Send[:], &pk, &seed)
-
- return privKey, pubKey, nil
-}
-
-// PublicKeyBob is Bob's NewHope public key.
-type PublicKeyBob struct {
- Send [SendBSize]byte
-}
-
-// KeyExchangeBob is the Responder side of the NewHope key exchange. The
-// shared secret and "public key" (key + reconciliation data) are generated
-// using the given reader, which must return random data.
-func KeyExchangeBob(rand io.Reader, alicePk *PublicKeyAlice) (*PublicKeyBob, []byte, error) {
- var pka, a, sp, ep, u, v, epp, r poly
- var seed, noiseSeed [SeedBytes]byte
-
- if _, err := io.ReadFull(rand, noiseSeed[:]); err != nil {
- return nil, nil, err
- }
- defer memwipe(noiseSeed[:])
-
- // a <- Parse(SHAKE-128(seed))
- decodeA(&pka, &seed, alicePk.Send[:])
- a.uniform(&seed, TorSampling)
-
- // s', e', e'' <- Sample(psi(n, 12))
- sp.getNoise(&noiseSeed, 0)
- sp.ntt()
- ep.getNoise(&noiseSeed, 1)
- ep.ntt()
- epp.getNoise(&noiseSeed, 2)
-
- // u <- as' + e'
- u.pointwise(&a, &sp)
- u.add(&u, &ep)
-
- // v <- bs' + e''
- v.pointwise(&pka, &sp)
- v.invNtt()
- v.add(&v, &epp)
-
- // r <- Sample(HelpRec(v))
- r.helpRec(&v, &noiseSeed, 3)
-
- pubKey := new(PublicKeyBob)
- encodeB(pubKey.Send[:], &u, &r)
-
- // nu <- Rec(v, r)
- var nu [SharedSecretSize]byte
- rec(&nu, &v, &r)
-
- // mu <- SHA3-256(nu)
- mu := sha3.Sum256(nu[:])
-
- // Scrub the sensitive stuff...
- memwipe(nu[:])
- sp.reset()
- v.reset()
-
- return pubKey, mu[:], nil
-}
-
-// KeyExchangeAlice is the Initiaitor side of the NewHope key exchange. The
-// provided private key is obliterated prior to returning.
-func KeyExchangeAlice(bobPk *PublicKeyBob, aliceSk *PrivateKeyAlice) ([]byte, error) {
- var u, r, vp poly
-
- decodeB(&u, &r, bobPk.Send[:])
-
- // v' <- us
- vp.pointwise(&aliceSk.sk, &u)
- vp.invNtt()
-
- // nu <- Rec(v', r)
- var nu [SharedSecretSize]byte
- rec(&nu, &vp, &r)
-
- // mu <- Sha3-256(nu)
- mu := sha3.Sum256(nu[:])
-
- // Scrub the sensitive stuff...
- memwipe(nu[:])
- vp.reset()
- aliceSk.Reset()
-
- return mu[:], nil
-}
diff --git a/vendor/blitter.com/go/newhope/newhope_simple.go b/vendor/blitter.com/go/newhope/newhope_simple.go
deleted file mode 100644
index 4c4774b..0000000
--- a/vendor/blitter.com/go/newhope/newhope_simple.go
+++ /dev/null
@@ -1,166 +0,0 @@
-// newhope_simple.go - NewHope-Simple interface.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to newhope, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package newhope
-
-import (
- "io"
-
- "golang.org/x/crypto/sha3"
-)
-
-const (
- // HighBytes is the length of the encoded secret in bytes.
- HighBytes = 384
-
- // SendASimpleSize is the length of Alice's NewHope-Simple public key in
- // bytes.
- SendASimpleSize = PolyBytes + SeedBytes
-
- // SendBSimpleSize is the length of Bob's NewHope-Simple public key in
- // bytes.
- SendBSimpleSize = PolyBytes + HighBytes
-)
-
-func encodeBSimple(r []byte, b *poly, v *poly) {
- b.toBytes(r)
- v.compress(r[PolyBytes:])
-}
-
-func decodeBSimple(b *poly, v *poly, r []byte) {
- b.fromBytes(r)
- v.decompress(r[PolyBytes:])
-}
-
-// PublicKeySimpleAlice is Alice's NewHope-Simple public key.
-type PublicKeySimpleAlice struct {
- Send [SendASimpleSize]byte
-}
-
-// PrivateKeySimpleAlice is Alice's NewHope-Simple private key.
-type PrivateKeySimpleAlice struct {
- sk poly
-}
-
-// Reset clears all sensitive information such that it no longer appears in
-// memory.
-func (k *PrivateKeySimpleAlice) Reset() {
- k.sk.reset()
-}
-
-// GenerateKeyPairSimpleAlice returns a NewHope-Simple private/public key pair.
-// The private key is generated using the given reader, which must return
-// random data. The receiver side of the key exchange (aka "Bob") MUST use
-// KeyExchangeSimpleBob() instead of this routine.
-func GenerateKeyPairSimpleAlice(rand io.Reader) (*PrivateKeySimpleAlice, *PublicKeySimpleAlice, error) {
- var a, e, pk, r poly
- var seed, noiseSeed [SeedBytes]byte
-
- if _, err := io.ReadFull(rand, seed[:]); err != nil {
- return nil, nil, err
- }
- seed = sha3.Sum256(seed[:]) // Don't send output of system RNG.
- a.uniform(&seed, TorSampling)
-
- if _, err := io.ReadFull(rand, noiseSeed[:]); err != nil {
- return nil, nil, err
- }
- defer memwipe(noiseSeed[:])
-
- privKey := new(PrivateKeySimpleAlice)
- privKey.sk.getNoise(&noiseSeed, 0)
- privKey.sk.ntt()
- e.getNoise(&noiseSeed, 1)
- e.ntt()
-
- pubKey := new(PublicKeySimpleAlice)
- r.pointwise(&privKey.sk, &a)
- pk.add(&e, &r)
- encodeA(pubKey.Send[:], &pk, &seed)
-
- return privKey, pubKey, nil
-}
-
-// PublicKeySimpleBob is Bob's NewHope-Simple public key.
-type PublicKeySimpleBob struct {
- Send [SendBSimpleSize]byte
-}
-
-// KeyExchangeSimpleBob is the Responder side of the NewHope-Simple key
-// exchange. The shared secret and "public key" are generated using the
-// given reader, which must return random data.
-func KeyExchangeSimpleBob(rand io.Reader, alicePk *PublicKeySimpleAlice) (*PublicKeySimpleBob, []byte, error) {
- var pka, a, sp, ep, bp, v, epp, m poly
- var seed, noiseSeed [SeedBytes]byte
-
- if _, err := io.ReadFull(rand, noiseSeed[:]); err != nil {
- return nil, nil, err
- }
- defer memwipe(noiseSeed[:])
-
- var sharedKey [SharedSecretSize]byte
- if _, err := io.ReadFull(rand, sharedKey[:]); err != nil {
- return nil, nil, err
- }
- defer memwipe(sharedKey[:])
- sharedKey = sha3.Sum256(sharedKey[:])
- m.fromMsg(sharedKey[:])
-
- decodeA(&pka, &seed, alicePk.Send[:])
- a.uniform(&seed, TorSampling)
-
- sp.getNoise(&noiseSeed, 0)
- sp.ntt()
- ep.getNoise(&noiseSeed, 1)
- ep.ntt()
-
- bp.pointwise(&a, &sp)
- bp.add(&bp, &ep)
-
- v.pointwise(&pka, &sp)
- v.invNtt()
-
- epp.getNoise(&noiseSeed, 2)
- v.add(&v, &epp)
- v.add(&v, &m) // add key
-
- pubKey := new(PublicKeySimpleBob)
- encodeBSimple(pubKey.Send[:], &bp, &v)
- mu := sha3.Sum256(sharedKey[:])
-
- // Scrub the sensitive stuff...
- sp.reset()
- v.reset()
- m.reset()
-
- return pubKey, mu[:], nil
-}
-
-// KeyExchangeSimpleAlice is the Initiaitor side of the NewHope-Simple key
-// exchange. The provided private key is obliterated prior to returning.
-func KeyExchangeSimpleAlice(bobPk *PublicKeySimpleBob, aliceSk *PrivateKeySimpleAlice) ([]byte, error) {
- var v, bp, k poly
-
- decodeBSimple(&bp, &v, bobPk.Send[:])
- k.pointwise(&aliceSk.sk, &bp)
- k.invNtt()
-
- k.sub(&k, &v)
-
- var sharedKey [SharedSecretSize]byte
- k.toMsg(sharedKey[:])
-
- // mu <- Sha3-256(v')
- mu := sha3.Sum256(sharedKey[:])
-
- // Scrub the sensitive stuff...
- memwipe(sharedKey[:])
- k.reset()
- aliceSk.Reset()
-
- return mu[:], nil
-}
diff --git a/vendor/blitter.com/go/newhope/ntt.go b/vendor/blitter.com/go/newhope/ntt.go
deleted file mode 100644
index 58d1f2a..0000000
--- a/vendor/blitter.com/go/newhope/ntt.go
+++ /dev/null
@@ -1,131 +0,0 @@
-// ntt.go - NewHope Number Theoretic Transform.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to newhope, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package newhope
-
-var bitrevTable = [paramN]uint16{
- 0, 512, 256, 768, 128, 640, 384, 896, 64, 576, 320, 832, 192, 704, 448, 960,
- 32, 544, 288, 800, 160, 672, 416, 928, 96, 608, 352, 864, 224, 736, 480,
- 992, 16, 528, 272, 784, 144, 656, 400, 912, 80, 592, 336, 848, 208, 720,
- 464, 976, 48, 560, 304, 816, 176, 688, 432, 944, 112, 624, 368, 880,
- 240, 752, 496, 1008, 8, 520, 264, 776, 136, 648, 392, 904, 72, 584, 328,
- 840, 200, 712, 456, 968, 40, 552, 296, 808, 168, 680, 424, 936, 104,
- 616, 360, 872, 232, 744, 488, 1000, 24, 536, 280, 792, 152, 664, 408,
- 920, 88, 600, 344, 856, 216, 728, 472, 984, 56, 568, 312, 824, 184, 696,
- 440, 952, 120, 632, 376, 888, 248, 760, 504, 1016, 4, 516, 260, 772,
- 132, 644, 388, 900, 68, 580, 324, 836, 196, 708, 452, 964, 36, 548, 292,
- 804, 164, 676, 420, 932, 100, 612, 356, 868, 228, 740, 484, 996, 20,
- 532, 276, 788, 148, 660, 404, 916, 84, 596, 340, 852, 212, 724, 468,
- 980, 52, 564, 308, 820, 180, 692, 436, 948, 116, 628, 372, 884, 244,
- 756, 500, 1012, 12, 524, 268, 780, 140, 652, 396, 908, 76, 588, 332,
- 844, 204, 716, 460, 972, 44, 556, 300, 812, 172, 684, 428, 940, 108,
- 620, 364, 876, 236, 748, 492, 1004, 28, 540, 284, 796, 156, 668, 412,
- 924, 92, 604, 348, 860, 220, 732, 476, 988, 60, 572, 316, 828, 188, 700,
- 444, 956, 124, 636, 380, 892, 252, 764, 508, 1020, 2, 514, 258, 770,
- 130, 642, 386, 898, 66, 578, 322, 834, 194, 706, 450, 962, 34, 546, 290,
- 802, 162, 674, 418, 930, 98, 610, 354, 866, 226, 738, 482, 994, 18, 530,
- 274, 786, 146, 658, 402, 914, 82, 594, 338, 850, 210, 722, 466, 978, 50,
- 562, 306, 818, 178, 690, 434, 946, 114, 626, 370, 882, 242, 754, 498,
- 1010, 10, 522, 266, 778, 138, 650, 394, 906, 74, 586, 330, 842, 202,
- 714, 458, 970, 42, 554, 298, 810, 170, 682, 426, 938, 106, 618, 362,
- 874, 234, 746, 490, 1002, 26, 538, 282, 794, 154, 666, 410, 922, 90,
- 602, 346, 858, 218, 730, 474, 986, 58, 570, 314, 826, 186, 698, 442,
- 954, 122, 634, 378, 890, 250, 762, 506, 1018, 6, 518, 262, 774, 134,
- 646, 390, 902, 70, 582, 326, 838, 198, 710, 454, 966, 38, 550, 294, 806,
- 166, 678, 422, 934, 102, 614, 358, 870, 230, 742, 486, 998, 22, 534,
- 278, 790, 150, 662, 406, 918, 86, 598, 342, 854, 214, 726, 470, 982, 54,
- 566, 310, 822, 182, 694, 438, 950, 118, 630, 374, 886, 246, 758, 502,
- 1014, 14, 526, 270, 782, 142, 654, 398, 910, 78, 590, 334, 846, 206,
- 718, 462, 974, 46, 558, 302, 814, 174, 686, 430, 942, 110, 622, 366,
- 878, 238, 750, 494, 1006, 30, 542, 286, 798, 158, 670, 414, 926, 94,
- 606, 350, 862, 222, 734, 478, 990, 62, 574, 318, 830, 190, 702, 446,
- 958, 126, 638, 382, 894, 254, 766, 510, 1022, 1, 513, 257, 769, 129,
- 641, 385, 897, 65, 577, 321, 833, 193, 705, 449, 961, 33, 545, 289, 801,
- 161, 673, 417, 929, 97, 609, 353, 865, 225, 737, 481, 993, 17, 529, 273,
- 785, 145, 657, 401, 913, 81, 593, 337, 849, 209, 721, 465, 977, 49, 561,
- 305, 817, 177, 689, 433, 945, 113, 625, 369, 881, 241, 753, 497, 1009,
- 9, 521, 265, 777, 137, 649, 393, 905, 73, 585, 329, 841, 201, 713, 457,
- 969, 41, 553, 297, 809, 169, 681, 425, 937, 105, 617, 361, 873, 233,
- 745, 489, 1001, 25, 537, 281, 793, 153, 665, 409, 921, 89, 601, 345,
- 857, 217, 729, 473, 985, 57, 569, 313, 825, 185, 697, 441, 953, 121,
- 633, 377, 889, 249, 761, 505, 1017, 5, 517, 261, 773, 133, 645, 389,
- 901, 69, 581, 325, 837, 197, 709, 453, 965, 37, 549, 293, 805, 165, 677,
- 421, 933, 101, 613, 357, 869, 229, 741, 485, 997, 21, 533, 277, 789,
- 149, 661, 405, 917, 85, 597, 341, 853, 213, 725, 469, 981, 53, 565, 309,
- 821, 181, 693, 437, 949, 117, 629, 373, 885, 245, 757, 501, 1013, 13,
- 525, 269, 781, 141, 653, 397, 909, 77, 589, 333, 845, 205, 717, 461,
- 973, 45, 557, 301, 813, 173, 685, 429, 941, 109, 621, 365, 877, 237,
- 749, 493, 1005, 29, 541, 285, 797, 157, 669, 413, 925, 93, 605, 349,
- 861, 221, 733, 477, 989, 61, 573, 317, 829, 189, 701, 445, 957, 125,
- 637, 381, 893, 253, 765, 509, 1021, 3, 515, 259, 771, 131, 643, 387,
- 899, 67, 579, 323, 835, 195, 707, 451, 963, 35, 547, 291, 803, 163, 675,
- 419, 931, 99, 611, 355, 867, 227, 739, 483, 995, 19, 531, 275, 787, 147,
- 659, 403, 915, 83, 595, 339, 851, 211, 723, 467, 979, 51, 563, 307, 819,
- 179, 691, 435, 947, 115, 627, 371, 883, 243, 755, 499, 1011, 11, 523,
- 267, 779, 139, 651, 395, 907, 75, 587, 331, 843, 203, 715, 459, 971, 43,
- 555, 299, 811, 171, 683, 427, 939, 107, 619, 363, 875, 235, 747, 491,
- 1003, 27, 539, 283, 795, 155, 667, 411, 923, 91, 603, 347, 859, 219,
- 731, 475, 987, 59, 571, 315, 827, 187, 699, 443, 955, 123, 635, 379,
- 891, 251, 763, 507, 1019, 7, 519, 263, 775, 135, 647, 391, 903, 71, 583,
- 327, 839, 199, 711, 455, 967, 39, 551, 295, 807, 167, 679, 423, 935,
- 103, 615, 359, 871, 231, 743, 487, 999, 23, 535, 279, 791, 151, 663,
- 407, 919, 87, 599, 343, 855, 215, 727, 471, 983, 55, 567, 311, 823, 183,
- 695, 439, 951, 119, 631, 375, 887, 247, 759, 503, 1015, 15, 527, 271,
- 783, 143, 655, 399, 911, 79, 591, 335, 847, 207, 719, 463, 975, 47, 559,
- 303, 815, 175, 687, 431, 943, 111, 623, 367, 879, 239, 751, 495, 1007,
- 31, 543, 287, 799, 159, 671, 415, 927, 95, 607, 351, 863, 223, 735, 479,
- 991, 63, 575, 319, 831, 191, 703, 447, 959, 127, 639, 383, 895, 255,
- 767, 511, 1023,
-}
-
-func (p *poly) bitrev() {
- for i, v := range p.coeffs {
- r := bitrevTable[i]
- if uint16(i) < r {
- p.coeffs[i] = p.coeffs[r]
- p.coeffs[r] = v
- }
- }
-}
-
-func (p *poly) mulCoefficients(factors *[paramN]uint16) {
- for i, v := range factors {
- p.coeffs[i] = montgomeryReduce(uint32(p.coeffs[i]) * uint32(v))
- }
-}
-
-func ntt(a *[paramN]uint16, omega *[paramN / 2]uint16) {
- var distance uint
-
- for i := uint(0); i < 10; i += 2 {
- // Even level.
- distance = (1 << i)
- for start := uint(0); start < distance; start++ {
- jTwiddle := 0
- for j := start; j < paramN-1; j += 2 * distance {
- w := uint32(omega[jTwiddle])
- jTwiddle++
- tmp := a[j]
- a[j] = tmp + a[j+distance]
- a[j+distance] = montgomeryReduce(w * (uint32(tmp) + 3*paramQ - uint32(a[j+distance])))
- }
- }
-
- // Odd level.
- distance <<= 1
- for start := uint(0); start < distance; start++ {
- jTwiddle := 0
- for j := start; j < paramN-1; j += 2 * distance {
- w := uint32(omega[jTwiddle])
- jTwiddle++
- tmp := a[j]
- a[j] = barrettReduce(tmp + a[j+distance])
- a[j+distance] = montgomeryReduce(w * (uint32(tmp) + 3*paramQ - uint32(a[j+distance])))
- }
- }
- }
-}
diff --git a/vendor/blitter.com/go/newhope/params.go b/vendor/blitter.com/go/newhope/params.go
deleted file mode 100644
index 572236b..0000000
--- a/vendor/blitter.com/go/newhope/params.go
+++ /dev/null
@@ -1,17 +0,0 @@
-// params.go - NewHope parameters.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to newhope, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package newhope
-
-const (
- paramN = 1024
- paramK = 16 // used in sampler
- paramQ = 12289
-
- // SeedBytes is the size of the seed in bytes.
- SeedBytes = 32
-)
diff --git a/vendor/blitter.com/go/newhope/poly.go b/vendor/blitter.com/go/newhope/poly.go
deleted file mode 100644
index 860126e..0000000
--- a/vendor/blitter.com/go/newhope/poly.go
+++ /dev/null
@@ -1,212 +0,0 @@
-// poly.go - NewHope polynomial.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to newhope, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package newhope
-
-import (
- "encoding/binary"
-
- "blitter.com/go/chacha20"
- "golang.org/x/crypto/sha3"
-)
-
-const (
- // PolyBytes is the length of an encoded polynomial in bytes.
- PolyBytes = 1792
-
- shake128Rate = 168 // Stupid that this isn't exposed.
-)
-
-type poly struct {
- coeffs [paramN]uint16
-}
-
-func (p *poly) reset() {
- for i := range p.coeffs {
- p.coeffs[i] = 0
- }
-}
-
-func (p *poly) fromBytes(a []byte) {
- for i := 0; i < paramN/4; i++ {
- p.coeffs[4*i+0] = uint16(a[7*i+0]) | ((uint16(a[7*i+1]) & 0x3f) << 8)
- p.coeffs[4*i+1] = (uint16(a[7*i+1]) >> 6) | (uint16(a[7*i+2]) << 2) | ((uint16(a[7*i+3]) & 0x0f) << 10)
-
- p.coeffs[4*i+2] = (uint16(a[7*i+3]) >> 4) | (uint16(a[7*i+4]) << 4) | ((uint16(a[7*i+5]) & 0x03) << 12)
- p.coeffs[4*i+3] = (uint16(a[7*i+5]) >> 2) | (uint16(a[7*i+6]) << 6)
- }
-}
-
-func (p *poly) toBytes(r []byte) {
- for i := 0; i < paramN/4; i++ {
- // Make sure that coefficients have only 14 bits.
- t0 := barrettReduce(p.coeffs[4*i+0])
- t1 := barrettReduce(p.coeffs[4*i+1])
- t2 := barrettReduce(p.coeffs[4*i+2])
- t3 := barrettReduce(p.coeffs[4*i+3])
-
- // Make sure that coefficients are in [0,q]
- m := t0 - paramQ
- c := int16(m)
- c >>= 15
- t0 = m ^ ((t0 ^ m) & uint16(c))
-
- m = t1 - paramQ
- c = int16(m)
- c >>= 15
- t1 = m ^ ((t1 ^ m) & uint16(c))
-
- m = t2 - paramQ
- c = int16(m)
- c >>= 15
- t2 = m ^ ((t2 ^ m) & uint16(c))
-
- m = t3 - paramQ
- c = int16(m)
- c >>= 15
- t3 = m ^ ((t3 ^ m) & uint16(c))
-
- r[7*i+0] = byte(t0 & 0xff)
- r[7*i+1] = byte(t0>>8) | byte(t1<<6)
- r[7*i+2] = byte(t1 >> 2)
- r[7*i+3] = byte(t1>>10) | byte(t2<<4)
- r[7*i+4] = byte(t2 >> 4)
- r[7*i+5] = byte(t2>>12) | byte(t3<<2)
- r[7*i+6] = byte(t3 >> 6)
- }
-}
-
-func (p *poly) discardTo(xbuf []byte) bool {
- var x [shake128Rate * 16 / 2]uint16
- for i := range x {
- x[i] = binary.LittleEndian.Uint16(xbuf[i*2:])
- }
-
- for i := 0; i < 16; i++ {
- batcher84(x[i:])
- }
-
- // Check whether we're safe:
- r := int(0)
- for i := 1000; i < 1024; i++ {
- r |= 61444 - int(x[i])
- }
- if r>>31 != 0 {
- return true
- }
-
- // If we are, copy coefficients to polynomial:
- for i := range p.coeffs {
- p.coeffs[i] = x[i]
- }
-
- return false
-}
-
-func (p *poly) uniform(seed *[SeedBytes]byte, torSampling bool) {
- if !torSampling {
- // Reference version, vartime.
- nBlocks := 14
- var buf [shake128Rate * 14]byte
-
- // h and buf are left unscrubbed because the output is public.
- h := sha3.NewShake128()
- _, _ = h.Write(seed[:])
- _, _ = h.Read(buf[:])
-
- for ctr, pos := 0, 0; ctr < paramN; {
- val := binary.LittleEndian.Uint16(buf[pos:])
-
- if val < 5*paramQ {
- p.coeffs[ctr] = val
- ctr++
- }
- pos += 2
- if pos > shake128Rate*nBlocks-2 {
- nBlocks = 1
- _, _ = h.Read(buf[:shake128Rate])
- pos = 0
- }
- }
- } else {
- // `torref` version, every valid `a` is generate in constant time,
- // though the number of attempts varies.
- const nBlocks = 16
- var buf [shake128Rate * nBlocks]byte
-
- // h and buf are left unscrubbed because the output is public.
- h := sha3.NewShake128()
- _, _ = h.Write(seed[:])
-
- for {
- _, _ = h.Read(buf[:])
- if !p.discardTo(buf[:]) {
- break
- }
- }
-
- }
-}
-
-func (p *poly) getNoise(seed *[SeedBytes]byte, nonce byte) {
- // The `ref` code uses a uint32 vector instead of a byte vector,
- // but converting between the two in Go is cumbersome.
- var buf [4 * paramN]byte
- var n [8]byte
-
- n[0] = nonce
- stream, err := chacha20.New(seed[:], n[:])
- if err != nil {
- panic(err)
- }
- stream.KeyStream(buf[:])
- stream.Reset()
-
- for i := 0; i < paramN; i++ {
- t := binary.LittleEndian.Uint32(buf[4*i:])
- d := uint32(0)
- for j := uint(0); j < 8; j++ {
- d += (t >> j) & 0x01010101
- }
- a := ((d >> 8) & 0xff) + (d & 0xff)
- b := (d >> 24) + ((d >> 16) & 0xff)
- p.coeffs[i] = uint16(a) + paramQ - uint16(b)
- }
-
- // Scrub the random bits...
- memwipe(buf[:])
-}
-
-func (p *poly) pointwise(a, b *poly) {
- for i := range p.coeffs {
- t := montgomeryReduce(3186 * uint32(b.coeffs[i])) // t is now in Montgomery domain
- p.coeffs[i] = montgomeryReduce(uint32(a.coeffs[i]) * uint32(t)) // p.coeffs[i] is back in normal domain
- }
-}
-
-func (p *poly) add(a, b *poly) {
- for i := range p.coeffs {
- p.coeffs[i] = barrettReduce(a.coeffs[i] + b.coeffs[i])
- }
-}
-
-func (p *poly) ntt() {
- p.mulCoefficients(&psisBitrevMontgomery)
- ntt(&p.coeffs, &omegasMontgomery)
-}
-
-func (p *poly) invNtt() {
- p.bitrev()
- ntt(&p.coeffs, &omegasInvMontgomery)
- p.mulCoefficients(&psisInvMontgomery)
-}
-
-func init() {
- if paramK != 16 {
- panic("poly.getNoise() only supports k=16")
- }
-}
diff --git a/vendor/blitter.com/go/newhope/poly_simple.go b/vendor/blitter.com/go/newhope/poly_simple.go
deleted file mode 100644
index 59ebec1..0000000
--- a/vendor/blitter.com/go/newhope/poly_simple.go
+++ /dev/null
@@ -1,99 +0,0 @@
-// poly_simple.go - NewHope-Simple polynomial.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to newhope, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package newhope
-
-func coeffFreeze(x uint16) uint16 {
- var c int16
-
- r := barrettReduce(x)
- m := r - paramQ
- c = int16(m)
- c >>= 15
- r = m ^ ((r ^ m) & uint16(c))
-
- return r
-}
-
-// Computes abs(x-Q/2)
-func flipAbs(x uint16) uint16 {
- r := int16(coeffFreeze(x))
- r = r - paramQ/2
- m := r >> 15
- return uint16((r + m) ^ m)
-}
-
-func (p *poly) compress(r []byte) {
- var t [8]uint32
-
- for i, k := 0, 0; i < paramN; i, k = i+8, k+3 {
- for j := range t {
- t[j] = uint32(coeffFreeze(p.coeffs[i+j]))
- t[j] = (((t[j] << 3) + paramQ/2) / paramQ) & 0x7
- }
-
- r[k] = byte(t[0]) | byte(t[1]<<3) | byte(t[2]<<6)
- r[k+1] = byte(t[2]>>2) | byte(t[3]<<1) | byte(t[4]<<4) | byte(t[5]<<7)
- r[k+2] = byte(t[5]>>1) | byte(t[6]<<2) | byte(t[7]<<5)
- }
-
- for i := range t {
- t[i] = 0
- }
-}
-
-func (p *poly) decompress(a []byte) {
- for i := 0; i < paramN; i += 8 {
- a0, a1, a2 := uint16(a[0]), uint16(a[1]), uint16(a[2])
- p.coeffs[i+0] = a0 & 7
- p.coeffs[i+1] = (a0 >> 3) & 7
- p.coeffs[i+2] = (a0 >> 6) | ((a1 << 2) & 4)
- p.coeffs[i+3] = (a1 >> 1) & 7
- p.coeffs[i+4] = (a1 >> 4) & 7
- p.coeffs[i+5] = (a1 >> 7) | ((a2 << 1) & 6)
- p.coeffs[i+6] = (a2 >> 2) & 7
- p.coeffs[i+7] = (a2 >> 5)
- a = a[3:]
- for j := 0; j < 8; j++ {
- p.coeffs[i+j] = uint16((uint32(p.coeffs[i+j])*paramQ + 4) >> 3)
- }
- }
-}
-
-func (p *poly) fromMsg(msg []byte) {
- for i := uint(0); i < 32; i++ { // XXX: const for 32
- for j := uint(0); j < 8; j++ {
- mask := -(uint16((msg[i] >> j) & 1))
- p.coeffs[8*i+j+0] = mask & (paramQ / 2)
- p.coeffs[8*i+j+256] = mask & (paramQ / 2)
- p.coeffs[8*i+j+512] = mask & (paramQ / 2)
- p.coeffs[8*i+j+768] = mask & (paramQ / 2)
- }
- }
-}
-
-func (p *poly) toMsg(msg []byte) {
- memwipe(msg[0:32])
-
- for i := uint(0); i < 256; i++ {
- t := flipAbs(p.coeffs[i+0])
- t += flipAbs(p.coeffs[i+256])
- t += flipAbs(p.coeffs[i+512])
- t += flipAbs(p.coeffs[i+768])
-
- //t = (~(t - PARAM_Q));
- t = (t - paramQ)
- t >>= 15
- msg[i>>3] |= byte(t << (i & 7))
- }
-}
-
-func (p *poly) sub(a, b *poly) {
- for i := range p.coeffs {
- p.coeffs[i] = barrettReduce(a.coeffs[i] + 3*paramQ - b.coeffs[i])
- }
-}
diff --git a/vendor/blitter.com/go/newhope/precomp.go b/vendor/blitter.com/go/newhope/precomp.go
deleted file mode 100644
index 4d61c81..0000000
--- a/vendor/blitter.com/go/newhope/precomp.go
+++ /dev/null
@@ -1,287 +0,0 @@
-// precomp.go - NewHope precomputed tables.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to newhope, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package newhope
-
-var omegasMontgomery = [paramN / 2]uint16{
- 4075, 6974, 7373, 7965, 3262, 5079, 522, 2169, 6364, 1018, 1041, 8775, 2344,
- 11011, 5574, 1973, 4536, 1050, 6844, 3860, 3818, 6118, 2683, 1190, 4789,
- 7822, 7540, 6752, 5456, 4449, 3789, 12142, 11973, 382, 3988, 468, 6843,
- 5339, 6196, 3710, 11316, 1254, 5435, 10930, 3998, 10256, 10367, 3879,
- 11889, 1728, 6137, 4948, 5862, 6136, 3643, 6874, 8724, 654, 10302, 1702,
- 7083, 6760, 56, 3199, 9987, 605, 11785, 8076, 5594, 9260, 6403, 4782,
- 6212, 4624, 9026, 8689, 4080, 11868, 6221, 3602, 975, 8077, 8851, 9445,
- 5681, 3477, 1105, 142, 241, 12231, 1003, 3532, 5009, 1956, 6008, 11404,
- 7377, 2049, 10968, 12097, 7591, 5057, 3445, 4780, 2920, 7048, 3127,
- 8120, 11279, 6821, 11502, 8807, 12138, 2127, 2839, 3957, 431, 1579,
- 6383, 9784, 5874, 677, 3336, 6234, 2766, 1323, 9115, 12237, 2031, 6956,
- 6413, 2281, 3969, 3991, 12133, 9522, 4737, 10996, 4774, 5429, 11871,
- 3772, 453, 5908, 2882, 1805, 2051, 1954, 11713, 3963, 2447, 6142, 8174,
- 3030, 1843, 2361, 12071, 2908, 3529, 3434, 3202, 7796, 2057, 5369,
- 11939, 1512, 6906, 10474, 11026, 49, 10806, 5915, 1489, 9789, 5942,
- 10706, 10431, 7535, 426, 8974, 3757, 10314, 9364, 347, 5868, 9551, 9634,
- 6554, 10596, 9280, 11566, 174, 2948, 2503, 6507, 10723, 11606, 2459, 64,
- 3656, 8455, 5257, 5919, 7856, 1747, 9166, 5486, 9235, 6065, 835, 3570,
- 4240, 11580, 4046, 10970, 9139, 1058, 8210, 11848, 922, 7967, 1958,
- 10211, 1112, 3728, 4049, 11130, 5990, 1404, 325, 948, 11143, 6190, 295,
- 11637, 5766, 8212, 8273, 2919, 8527, 6119, 6992, 8333, 1360, 2555, 6167,
- 1200, 7105, 7991, 3329, 9597, 12121, 5106, 5961, 10695, 10327, 3051,
- 9923, 4896, 9326, 81, 3091, 1000, 7969, 4611, 726, 1853, 12149, 4255,
- 11112, 2768, 10654, 1062, 2294, 3553, 4805, 2747, 4846, 8577, 9154,
- 1170, 2319, 790, 11334, 9275, 9088, 1326, 5086, 9094, 6429, 11077,
- 10643, 3504, 3542, 8668, 9744, 1479, 1, 8246, 7143, 11567, 10984, 4134,
- 5736, 4978, 10938, 5777, 8961, 4591, 5728, 6461, 5023, 9650, 7468, 949,
- 9664, 2975, 11726, 2744, 9283, 10092, 5067, 12171, 2476, 3748, 11336,
- 6522, 827, 9452, 5374, 12159, 7935, 3296, 3949, 9893, 4452, 10908, 2525,
- 3584, 8112, 8011, 10616, 4989, 6958, 11809, 9447, 12280, 1022, 11950,
- 9821, 11745, 5791, 5092, 2089, 9005, 2881, 3289, 2013, 9048, 729, 7901,
- 1260, 5755, 4632, 11955, 2426, 10593, 1428, 4890, 5911, 3932, 9558,
- 8830, 3637, 5542, 145, 5179, 8595, 3707, 10530, 355, 3382, 4231, 9741,
- 1207, 9041, 7012, 1168, 10146, 11224, 4645, 11885, 10911, 10377, 435,
- 7952, 4096, 493, 9908, 6845, 6039, 2422, 2187, 9723, 8643, 9852, 9302,
- 6022, 7278, 1002, 4284, 5088, 1607, 7313, 875, 8509, 9430, 1045, 2481,
- 5012, 7428, 354, 6591, 9377, 11847, 2401, 1067, 7188, 11516, 390, 8511,
- 8456, 7270, 545, 8585, 9611, 12047, 1537, 4143, 4714, 4885, 1017, 5084,
- 1632, 3066, 27, 1440, 8526, 9273, 12046, 11618, 9289, 3400, 9890, 3136,
- 7098, 8758, 11813, 7384, 3985, 11869, 6730, 10745, 10111, 2249, 4048,
- 2884, 11136, 2126, 1630, 9103, 5407, 2686, 9042, 2969, 8311, 9424, 9919,
- 8779, 5332, 10626, 1777, 4654, 10863, 7351, 3636, 9585, 5291, 8374,
- 2166, 4919, 12176, 9140, 12129, 7852, 12286, 4895, 10805, 2780, 5195,
- 2305, 7247, 9644, 4053, 10600, 3364, 3271, 4057, 4414, 9442, 7917, 2174,
-}
-
-var omegasInvMontgomery = [paramN / 2]uint16{
- 4075, 5315, 4324, 4916, 10120, 11767, 7210, 9027, 10316, 6715, 1278, 9945,
- 3514, 11248, 11271, 5925, 147, 8500, 7840, 6833, 5537, 4749, 4467, 7500,
- 11099, 9606, 6171, 8471, 8429, 5445, 11239, 7753, 9090, 12233, 5529,
- 5206, 10587, 1987, 11635, 3565, 5415, 8646, 6153, 6427, 7341, 6152,
- 10561, 400, 8410, 1922, 2033, 8291, 1359, 6854, 11035, 973, 8579, 6093,
- 6950, 5446, 11821, 8301, 11907, 316, 52, 3174, 10966, 9523, 6055, 8953,
- 11612, 6415, 2505, 5906, 10710, 11858, 8332, 9450, 10162, 151, 3482,
- 787, 5468, 1010, 4169, 9162, 5241, 9369, 7509, 8844, 7232, 4698, 192,
- 1321, 10240, 4912, 885, 6281, 10333, 7280, 8757, 11286, 58, 12048,
- 12147, 11184, 8812, 6608, 2844, 3438, 4212, 11314, 8687, 6068, 421,
- 8209, 3600, 3263, 7665, 6077, 7507, 5886, 3029, 6695, 4213, 504, 11684,
- 2302, 1962, 1594, 6328, 7183, 168, 2692, 8960, 4298, 5184, 11089, 6122,
- 9734, 10929, 3956, 5297, 6170, 3762, 9370, 4016, 4077, 6523, 652, 11994,
- 6099, 1146, 11341, 11964, 10885, 6299, 1159, 8240, 8561, 11177, 2078,
- 10331, 4322, 11367, 441, 4079, 11231, 3150, 1319, 8243, 709, 8049, 8719,
- 11454, 6224, 3054, 6803, 3123, 10542, 4433, 6370, 7032, 3834, 8633,
- 12225, 9830, 683, 1566, 5782, 9786, 9341, 12115, 723, 3009, 1693, 5735,
- 2655, 2738, 6421, 11942, 2925, 1975, 8532, 3315, 11863, 4754, 1858,
- 1583, 6347, 2500, 10800, 6374, 1483, 12240, 1263, 1815, 5383, 10777,
- 350, 6920, 10232, 4493, 9087, 8855, 8760, 9381, 218, 9928, 10446, 9259,
- 4115, 6147, 9842, 8326, 576, 10335, 10238, 10484, 9407, 6381, 11836,
- 8517, 418, 6860, 7515, 1293, 7552, 2767, 156, 8298, 8320, 10008, 5876,
- 5333, 10258, 10115, 4372, 2847, 7875, 8232, 9018, 8925, 1689, 8236,
- 2645, 5042, 9984, 7094, 9509, 1484, 7394, 3, 4437, 160, 3149, 113, 7370,
- 10123, 3915, 6998, 2704, 8653, 4938, 1426, 7635, 10512, 1663, 6957,
- 3510, 2370, 2865, 3978, 9320, 3247, 9603, 6882, 3186, 10659, 10163,
- 1153, 9405, 8241, 10040, 2178, 1544, 5559, 420, 8304, 4905, 476, 3531,
- 5191, 9153, 2399, 8889, 3000, 671, 243, 3016, 3763, 10849, 12262, 9223,
- 10657, 7205, 11272, 7404, 7575, 8146, 10752, 242, 2678, 3704, 11744,
- 5019, 3833, 3778, 11899, 773, 5101, 11222, 9888, 442, 2912, 5698, 11935,
- 4861, 7277, 9808, 11244, 2859, 3780, 11414, 4976, 10682, 7201, 8005,
- 11287, 5011, 6267, 2987, 2437, 3646, 2566, 10102, 9867, 6250, 5444,
- 2381, 11796, 8193, 4337, 11854, 1912, 1378, 404, 7644, 1065, 2143,
- 11121, 5277, 3248, 11082, 2548, 8058, 8907, 11934, 1759, 8582, 3694,
- 7110, 12144, 6747, 8652, 3459, 2731, 8357, 6378, 7399, 10861, 1696,
- 9863, 334, 7657, 6534, 11029, 4388, 11560, 3241, 10276, 9000, 9408,
- 3284, 10200, 7197, 6498, 544, 2468, 339, 11267, 9, 2842, 480, 5331,
- 7300, 1673, 4278, 4177, 8705, 9764, 1381, 7837, 2396, 8340, 8993, 4354,
- 130, 6915, 2837, 11462, 5767, 953, 8541, 9813, 118, 7222, 2197, 3006,
- 9545, 563, 9314, 2625, 11340, 4821, 2639, 7266, 5828, 6561, 7698, 3328,
- 6512, 1351, 7311, 6553, 8155, 1305, 722, 5146, 4043, 12288, 10810, 2545,
- 3621, 8747, 8785, 1646, 1212, 5860, 3195, 7203, 10963, 3201, 3014, 955,
- 11499, 9970, 11119, 3135, 3712, 7443, 9542, 7484, 8736, 9995, 11227,
- 1635, 9521, 1177, 8034, 140, 10436, 11563, 7678, 4320, 11289, 9198,
- 12208, 2963, 7393, 2366, 9238,
-}
-
-var psisBitrevMontgomery = [paramN]uint16{
- 4075, 6974, 7373, 7965, 3262, 5079, 522, 2169, 6364, 1018, 1041, 8775, 2344,
- 11011, 5574, 1973, 4536, 1050, 6844, 3860, 3818, 6118, 2683, 1190, 4789,
- 7822, 7540, 6752, 5456, 4449, 3789, 12142, 11973, 382, 3988, 468, 6843,
- 5339, 6196, 3710, 11316, 1254, 5435, 10930, 3998, 10256, 10367, 3879,
- 11889, 1728, 6137, 4948, 5862, 6136, 3643, 6874, 8724, 654, 10302, 1702,
- 7083, 6760, 56, 3199, 9987, 605, 11785, 8076, 5594, 9260, 6403, 4782,
- 6212, 4624, 9026, 8689, 4080, 11868, 6221, 3602, 975, 8077, 8851, 9445,
- 5681, 3477, 1105, 142, 241, 12231, 1003, 3532, 5009, 1956, 6008, 11404,
- 7377, 2049, 10968, 12097, 7591, 5057, 3445, 4780, 2920, 7048, 3127,
- 8120, 11279, 6821, 11502, 8807, 12138, 2127, 2839, 3957, 431, 1579,
- 6383, 9784, 5874, 677, 3336, 6234, 2766, 1323, 9115, 12237, 2031, 6956,
- 6413, 2281, 3969, 3991, 12133, 9522, 4737, 10996, 4774, 5429, 11871,
- 3772, 453, 5908, 2882, 1805, 2051, 1954, 11713, 3963, 2447, 6142, 8174,
- 3030, 1843, 2361, 12071, 2908, 3529, 3434, 3202, 7796, 2057, 5369,
- 11939, 1512, 6906, 10474, 11026, 49, 10806, 5915, 1489, 9789, 5942,
- 10706, 10431, 7535, 426, 8974, 3757, 10314, 9364, 347, 5868, 9551, 9634,
- 6554, 10596, 9280, 11566, 174, 2948, 2503, 6507, 10723, 11606, 2459, 64,
- 3656, 8455, 5257, 5919, 7856, 1747, 9166, 5486, 9235, 6065, 835, 3570,
- 4240, 11580, 4046, 10970, 9139, 1058, 8210, 11848, 922, 7967, 1958,
- 10211, 1112, 3728, 4049, 11130, 5990, 1404, 325, 948, 11143, 6190, 295,
- 11637, 5766, 8212, 8273, 2919, 8527, 6119, 6992, 8333, 1360, 2555, 6167,
- 1200, 7105, 7991, 3329, 9597, 12121, 5106, 5961, 10695, 10327, 3051,
- 9923, 4896, 9326, 81, 3091, 1000, 7969, 4611, 726, 1853, 12149, 4255,
- 11112, 2768, 10654, 1062, 2294, 3553, 4805, 2747, 4846, 8577, 9154,
- 1170, 2319, 790, 11334, 9275, 9088, 1326, 5086, 9094, 6429, 11077,
- 10643, 3504, 3542, 8668, 9744, 1479, 1, 8246, 7143, 11567, 10984, 4134,
- 5736, 4978, 10938, 5777, 8961, 4591, 5728, 6461, 5023, 9650, 7468, 949,
- 9664, 2975, 11726, 2744, 9283, 10092, 5067, 12171, 2476, 3748, 11336,
- 6522, 827, 9452, 5374, 12159, 7935, 3296, 3949, 9893, 4452, 10908, 2525,
- 3584, 8112, 8011, 10616, 4989, 6958, 11809, 9447, 12280, 1022, 11950,
- 9821, 11745, 5791, 5092, 2089, 9005, 2881, 3289, 2013, 9048, 729, 7901,
- 1260, 5755, 4632, 11955, 2426, 10593, 1428, 4890, 5911, 3932, 9558,
- 8830, 3637, 5542, 145, 5179, 8595, 3707, 10530, 355, 3382, 4231, 9741,
- 1207, 9041, 7012, 1168, 10146, 11224, 4645, 11885, 10911, 10377, 435,
- 7952, 4096, 493, 9908, 6845, 6039, 2422, 2187, 9723, 8643, 9852, 9302,
- 6022, 7278, 1002, 4284, 5088, 1607, 7313, 875, 8509, 9430, 1045, 2481,
- 5012, 7428, 354, 6591, 9377, 11847, 2401, 1067, 7188, 11516, 390, 8511,
- 8456, 7270, 545, 8585, 9611, 12047, 1537, 4143, 4714, 4885, 1017, 5084,
- 1632, 3066, 27, 1440, 8526, 9273, 12046, 11618, 9289, 3400, 9890, 3136,
- 7098, 8758, 11813, 7384, 3985, 11869, 6730, 10745, 10111, 2249, 4048,
- 2884, 11136, 2126, 1630, 9103, 5407, 2686, 9042, 2969, 8311, 9424, 9919,
- 8779, 5332, 10626, 1777, 4654, 10863, 7351, 3636, 9585, 5291, 8374,
- 2166, 4919, 12176, 9140, 12129, 7852, 12286, 4895, 10805, 2780, 5195,
- 2305, 7247, 9644, 4053, 10600, 3364, 3271, 4057, 4414, 9442, 7917, 2174,
- 3947, 11951, 2455, 6599, 10545, 10975, 3654, 2894, 7681, 7126, 7287,
- 12269, 4119, 3343, 2151, 1522, 7174, 7350, 11041, 2442, 2148, 5959,
- 6492, 8330, 8945, 5598, 3624, 10397, 1325, 6565, 1945, 11260, 10077,
- 2674, 3338, 3276, 11034, 506, 6505, 1392, 5478, 8778, 1178, 2776, 3408,
- 10347, 11124, 2575, 9489, 12096, 6092, 10058, 4167, 6085, 923, 11251,
- 11912, 4578, 10669, 11914, 425, 10453, 392, 10104, 8464, 4235, 8761,
- 7376, 2291, 3375, 7954, 8896, 6617, 7790, 1737, 11667, 3982, 9342, 6680,
- 636, 6825, 7383, 512, 4670, 2900, 12050, 7735, 994, 1687, 11883, 7021,
- 146, 10485, 1403, 5189, 6094, 2483, 2054, 3042, 10945, 3981, 10821,
- 11826, 8882, 8151, 180, 9600, 7684, 5219, 10880, 6780, 204, 11232, 2600,
- 7584, 3121, 3017, 11053, 7814, 7043, 4251, 4739, 11063, 6771, 7073,
- 9261, 2360, 11925, 1928, 11825, 8024, 3678, 3205, 3359, 11197, 5209,
- 8581, 3238, 8840, 1136, 9363, 1826, 3171, 4489, 7885, 346, 2068, 1389,
- 8257, 3163, 4840, 6127, 8062, 8921, 612, 4238, 10763, 8067, 125, 11749,
- 10125, 5416, 2110, 716, 9839, 10584, 11475, 11873, 3448, 343, 1908,
- 4538, 10423, 7078, 4727, 1208, 11572, 3589, 2982, 1373, 1721, 10753,
- 4103, 2429, 4209, 5412, 5993, 9011, 438, 3515, 7228, 1218, 8347, 5232,
- 8682, 1327, 7508, 4924, 448, 1014, 10029, 12221, 4566, 5836, 12229,
- 2717, 1535, 3200, 5588, 5845, 412, 5102, 7326, 3744, 3056, 2528, 7406,
- 8314, 9202, 6454, 6613, 1417, 10032, 7784, 1518, 3765, 4176, 5063, 9828,
- 2275, 6636, 4267, 6463, 2065, 7725, 3495, 8328, 8755, 8144, 10533, 5966,
- 12077, 9175, 9520, 5596, 6302, 8400, 579, 6781, 11014, 5734, 11113,
- 11164, 4860, 1131, 10844, 9068, 8016, 9694, 3837, 567, 9348, 7000, 6627,
- 7699, 5082, 682, 11309, 5207, 4050, 7087, 844, 7434, 3769, 293, 9057,
- 6940, 9344, 10883, 2633, 8190, 3944, 5530, 5604, 3480, 2171, 9282,
- 11024, 2213, 8136, 3805, 767, 12239, 216, 11520, 6763, 10353, 7, 8566,
- 845, 7235, 3154, 4360, 3285, 10268, 2832, 3572, 1282, 7559, 3229, 8360,
- 10583, 6105, 3120, 6643, 6203, 8536, 8348, 6919, 3536, 9199, 10891,
- 11463, 5043, 1658, 5618, 8787, 5789, 4719, 751, 11379, 6389, 10783,
- 3065, 7806, 6586, 2622, 5386, 510, 7628, 6921, 578, 10345, 11839, 8929,
- 4684, 12226, 7154, 9916, 7302, 8481, 3670, 11066, 2334, 1590, 7878,
- 10734, 1802, 1891, 5103, 6151, 8820, 3418, 7846, 9951, 4693, 417, 9996,
- 9652, 4510, 2946, 5461, 365, 881, 1927, 1015, 11675, 11009, 1371, 12265,
- 2485, 11385, 5039, 6742, 8449, 1842, 12217, 8176, 9577, 4834, 7937,
- 9461, 2643, 11194, 3045, 6508, 4094, 3451, 7911, 11048, 5406, 4665,
- 3020, 6616, 11345, 7519, 3669, 5287, 1790, 7014, 5410, 11038, 11249,
- 2035, 6125, 10407, 4565, 7315, 5078, 10506, 2840, 2478, 9270, 4194,
- 9195, 4518, 7469, 1160, 6878, 2730, 10421, 10036, 1734, 3815, 10939,
- 5832, 10595, 10759, 4423, 8420, 9617, 7119, 11010, 11424, 9173, 189,
- 10080, 10526, 3466, 10588, 7592, 3578, 11511, 7785, 9663, 530, 12150,
- 8957, 2532, 3317, 9349, 10243, 1481, 9332, 3454, 3758, 7899, 4218, 2593,
- 11410, 2276, 982, 6513, 1849, 8494, 9021, 4523, 7988, 8, 457, 648, 150,
- 8000, 2307, 2301, 874, 5650, 170, 9462, 2873, 9855, 11498, 2535, 11169,
- 5808, 12268, 9687, 1901, 7171, 11787, 3846, 1573, 6063, 3793, 466,
- 11259, 10608, 3821, 6320, 4649, 6263, 2929,
-}
-
-var psisInvMontgomery = [paramN]uint16{
- 256, 10570, 1510, 7238, 1034, 7170, 6291, 7921, 11665, 3422, 4000, 2327,
- 2088, 5565, 795, 10647, 1521, 5484, 2539, 7385, 1055, 7173, 8047, 11683,
- 1669, 1994, 3796, 5809, 4341, 9398, 11876, 12230, 10525, 12037, 12253,
- 3506, 4012, 9351, 4847, 2448, 7372, 9831, 3160, 2207, 5582, 2553, 7387,
- 6322, 9681, 1383, 10731, 1533, 219, 5298, 4268, 7632, 6357, 9686, 8406,
- 4712, 9451, 10128, 4958, 5975, 11387, 8649, 11769, 6948, 11526, 12180,
- 1740, 10782, 6807, 2728, 7412, 4570, 4164, 4106, 11120, 12122, 8754,
- 11784, 3439, 5758, 11356, 6889, 9762, 11928, 1704, 1999, 10819, 12079,
- 12259, 7018, 11536, 1648, 1991, 2040, 2047, 2048, 10826, 12080, 8748,
- 8272, 8204, 1172, 1923, 7297, 2798, 7422, 6327, 4415, 7653, 6360, 11442,
- 12168, 7005, 8023, 9924, 8440, 8228, 2931, 7441, 1063, 3663, 5790, 9605,
- 10150, 1450, 8985, 11817, 10466, 10273, 12001, 3470, 7518, 1074, 1909,
- 7295, 9820, 4914, 702, 5367, 7789, 8135, 9940, 1420, 3714, 11064, 12114,
- 12264, 1752, 5517, 9566, 11900, 1700, 3754, 5803, 829, 1874, 7290, 2797,
- 10933, 5073, 7747, 8129, 6428, 6185, 11417, 1631, 233, 5300, 9535,
- 10140, 11982, 8734, 8270, 2937, 10953, 8587, 8249, 2934, 9197, 4825,
- 5956, 4362, 9401, 1343, 3703, 529, 10609, 12049, 6988, 6265, 895, 3639,
- 4031, 4087, 4095, 585, 10617, 8539, 4731, 4187, 9376, 3095, 9220, 10095,
- 10220, 1460, 10742, 12068, 1724, 5513, 11321, 6884, 2739, 5658, 6075,
- 4379, 11159, 10372, 8504, 4726, 9453, 3106, 7466, 11600, 10435, 8513,
- 9994, 8450, 9985, 3182, 10988, 8592, 2983, 9204, 4826, 2445, 5616, 6069,
- 867, 3635, 5786, 11360, 5134, 2489, 10889, 12089, 1727, 7269, 2794,
- 9177, 1311, 5454, 9557, 6632, 2703, 9164, 10087, 1441, 3717, 531, 3587,
- 2268, 324, 5313, 759, 1864, 5533, 2546, 7386, 9833, 8427, 4715, 11207,
- 1601, 7251, 4547, 11183, 12131, 1733, 10781, 10318, 1474, 10744, 5046,
- 4232, 11138, 10369, 6748, 964, 7160, 4534, 7670, 8118, 8182, 4680,
- 11202, 6867, 981, 8918, 1274, 182, 26, 7026, 8026, 11680, 12202, 10521,
- 1503, 7237, 4545, 5916, 9623, 8397, 11733, 10454, 3249, 9242, 6587, 941,
- 1890, 270, 10572, 6777, 9746, 6659, 6218, 6155, 6146, 878, 1881, 7291,
- 11575, 12187, 1741, 7271, 8061, 11685, 6936, 4502, 9421, 4857, 4205,
- 7623, 1089, 10689, 1527, 8996, 10063, 11971, 10488, 6765, 2722, 3900,
- 9335, 11867, 6962, 11528, 5158, 4248, 4118, 5855, 2592, 5637, 6072,
- 2623, 7397, 8079, 9932, 4930, 5971, 853, 3633, 519, 8852, 11798, 3441,
- 11025, 1575, 225, 8810, 11792, 12218, 3501, 9278, 3081, 9218, 4828,
- 7712, 8124, 11694, 12204, 3499, 4011, 573, 3593, 5780, 7848, 9899,
- 10192, 1456, 208, 7052, 2763, 7417, 11593, 10434, 12024, 8740, 11782,
- 10461, 3250, 5731, 7841, 9898, 1414, 202, 3540, 7528, 2831, 2160, 10842,
- 5060, 4234, 4116, 588, 84, 12, 7024, 2759, 9172, 6577, 11473, 1639,
- 9012, 3043, 7457, 6332, 11438, 1634, 1989, 9062, 11828, 8712, 11778,
- 12216, 10523, 6770, 9745, 10170, 4964, 9487, 6622, 946, 8913, 6540,
- 6201, 4397, 9406, 8366, 9973, 8447, 8229, 11709, 8695, 10020, 3187,
- 5722, 2573, 10901, 6824, 4486, 4152, 9371, 8361, 2950, 2177, 311, 1800,
- 9035, 8313, 11721, 3430, 490, 70, 10, 1757, 251, 3547, 7529, 11609,
- 3414, 7510, 4584, 4166, 9373, 1339, 5458, 7802, 11648, 1664, 7260, 9815,
- 10180, 6721, 9738, 10169, 8475, 8233, 9954, 1422, 8981, 1283, 5450,
- 11312, 1616, 3742, 11068, 10359, 4991, 713, 3613, 9294, 8350, 4704, 672,
- 96, 7036, 9783, 11931, 3460, 5761, 823, 10651, 12055, 10500, 1500, 5481,
- 783, 3623, 11051, 8601, 8251, 8201, 11705, 10450, 5004, 4226, 7626,
- 2845, 2162, 3820, 7568, 9859, 3164, 452, 10598, 1514, 5483, 6050, 6131,
- 4387, 7649, 8115, 6426, 918, 8909, 8295, 1185, 5436, 11310, 8638, 1234,
- 5443, 11311, 5127, 2488, 2111, 10835, 5059, 7745, 2862, 3920, 560, 80,
- 1767, 2008, 3798, 11076, 6849, 2734, 10924, 12094, 8750, 1250, 10712,
- 6797, 971, 7161, 1023, 8924, 4786, 7706, 4612, 4170, 7618, 6355, 4419,
- 5898, 11376, 10403, 10264, 6733, 4473, 639, 5358, 2521, 9138, 3061,
- 5704, 4326, 618, 5355, 765, 5376, 768, 7132, 4530, 9425, 3102, 9221,
- 6584, 11474, 10417, 10266, 12000, 6981, 6264, 4406, 2385, 7363, 4563,
- 4163, 7617, 9866, 3165, 9230, 11852, 10471, 5007, 5982, 11388, 5138,
- 734, 3616, 11050, 12112, 6997, 11533, 12181, 10518, 12036, 3475, 2252,
- 7344, 9827, 4915, 9480, 6621, 4457, 7659, 9872, 6677, 4465, 4149, 7615,
- 4599, 657, 3605, 515, 10607, 6782, 4480, 640, 1847, 3775, 5806, 2585,
- 5636, 9583, 1369, 10729, 8555, 10000, 11962, 5220, 7768, 8132, 8184,
- 9947, 1421, 203, 29, 8782, 11788, 1684, 10774, 10317, 4985, 9490, 8378,
- 4708, 11206, 5112, 5997, 7879, 11659, 12199, 8765, 10030, 4944, 5973,
- 6120, 6141, 6144, 7900, 11662, 1666, 238, 34, 3516, 5769, 9602, 8394,
- 9977, 6692, 956, 10670, 6791, 9748, 11926, 8726, 11780, 5194, 742, 106,
- 8793, 10034, 3189, 10989, 5081, 4237, 5872, 4350, 2377, 10873, 6820,
- 6241, 11425, 10410, 10265, 3222, 5727, 9596, 4882, 2453, 2106, 3812,
- 11078, 12116, 5242, 4260, 11142, 8614, 11764, 12214, 5256, 4262, 4120,
- 11122, 5100, 11262, 5120, 2487, 5622, 9581, 8391, 8221, 2930, 10952,
- 12098, 6995, 6266, 9673, 4893, 699, 3611, 4027, 5842, 11368, 1624, 232,
- 8811, 8281, 1183, 169, 8802, 3013, 2186, 5579, 797, 3625, 4029, 11109,
- 1587, 7249, 11569, 8675, 6506, 2685, 10917, 12093, 12261, 12285, 1755,
- 7273, 1039, 1904, 272, 3550, 9285, 3082, 5707, 6082, 4380, 7648, 11626,
- 5172, 4250, 9385, 8363, 8217, 4685, 5936, 848, 8899, 6538, 934, 1889,
- 3781, 9318, 10109, 10222, 6727, 961, 5404, 772, 5377, 9546, 8386, 1198,
- 8949, 3034, 2189, 7335, 4559, 5918, 2601, 10905, 5069, 9502, 3113, 7467,
- 8089, 11689, 5181, 9518, 8382, 2953, 3933, 4073, 4093, 7607, 8109, 2914,
- 5683, 4323, 11151, 1593, 10761, 6804, 972, 3650, 2277, 5592, 4310, 7638,
- 9869, 4921, 703, 1856, 9043, 4803, 9464, 1352, 8971, 11815, 5199, 7765,
- 6376, 4422, 7654, 2849, 407, 8836, 6529, 7955, 2892, 9191, 1313, 10721,
- 12065, 12257, 1751, 9028, 8312, 2943, 2176, 3822, 546, 78, 8789, 11789,
- 10462, 12028, 6985, 4509, 9422, 1346, 5459, 4291, 613, 10621, 6784,
- 9747, 3148, 7472, 2823, 5670, 810, 7138, 8042, 4660, 7688, 6365, 6176,
- 6149, 2634, 5643, 9584, 10147, 11983, 5223, 9524, 11894, 10477, 8519,
- 1217, 3685, 2282, 326, 10580, 3267, 7489, 4581, 2410, 5611, 11335, 6886,
- 8006, 8166, 11700, 3427, 11023, 8597, 10006, 3185, 455, 65, 5276, 7776,
- 4622, 5927, 7869, 9902, 11948, 5218, 2501, 5624, 2559, 10899, 1557,
- 1978, 10816, 10323, 8497, 4725, 675, 1852, 10798, 12076, 10503, 3256,
- 9243, 3076, 2195, 10847, 12083, 10504, 12034, 10497,
-}
diff --git a/vendor/blitter.com/go/newhope/reduce.go b/vendor/blitter.com/go/newhope/reduce.go
deleted file mode 100644
index 26df25a..0000000
--- a/vendor/blitter.com/go/newhope/reduce.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// poly.go - NewHope reductions.
-//
-// To the extent possible under law, Yawning Angel has waived all copyright
-// and related or neighboring rights to newhope, using the Creative
-// Commons "CC0" public domain dedication. See LICENSE or
-// for full details.
-
-package newhope
-
-// Incomplete-reduction routines; for details on allowed input ranges
-// and produced output ranges, see the description in the paper:
-// https://cryptojedi.org/papers/#newhope
-
-const (
- qinv = 12287 // -inverse_mod(p,2^18)
- rlog = 18
-)
-
-func montgomeryReduce(a uint32) uint16 {
- u := a * qinv
- u &= ((1 << rlog) - 1)
- u *= paramQ
- a = (a + u) >> 18
- return uint16(a)
-}
-
-func barrettReduce(a uint16) uint16 {
- u := (uint32(a) * 5) >> 16
- u *= paramQ
- a -= uint16(u)
- return a
-}
diff --git a/vendor/github.com/aead/chacha20/LICENSE b/vendor/github.com/aead/chacha20/LICENSE
deleted file mode 100644
index b6a9210..0000000
--- a/vendor/github.com/aead/chacha20/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-The MIT License (MIT)
-
-Copyright (c) 2016 Andreas Auernhammer
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/vendor/github.com/aead/chacha20/chacha/chacha.go b/vendor/github.com/aead/chacha20/chacha/chacha.go
deleted file mode 100644
index c2b39da..0000000
--- a/vendor/github.com/aead/chacha20/chacha/chacha.go
+++ /dev/null
@@ -1,197 +0,0 @@
-// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
-// Use of this source code is governed by a license that can be
-// found in the LICENSE file.
-
-// Package chacha implements some low-level functions of the
-// ChaCha cipher family.
-package chacha // import "github.com/aead/chacha20/chacha"
-
-import (
- "encoding/binary"
- "errors"
- "math"
-)
-
-const (
- // NonceSize is the size of the ChaCha20 nonce in bytes.
- NonceSize = 8
-
- // INonceSize is the size of the IETF-ChaCha20 nonce in bytes.
- INonceSize = 12
-
- // XNonceSize is the size of the XChaCha20 nonce in bytes.
- XNonceSize = 24
-
- // KeySize is the size of the key in bytes.
- KeySize = 32
-)
-
-var (
- useSSE2 bool
- useSSSE3 bool
- useAVX bool
- useAVX2 bool
-)
-
-var (
- errKeySize = errors.New("chacha20/chacha: bad key length")
- errInvalidNonce = errors.New("chacha20/chacha: bad nonce length")
-)
-
-func setup(state *[64]byte, nonce, key []byte) (err error) {
- if len(key) != KeySize {
- err = errKeySize
- return
- }
- var Nonce [16]byte
- switch len(nonce) {
- case NonceSize:
- copy(Nonce[8:], nonce)
- initialize(state, key, &Nonce)
- case INonceSize:
- copy(Nonce[4:], nonce)
- initialize(state, key, &Nonce)
- case XNonceSize:
- var tmpKey [32]byte
- var hNonce [16]byte
-
- copy(hNonce[:], nonce[:16])
- copy(tmpKey[:], key)
- HChaCha20(&tmpKey, &hNonce, &tmpKey)
- copy(Nonce[8:], nonce[16:])
- initialize(state, tmpKey[:], &Nonce)
-
- // BUG(aead): A "good" compiler will remove this (optimizations)
- // But using the provided key instead of tmpKey,
- // will change the key (-> probably confuses users)
- for i := range tmpKey {
- tmpKey[i] = 0
- }
- default:
- err = errInvalidNonce
- }
- return
-}
-
-// XORKeyStream crypts bytes from src to dst using the given nonce and key.
-// The length of the nonce determinds the version of ChaCha20:
-// - NonceSize: ChaCha20/r with a 64 bit nonce and a 2^64 * 64 byte period.
-// - INonceSize: ChaCha20/r as defined in RFC 7539 and a 2^32 * 64 byte period.
-// - XNonceSize: XChaCha20/r with a 192 bit nonce and a 2^64 * 64 byte period.
-// The rounds argument specifies the number of rounds performed for keystream
-// generation - valid values are 8, 12 or 20. The src and dst may be the same slice
-// but otherwise should not overlap. If len(dst) < len(src) this function panics.
-// If the nonce is neither 64, 96 nor 192 bits long, this function panics.
-func XORKeyStream(dst, src, nonce, key []byte, rounds int) {
- if rounds != 20 && rounds != 12 && rounds != 8 {
- panic("chacha20/chacha: bad number of rounds")
- }
- if len(dst) < len(src) {
- panic("chacha20/chacha: dst buffer is to small")
- }
- if len(nonce) == INonceSize && uint64(len(src)) > (1<<38) {
- panic("chacha20/chacha: src is too large")
- }
-
- var block, state [64]byte
- if err := setup(&state, nonce, key); err != nil {
- panic(err)
- }
- xorKeyStream(dst, src, &block, &state, rounds)
-}
-
-// Cipher implements ChaCha20/r (XChaCha20/r) for a given number of rounds r.
-type Cipher struct {
- state, block [64]byte
- off int
- rounds int // 20 for ChaCha20
- noncesize int
-}
-
-// NewCipher returns a new *chacha.Cipher implementing the ChaCha20/r or XChaCha20/r
-// (r = 8, 12 or 20) stream cipher. The nonce must be unique for one key for all time.
-// The length of the nonce determinds the version of ChaCha20:
-// - NonceSize: ChaCha20/r with a 64 bit nonce and a 2^64 * 64 byte period.
-// - INonceSize: ChaCha20/r as defined in RFC 7539 and a 2^32 * 64 byte period.
-// - XNonceSize: XChaCha20/r with a 192 bit nonce and a 2^64 * 64 byte period.
-// If the nonce is neither 64, 96 nor 192 bits long, a non-nil error is returned.
-func NewCipher(nonce, key []byte, rounds int) (*Cipher, error) {
- if rounds != 20 && rounds != 12 && rounds != 8 {
- panic("chacha20/chacha: bad number of rounds")
- }
-
- c := new(Cipher)
- if err := setup(&(c.state), nonce, key); err != nil {
- return nil, err
- }
- c.rounds = rounds
-
- if len(nonce) == INonceSize {
- c.noncesize = INonceSize
- } else {
- c.noncesize = NonceSize
- }
-
- return c, nil
-}
-
-// XORKeyStream crypts bytes from src to dst. Src and dst may be the same slice
-// but otherwise should not overlap. If len(dst) < len(src) the function panics.
-func (c *Cipher) XORKeyStream(dst, src []byte) {
- if len(dst) < len(src) {
- panic("chacha20/chacha: dst buffer is to small")
- }
-
- if c.off > 0 {
- n := len(c.block[c.off:])
- if len(src) <= n {
- for i, v := range src {
- dst[i] = v ^ c.block[c.off]
- c.off++
- }
- if c.off == 64 {
- c.off = 0
- }
- return
- }
-
- for i, v := range c.block[c.off:] {
- dst[i] = src[i] ^ v
- }
- src = src[n:]
- dst = dst[n:]
- c.off = 0
- }
-
- // check for counter overflow
- blocksToXOR := len(src) / 64
- if len(src)%64 != 0 {
- blocksToXOR++
- }
- var overflow bool
- if c.noncesize == INonceSize {
- overflow = binary.LittleEndian.Uint32(c.state[48:]) > math.MaxUint32-uint32(blocksToXOR)
- } else {
- overflow = binary.LittleEndian.Uint64(c.state[48:]) > math.MaxUint64-uint64(blocksToXOR)
- }
- if overflow {
- panic("chacha20/chacha: counter overflow")
- }
-
- c.off += xorKeyStream(dst, src, &(c.block), &(c.state), c.rounds)
-}
-
-// SetCounter skips ctr * 64 byte blocks. SetCounter(0) resets the cipher.
-// This function always skips the unused keystream of the current 64 byte block.
-func (c *Cipher) SetCounter(ctr uint64) {
- if c.noncesize == INonceSize {
- binary.LittleEndian.PutUint32(c.state[48:], uint32(ctr))
- } else {
- binary.LittleEndian.PutUint64(c.state[48:], ctr)
- }
- c.off = 0
-}
-
-// HChaCha20 generates 32 pseudo-random bytes from a 128 bit nonce and a 256 bit secret key.
-// It can be used as a key-derivation-function (KDF).
-func HChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) { hChaCha20(out, nonce, key) }
diff --git a/vendor/github.com/aead/chacha20/chacha/chachaAVX2_amd64.s b/vendor/github.com/aead/chacha20/chacha/chachaAVX2_amd64.s
deleted file mode 100644
index c2b5f52..0000000
--- a/vendor/github.com/aead/chacha20/chacha/chachaAVX2_amd64.s
+++ /dev/null
@@ -1,406 +0,0 @@
-// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
-// Use of this source code is governed by a license that can be
-// found in the LICENSE file.
-
-// +build amd64,!gccgo,!appengine,!nacl
-
-#include "const.s"
-#include "macro.s"
-
-#define TWO 0(SP)
-#define C16 32(SP)
-#define C8 64(SP)
-#define STATE_0 96(SP)
-#define STATE_1 128(SP)
-#define STATE_2 160(SP)
-#define STATE_3 192(SP)
-#define TMP_0 224(SP)
-#define TMP_1 256(SP)
-
-// func xorKeyStreamAVX(dst, src []byte, block, state *[64]byte, rounds int) int
-TEXT ·xorKeyStreamAVX2(SB), 4, $320-80
- MOVQ dst_base+0(FP), DI
- MOVQ src_base+24(FP), SI
- MOVQ block+48(FP), BX
- MOVQ state+56(FP), AX
- MOVQ rounds+64(FP), DX
- MOVQ src_len+32(FP), CX
-
- MOVQ SP, R8
- ADDQ $32, SP
- ANDQ $-32, SP
-
- VMOVDQU 0(AX), Y2
- VMOVDQU 32(AX), Y3
- VPERM2I128 $0x22, Y2, Y0, Y0
- VPERM2I128 $0x33, Y2, Y1, Y1
- VPERM2I128 $0x22, Y3, Y2, Y2
- VPERM2I128 $0x33, Y3, Y3, Y3
-
- TESTQ CX, CX
- JZ done
-
- VMOVDQU ·one_AVX2<>(SB), Y4
- VPADDD Y4, Y3, Y3
-
- VMOVDQA Y0, STATE_0
- VMOVDQA Y1, STATE_1
- VMOVDQA Y2, STATE_2
- VMOVDQA Y3, STATE_3
-
- VMOVDQU ·rol16_AVX2<>(SB), Y4
- VMOVDQU ·rol8_AVX2<>(SB), Y5
- VMOVDQU ·two_AVX2<>(SB), Y6
- VMOVDQA Y4, Y14
- VMOVDQA Y5, Y15
- VMOVDQA Y4, C16
- VMOVDQA Y5, C8
- VMOVDQA Y6, TWO
-
- CMPQ CX, $64
- JBE between_0_and_64
- CMPQ CX, $192
- JBE between_64_and_192
- CMPQ CX, $320
- JBE between_192_and_320
- CMPQ CX, $448
- JBE between_320_and_448
-
-at_least_512:
- VMOVDQA Y0, Y4
- VMOVDQA Y1, Y5
- VMOVDQA Y2, Y6
- VPADDQ TWO, Y3, Y7
- VMOVDQA Y0, Y8
- VMOVDQA Y1, Y9
- VMOVDQA Y2, Y10
- VPADDQ TWO, Y7, Y11
- VMOVDQA Y0, Y12
- VMOVDQA Y1, Y13
- VMOVDQA Y2, Y14
- VPADDQ TWO, Y11, Y15
-
- MOVQ DX, R9
-
-chacha_loop_512:
- VMOVDQA Y8, TMP_0
- CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y8, C16, C8)
- CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y8, C16, C8)
- VMOVDQA TMP_0, Y8
- VMOVDQA Y0, TMP_0
- CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y0, C16, C8)
- CHACHA_QROUND_AVX(Y12, Y13, Y14, Y15, Y0, C16, C8)
- CHACHA_SHUFFLE_AVX(Y1, Y2, Y3)
- CHACHA_SHUFFLE_AVX(Y5, Y6, Y7)
- CHACHA_SHUFFLE_AVX(Y9, Y10, Y11)
- CHACHA_SHUFFLE_AVX(Y13, Y14, Y15)
-
- CHACHA_QROUND_AVX(Y12, Y13, Y14, Y15, Y0, C16, C8)
- CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y0, C16, C8)
- VMOVDQA TMP_0, Y0
- VMOVDQA Y8, TMP_0
- CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y8, C16, C8)
- CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y8, C16, C8)
- VMOVDQA TMP_0, Y8
- CHACHA_SHUFFLE_AVX(Y3, Y2, Y1)
- CHACHA_SHUFFLE_AVX(Y7, Y6, Y5)
- CHACHA_SHUFFLE_AVX(Y11, Y10, Y9)
- CHACHA_SHUFFLE_AVX(Y15, Y14, Y13)
- SUBQ $2, R9
- JA chacha_loop_512
-
- VMOVDQA Y12, TMP_0
- VMOVDQA Y13, TMP_1
- VPADDD STATE_0, Y0, Y0
- VPADDD STATE_1, Y1, Y1
- VPADDD STATE_2, Y2, Y2
- VPADDD STATE_3, Y3, Y3
- XOR_AVX2(DI, SI, 0, Y0, Y1, Y2, Y3, Y12, Y13)
- VMOVDQA STATE_0, Y0
- VMOVDQA STATE_1, Y1
- VMOVDQA STATE_2, Y2
- VMOVDQA STATE_3, Y3
- VPADDQ TWO, Y3, Y3
-
- VPADDD Y0, Y4, Y4
- VPADDD Y1, Y5, Y5
- VPADDD Y2, Y6, Y6
- VPADDD Y3, Y7, Y7
- XOR_AVX2(DI, SI, 128, Y4, Y5, Y6, Y7, Y12, Y13)
- VPADDQ TWO, Y3, Y3
-
- VPADDD Y0, Y8, Y8
- VPADDD Y1, Y9, Y9
- VPADDD Y2, Y10, Y10
- VPADDD Y3, Y11, Y11
- XOR_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13)
- VPADDQ TWO, Y3, Y3
-
- VPADDD TMP_0, Y0, Y12
- VPADDD TMP_1, Y1, Y13
- VPADDD Y2, Y14, Y14
- VPADDD Y3, Y15, Y15
- VPADDQ TWO, Y3, Y3
-
- CMPQ CX, $512
- JB less_than_512
-
- XOR_AVX2(DI, SI, 384, Y12, Y13, Y14, Y15, Y4, Y5)
- VMOVDQA Y3, STATE_3
- ADDQ $512, SI
- ADDQ $512, DI
- SUBQ $512, CX
- CMPQ CX, $448
- JA at_least_512
-
- TESTQ CX, CX
- JZ done
-
- VMOVDQA C16, Y14
- VMOVDQA C8, Y15
-
- CMPQ CX, $64
- JBE between_0_and_64
- CMPQ CX, $192
- JBE between_64_and_192
- CMPQ CX, $320
- JBE between_192_and_320
- JMP between_320_and_448
-
-less_than_512:
- XOR_UPPER_AVX2(DI, SI, 384, Y12, Y13, Y14, Y15, Y4, Y5)
- EXTRACT_LOWER(BX, Y12, Y13, Y14, Y15, Y4)
- ADDQ $448, SI
- ADDQ $448, DI
- SUBQ $448, CX
- JMP finalize
-
-between_320_and_448:
- VMOVDQA Y0, Y4
- VMOVDQA Y1, Y5
- VMOVDQA Y2, Y6
- VPADDQ TWO, Y3, Y7
- VMOVDQA Y0, Y8
- VMOVDQA Y1, Y9
- VMOVDQA Y2, Y10
- VPADDQ TWO, Y7, Y11
-
- MOVQ DX, R9
-
-chacha_loop_384:
- CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y13, Y14, Y15)
- CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
- CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
- CHACHA_SHUFFLE_AVX(Y1, Y2, Y3)
- CHACHA_SHUFFLE_AVX(Y5, Y6, Y7)
- CHACHA_SHUFFLE_AVX(Y9, Y10, Y11)
- CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y13, Y14, Y15)
- CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
- CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
- CHACHA_SHUFFLE_AVX(Y3, Y2, Y1)
- CHACHA_SHUFFLE_AVX(Y7, Y6, Y5)
- CHACHA_SHUFFLE_AVX(Y11, Y10, Y9)
- SUBQ $2, R9
- JA chacha_loop_384
-
- VPADDD STATE_0, Y0, Y0
- VPADDD STATE_1, Y1, Y1
- VPADDD STATE_2, Y2, Y2
- VPADDD STATE_3, Y3, Y3
- XOR_AVX2(DI, SI, 0, Y0, Y1, Y2, Y3, Y12, Y13)
- VMOVDQA STATE_0, Y0
- VMOVDQA STATE_1, Y1
- VMOVDQA STATE_2, Y2
- VMOVDQA STATE_3, Y3
- VPADDQ TWO, Y3, Y3
-
- VPADDD Y0, Y4, Y4
- VPADDD Y1, Y5, Y5
- VPADDD Y2, Y6, Y6
- VPADDD Y3, Y7, Y7
- XOR_AVX2(DI, SI, 128, Y4, Y5, Y6, Y7, Y12, Y13)
- VPADDQ TWO, Y3, Y3
-
- VPADDD Y0, Y8, Y8
- VPADDD Y1, Y9, Y9
- VPADDD Y2, Y10, Y10
- VPADDD Y3, Y11, Y11
- VPADDQ TWO, Y3, Y3
-
- CMPQ CX, $384
- JB less_than_384
-
- XOR_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13)
- SUBQ $384, CX
- TESTQ CX, CX
- JE done
-
- ADDQ $384, SI
- ADDQ $384, DI
- JMP between_0_and_64
-
-less_than_384:
- XOR_UPPER_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13)
- EXTRACT_LOWER(BX, Y8, Y9, Y10, Y11, Y12)
- ADDQ $320, SI
- ADDQ $320, DI
- SUBQ $320, CX
- JMP finalize
-
-between_192_and_320:
- VMOVDQA Y0, Y4
- VMOVDQA Y1, Y5
- VMOVDQA Y2, Y6
- VMOVDQA Y3, Y7
- VMOVDQA Y0, Y8
- VMOVDQA Y1, Y9
- VMOVDQA Y2, Y10
- VPADDQ TWO, Y3, Y11
-
- MOVQ DX, R9
-
-chacha_loop_256:
- CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
- CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
- CHACHA_SHUFFLE_AVX(Y5, Y6, Y7)
- CHACHA_SHUFFLE_AVX(Y9, Y10, Y11)
- CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
- CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
- CHACHA_SHUFFLE_AVX(Y7, Y6, Y5)
- CHACHA_SHUFFLE_AVX(Y11, Y10, Y9)
- SUBQ $2, R9
- JA chacha_loop_256
-
- VPADDD Y0, Y4, Y4
- VPADDD Y1, Y5, Y5
- VPADDD Y2, Y6, Y6
- VPADDD Y3, Y7, Y7
- VPADDQ TWO, Y3, Y3
- XOR_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13)
- VPADDD Y0, Y8, Y8
- VPADDD Y1, Y9, Y9
- VPADDD Y2, Y10, Y10
- VPADDD Y3, Y11, Y11
- VPADDQ TWO, Y3, Y3
-
- CMPQ CX, $256
- JB less_than_256
-
- XOR_AVX2(DI, SI, 128, Y8, Y9, Y10, Y11, Y12, Y13)
- SUBQ $256, CX
- TESTQ CX, CX
- JE done
-
- ADDQ $256, SI
- ADDQ $256, DI
- JMP between_0_and_64
-
-less_than_256:
- XOR_UPPER_AVX2(DI, SI, 128, Y8, Y9, Y10, Y11, Y12, Y13)
- EXTRACT_LOWER(BX, Y8, Y9, Y10, Y11, Y12)
- ADDQ $192, SI
- ADDQ $192, DI
- SUBQ $192, CX
- JMP finalize
-
-between_64_and_192:
- VMOVDQA Y0, Y4
- VMOVDQA Y1, Y5
- VMOVDQA Y2, Y6
- VMOVDQA Y3, Y7
-
- MOVQ DX, R9
-
-chacha_loop_128:
- CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
- CHACHA_SHUFFLE_AVX(Y5, Y6, Y7)
- CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
- CHACHA_SHUFFLE_AVX(Y7, Y6, Y5)
- SUBQ $2, R9
- JA chacha_loop_128
-
- VPADDD Y0, Y4, Y4
- VPADDD Y1, Y5, Y5
- VPADDD Y2, Y6, Y6
- VPADDD Y3, Y7, Y7
- VPADDQ TWO, Y3, Y3
-
- CMPQ CX, $128
- JB less_than_128
-
- XOR_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13)
- SUBQ $128, CX
- TESTQ CX, CX
- JE done
-
- ADDQ $128, SI
- ADDQ $128, DI
- JMP between_0_and_64
-
-less_than_128:
- XOR_UPPER_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13)
- EXTRACT_LOWER(BX, Y4, Y5, Y6, Y7, Y13)
- ADDQ $64, SI
- ADDQ $64, DI
- SUBQ $64, CX
- JMP finalize
-
-between_0_and_64:
- VMOVDQA X0, X4
- VMOVDQA X1, X5
- VMOVDQA X2, X6
- VMOVDQA X3, X7
-
- MOVQ DX, R9
-
-chacha_loop_64:
- CHACHA_QROUND_AVX(X4, X5, X6, X7, X13, X14, X15)
- CHACHA_SHUFFLE_AVX(X5, X6, X7)
- CHACHA_QROUND_AVX(X4, X5, X6, X7, X13, X14, X15)
- CHACHA_SHUFFLE_AVX(X7, X6, X5)
- SUBQ $2, R9
- JA chacha_loop_64
-
- VPADDD X0, X4, X4
- VPADDD X1, X5, X5
- VPADDD X2, X6, X6
- VPADDD X3, X7, X7
- VMOVDQU ·one<>(SB), X0
- VPADDQ X0, X3, X3
-
- CMPQ CX, $64
- JB less_than_64
-
- XOR_AVX(DI, SI, 0, X4, X5, X6, X7, X13)
- SUBQ $64, CX
- JMP done
-
-less_than_64:
- VMOVDQU X4, 0(BX)
- VMOVDQU X5, 16(BX)
- VMOVDQU X6, 32(BX)
- VMOVDQU X7, 48(BX)
-
-finalize:
- XORQ R11, R11
- XORQ R12, R12
- MOVQ CX, BP
-
-xor_loop:
- MOVB 0(SI), R11
- MOVB 0(BX), R12
- XORQ R11, R12
- MOVB R12, 0(DI)
- INCQ SI
- INCQ BX
- INCQ DI
- DECQ BP
- JA xor_loop
-
-done:
- VMOVDQU X3, 48(AX)
- VZEROUPPER
- MOVQ R8, SP
- MOVQ CX, ret+72(FP)
- RET
-
diff --git a/vendor/github.com/aead/chacha20/chacha/chacha_386.go b/vendor/github.com/aead/chacha20/chacha/chacha_386.go
deleted file mode 100644
index 97e533d..0000000
--- a/vendor/github.com/aead/chacha20/chacha/chacha_386.go
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
-// Use of this source code is governed by a license that can be
-// found in the LICENSE file.
-
-// +build 386,!gccgo,!appengine,!nacl
-
-package chacha
-
-import (
- "encoding/binary"
-
- "golang.org/x/sys/cpu"
-)
-
-func init() {
- useSSE2 = cpu.X86.HasSSE2
- useSSSE3 = cpu.X86.HasSSSE3
- useAVX = false
- useAVX2 = false
-}
-
-func initialize(state *[64]byte, key []byte, nonce *[16]byte) {
- binary.LittleEndian.PutUint32(state[0:], sigma[0])
- binary.LittleEndian.PutUint32(state[4:], sigma[1])
- binary.LittleEndian.PutUint32(state[8:], sigma[2])
- binary.LittleEndian.PutUint32(state[12:], sigma[3])
- copy(state[16:], key[:])
- copy(state[48:], nonce[:])
-}
-
-// This function is implemented in chacha_386.s
-//go:noescape
-func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
-
-// This function is implemented in chacha_386.s
-//go:noescape
-func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
-
-// This function is implemented in chacha_386.s
-//go:noescape
-func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
-
-func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
- switch {
- case useSSSE3:
- hChaCha20SSSE3(out, nonce, key)
- case useSSE2:
- hChaCha20SSE2(out, nonce, key)
- default:
- hChaCha20Generic(out, nonce, key)
- }
-}
-
-func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
- if useSSE2 {
- return xorKeyStreamSSE2(dst, src, block, state, rounds)
- } else {
- return xorKeyStreamGeneric(dst, src, block, state, rounds)
- }
-}
diff --git a/vendor/github.com/aead/chacha20/chacha/chacha_386.s b/vendor/github.com/aead/chacha20/chacha/chacha_386.s
deleted file mode 100644
index 262fc86..0000000
--- a/vendor/github.com/aead/chacha20/chacha/chacha_386.s
+++ /dev/null
@@ -1,163 +0,0 @@
-// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
-// Use of this source code is governed by a license that can be
-// found in the LICENSE file.
-
-// +build 386,!gccgo,!appengine,!nacl
-
-#include "const.s"
-#include "macro.s"
-
-// FINALIZE xors len bytes from src and block using
-// the temp. registers t0 and t1 and writes the result
-// to dst.
-#define FINALIZE(dst, src, block, len, t0, t1) \
- XORL t0, t0; \
- XORL t1, t1; \
- FINALIZE_LOOP:; \
- MOVB 0(src), t0; \
- MOVB 0(block), t1; \
- XORL t0, t1; \
- MOVB t1, 0(dst); \
- INCL src; \
- INCL block; \
- INCL dst; \
- DECL len; \
- JG FINALIZE_LOOP \
-
-#define Dst DI
-#define Nonce AX
-#define Key BX
-#define Rounds DX
-
-// func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
-TEXT ·hChaCha20SSE2(SB), 4, $0-12
- MOVL out+0(FP), Dst
- MOVL nonce+4(FP), Nonce
- MOVL key+8(FP), Key
-
- MOVOU ·sigma<>(SB), X0
- MOVOU 0*16(Key), X1
- MOVOU 1*16(Key), X2
- MOVOU 0*16(Nonce), X3
- MOVL $20, Rounds
-
-chacha_loop:
- CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
- CHACHA_SHUFFLE_SSE(X1, X2, X3)
- CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
- CHACHA_SHUFFLE_SSE(X3, X2, X1)
- SUBL $2, Rounds
- JNZ chacha_loop
-
- MOVOU X0, 0*16(Dst)
- MOVOU X3, 1*16(Dst)
- RET
-
-// func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
-TEXT ·hChaCha20SSSE3(SB), 4, $0-12
- MOVL out+0(FP), Dst
- MOVL nonce+4(FP), Nonce
- MOVL key+8(FP), Key
-
- MOVOU ·sigma<>(SB), X0
- MOVOU 0*16(Key), X1
- MOVOU 1*16(Key), X2
- MOVOU 0*16(Nonce), X3
- MOVL $20, Rounds
-
- MOVOU ·rol16<>(SB), X5
- MOVOU ·rol8<>(SB), X6
-
-chacha_loop:
- CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
- CHACHA_SHUFFLE_SSE(X1, X2, X3)
- CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
- CHACHA_SHUFFLE_SSE(X3, X2, X1)
- SUBL $2, Rounds
- JNZ chacha_loop
-
- MOVOU X0, 0*16(Dst)
- MOVOU X3, 1*16(Dst)
- RET
-
-#undef Dst
-#undef Nonce
-#undef Key
-#undef Rounds
-
-#define State AX
-#define Dst DI
-#define Src SI
-#define Len DX
-#define Tmp0 BX
-#define Tmp1 BP
-
-// func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
-TEXT ·xorKeyStreamSSE2(SB), 4, $0-40
- MOVL dst_base+0(FP), Dst
- MOVL src_base+12(FP), Src
- MOVL state+28(FP), State
- MOVL src_len+16(FP), Len
- MOVL $0, ret+36(FP) // Number of bytes written to the keystream buffer - 0 iff len mod 64 == 0
-
- MOVOU 0*16(State), X0
- MOVOU 1*16(State), X1
- MOVOU 2*16(State), X2
- MOVOU 3*16(State), X3
- TESTL Len, Len
- JZ DONE
-
-GENERATE_KEYSTREAM:
- MOVO X0, X4
- MOVO X1, X5
- MOVO X2, X6
- MOVO X3, X7
- MOVL rounds+32(FP), Tmp0
-
-CHACHA_LOOP:
- CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
- CHACHA_SHUFFLE_SSE(X5, X6, X7)
- CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
- CHACHA_SHUFFLE_SSE(X7, X6, X5)
- SUBL $2, Tmp0
- JA CHACHA_LOOP
-
- MOVOU 0*16(State), X0 // Restore X0 from state
- PADDL X0, X4
- PADDL X1, X5
- PADDL X2, X6
- PADDL X3, X7
- MOVOU ·one<>(SB), X0
- PADDQ X0, X3
-
- CMPL Len, $64
- JL BUFFER_KEYSTREAM
-
- XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X0)
- MOVOU 0*16(State), X0 // Restore X0 from state
- ADDL $64, Src
- ADDL $64, Dst
- SUBL $64, Len
- JZ DONE
- JMP GENERATE_KEYSTREAM // There is at least one more plaintext byte
-
-BUFFER_KEYSTREAM:
- MOVL block+24(FP), State
- MOVOU X4, 0(State)
- MOVOU X5, 16(State)
- MOVOU X6, 32(State)
- MOVOU X7, 48(State)
- MOVL Len, ret+36(FP) // Number of bytes written to the keystream buffer - 0 < Len < 64
- FINALIZE(Dst, Src, State, Len, Tmp0, Tmp1)
-
-DONE:
- MOVL state+28(FP), State
- MOVOU X3, 3*16(State)
- RET
-
-#undef State
-#undef Dst
-#undef Src
-#undef Len
-#undef Tmp0
-#undef Tmp1
diff --git a/vendor/github.com/aead/chacha20/chacha/chacha_amd64.go b/vendor/github.com/aead/chacha20/chacha/chacha_amd64.go
deleted file mode 100644
index 635f7de..0000000
--- a/vendor/github.com/aead/chacha20/chacha/chacha_amd64.go
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright (c) 2017 Andreas Auernhammer. All rights reserved.
-// Use of this source code is governed by a license that can be
-// found in the LICENSE file.
-
-// +build go1.7,amd64,!gccgo,!appengine,!nacl
-
-package chacha
-
-import "golang.org/x/sys/cpu"
-
-func init() {
- useSSE2 = cpu.X86.HasSSE2
- useSSSE3 = cpu.X86.HasSSSE3
- useAVX = cpu.X86.HasAVX
- useAVX2 = cpu.X86.HasAVX2
-}
-
-// This function is implemented in chacha_amd64.s
-//go:noescape
-func initialize(state *[64]byte, key []byte, nonce *[16]byte)
-
-// This function is implemented in chacha_amd64.s
-//go:noescape
-func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
-
-// This function is implemented in chacha_amd64.s
-//go:noescape
-func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
-
-// This function is implemented in chachaAVX2_amd64.s
-//go:noescape
-func hChaCha20AVX(out *[32]byte, nonce *[16]byte, key *[32]byte)
-
-// This function is implemented in chacha_amd64.s
-//go:noescape
-func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
-
-// This function is implemented in chacha_amd64.s
-//go:noescape
-func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
-
-// This function is implemented in chacha_amd64.s
-//go:noescape
-func xorKeyStreamAVX(dst, src []byte, block, state *[64]byte, rounds int) int
-
-// This function is implemented in chachaAVX2_amd64.s
-//go:noescape
-func xorKeyStreamAVX2(dst, src []byte, block, state *[64]byte, rounds int) int
-
-func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
- switch {
- case useAVX:
- hChaCha20AVX(out, nonce, key)
- case useSSSE3:
- hChaCha20SSSE3(out, nonce, key)
- case useSSE2:
- hChaCha20SSE2(out, nonce, key)
- default:
- hChaCha20Generic(out, nonce, key)
- }
-}
-
-func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
- switch {
- case useAVX2:
- return xorKeyStreamAVX2(dst, src, block, state, rounds)
- case useAVX:
- return xorKeyStreamAVX(dst, src, block, state, rounds)
- case useSSSE3:
- return xorKeyStreamSSSE3(dst, src, block, state, rounds)
- case useSSE2:
- return xorKeyStreamSSE2(dst, src, block, state, rounds)
- default:
- return xorKeyStreamGeneric(dst, src, block, state, rounds)
- }
-}
diff --git a/vendor/github.com/aead/chacha20/chacha/chacha_amd64.s b/vendor/github.com/aead/chacha20/chacha/chacha_amd64.s
deleted file mode 100644
index 26a2383..0000000
--- a/vendor/github.com/aead/chacha20/chacha/chacha_amd64.s
+++ /dev/null
@@ -1,1072 +0,0 @@
-// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
-// Use of this source code is governed by a license that can be
-// found in the LICENSE file.
-
-// +build amd64,!gccgo,!appengine,!nacl
-
-#include "const.s"
-#include "macro.s"
-
-// FINALIZE xors len bytes from src and block using
-// the temp. registers t0 and t1 and writes the result
-// to dst.
-#define FINALIZE(dst, src, block, len, t0, t1) \
- XORQ t0, t0; \
- XORQ t1, t1; \
- FINALIZE_LOOP:; \
- MOVB 0(src), t0; \
- MOVB 0(block), t1; \
- XORQ t0, t1; \
- MOVB t1, 0(dst); \
- INCQ src; \
- INCQ block; \
- INCQ dst; \
- DECQ len; \
- JG FINALIZE_LOOP \
-
-#define Dst DI
-#define Nonce AX
-#define Key BX
-#define Rounds DX
-
-// func initialize(state *[64]byte, key []byte, nonce *[16]byte)
-TEXT ·initialize(SB), 4, $0-40
- MOVQ state+0(FP), Dst
- MOVQ key+8(FP), Key
- MOVQ nonce+32(FP), Nonce
-
- MOVOU ·sigma<>(SB), X0
- MOVOU 0*16(Key), X1
- MOVOU 1*16(Key), X2
- MOVOU 0*16(Nonce), X3
-
- MOVOU X0, 0*16(Dst)
- MOVOU X1, 1*16(Dst)
- MOVOU X2, 2*16(Dst)
- MOVOU X3, 3*16(Dst)
- RET
-
-// func hChaCha20AVX(out *[32]byte, nonce *[16]byte, key *[32]byte)
-TEXT ·hChaCha20AVX(SB), 4, $0-24
- MOVQ out+0(FP), Dst
- MOVQ nonce+8(FP), Nonce
- MOVQ key+16(FP), Key
-
- VMOVDQU ·sigma<>(SB), X0
- VMOVDQU 0*16(Key), X1
- VMOVDQU 1*16(Key), X2
- VMOVDQU 0*16(Nonce), X3
- VMOVDQU ·rol16_AVX2<>(SB), X5
- VMOVDQU ·rol8_AVX2<>(SB), X6
- MOVQ $20, Rounds
-
-CHACHA_LOOP:
- CHACHA_QROUND_AVX(X0, X1, X2, X3, X4, X5, X6)
- CHACHA_SHUFFLE_AVX(X1, X2, X3)
- CHACHA_QROUND_AVX(X0, X1, X2, X3, X4, X5, X6)
- CHACHA_SHUFFLE_AVX(X3, X2, X1)
- SUBQ $2, Rounds
- JNZ CHACHA_LOOP
-
- VMOVDQU X0, 0*16(Dst)
- VMOVDQU X3, 1*16(Dst)
- VZEROUPPER
- RET
-
-// func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
-TEXT ·hChaCha20SSE2(SB), 4, $0-24
- MOVQ out+0(FP), Dst
- MOVQ nonce+8(FP), Nonce
- MOVQ key+16(FP), Key
-
- MOVOU ·sigma<>(SB), X0
- MOVOU 0*16(Key), X1
- MOVOU 1*16(Key), X2
- MOVOU 0*16(Nonce), X3
- MOVQ $20, Rounds
-
-CHACHA_LOOP:
- CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
- CHACHA_SHUFFLE_SSE(X1, X2, X3)
- CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
- CHACHA_SHUFFLE_SSE(X3, X2, X1)
- SUBQ $2, Rounds
- JNZ CHACHA_LOOP
-
- MOVOU X0, 0*16(Dst)
- MOVOU X3, 1*16(Dst)
- RET
-
-// func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
-TEXT ·hChaCha20SSSE3(SB), 4, $0-24
- MOVQ out+0(FP), Dst
- MOVQ nonce+8(FP), Nonce
- MOVQ key+16(FP), Key
-
- MOVOU ·sigma<>(SB), X0
- MOVOU 0*16(Key), X1
- MOVOU 1*16(Key), X2
- MOVOU 0*16(Nonce), X3
- MOVOU ·rol16<>(SB), X5
- MOVOU ·rol8<>(SB), X6
- MOVQ $20, Rounds
-
-chacha_loop:
- CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
- CHACHA_SHUFFLE_SSE(X1, X2, X3)
- CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
- CHACHA_SHUFFLE_SSE(X3, X2, X1)
- SUBQ $2, Rounds
- JNZ chacha_loop
-
- MOVOU X0, 0*16(Dst)
- MOVOU X3, 1*16(Dst)
- RET
-
-#undef Dst
-#undef Nonce
-#undef Key
-#undef Rounds
-
-#define Dst DI
-#define Src SI
-#define Len R12
-#define Rounds DX
-#define Buffer BX
-#define State AX
-#define Stack SP
-#define SavedSP R8
-#define Tmp0 R9
-#define Tmp1 R10
-#define Tmp2 R11
-
-// func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
-TEXT ·xorKeyStreamSSE2(SB), 4, $112-80
- MOVQ dst_base+0(FP), Dst
- MOVQ src_base+24(FP), Src
- MOVQ block+48(FP), Buffer
- MOVQ state+56(FP), State
- MOVQ rounds+64(FP), Rounds
- MOVQ src_len+32(FP), Len
-
- MOVOU 0*16(State), X0
- MOVOU 1*16(State), X1
- MOVOU 2*16(State), X2
- MOVOU 3*16(State), X3
-
- MOVQ Stack, SavedSP
- ADDQ $16, Stack
- ANDQ $-16, Stack
-
- TESTQ Len, Len
- JZ DONE
-
- MOVOU ·one<>(SB), X4
- MOVO X0, 0*16(Stack)
- MOVO X1, 1*16(Stack)
- MOVO X2, 2*16(Stack)
- MOVO X3, 3*16(Stack)
- MOVO X4, 4*16(Stack)
-
- CMPQ Len, $64
- JLE GENERATE_KEYSTREAM_64
- CMPQ Len, $128
- JLE GENERATE_KEYSTREAM_128
- CMPQ Len, $192
- JLE GENERATE_KEYSTREAM_192
-
-GENERATE_KEYSTREAM_256:
- MOVO X0, X12
- MOVO X1, X13
- MOVO X2, X14
- MOVO X3, X15
- PADDQ 4*16(Stack), X15
- MOVO X0, X8
- MOVO X1, X9
- MOVO X2, X10
- MOVO X15, X11
- PADDQ 4*16(Stack), X11
- MOVO X0, X4
- MOVO X1, X5
- MOVO X2, X6
- MOVO X11, X7
- PADDQ 4*16(Stack), X7
- MOVQ Rounds, Tmp0
-
- MOVO X3, 3*16(Stack) // Save X3
-
-CHACHA_LOOP_256:
- MOVO X4, 5*16(Stack)
- CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
- CHACHA_QROUND_SSE2(X12, X13, X14, X15, X4)
- MOVO 5*16(Stack), X4
- MOVO X0, 5*16(Stack)
- CHACHA_QROUND_SSE2(X8, X9, X10, X11, X0)
- CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
- MOVO 5*16(Stack), X0
- CHACHA_SHUFFLE_SSE(X1, X2, X3)
- CHACHA_SHUFFLE_SSE(X13, X14, X15)
- CHACHA_SHUFFLE_SSE(X9, X10, X11)
- CHACHA_SHUFFLE_SSE(X5, X6, X7)
- MOVO X4, 5*16(Stack)
- CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
- CHACHA_QROUND_SSE2(X12, X13, X14, X15, X4)
- MOVO 5*16(Stack), X4
- MOVO X0, 5*16(Stack)
- CHACHA_QROUND_SSE2(X8, X9, X10, X11, X0)
- CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
- MOVO 5*16(Stack), X0
- CHACHA_SHUFFLE_SSE(X3, X2, X1)
- CHACHA_SHUFFLE_SSE(X15, X14, X13)
- CHACHA_SHUFFLE_SSE(X11, X10, X9)
- CHACHA_SHUFFLE_SSE(X7, X6, X5)
- SUBQ $2, Tmp0
- JNZ CHACHA_LOOP_256
-
- PADDL 0*16(Stack), X0
- PADDL 1*16(Stack), X1
- PADDL 2*16(Stack), X2
- PADDL 3*16(Stack), X3
- MOVO X4, 5*16(Stack) // Save X4
- XOR_SSE(Dst, Src, 0, X0, X1, X2, X3, X4)
- MOVO 5*16(Stack), X4 // Restore X4
-
- MOVO 0*16(Stack), X0
- MOVO 1*16(Stack), X1
- MOVO 2*16(Stack), X2
- MOVO 3*16(Stack), X3
- PADDQ 4*16(Stack), X3
-
- PADDL X0, X12
- PADDL X1, X13
- PADDL X2, X14
- PADDL X3, X15
- PADDQ 4*16(Stack), X3
- PADDL X0, X8
- PADDL X1, X9
- PADDL X2, X10
- PADDL X3, X11
- PADDQ 4*16(Stack), X3
- PADDL X0, X4
- PADDL X1, X5
- PADDL X2, X6
- PADDL X3, X7
- PADDQ 4*16(Stack), X3
-
- XOR_SSE(Dst, Src, 64, X12, X13, X14, X15, X0)
- XOR_SSE(Dst, Src, 128, X8, X9, X10, X11, X0)
- MOVO 0*16(Stack), X0 // Restore X0
- ADDQ $192, Dst
- ADDQ $192, Src
- SUBQ $192, Len
-
- CMPQ Len, $64
- JL BUFFER_KEYSTREAM
-
- XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X8)
- ADDQ $64, Dst
- ADDQ $64, Src
- SUBQ $64, Len
- JZ DONE
- CMPQ Len, $64 // If Len <= 64 -> gen. only 64 byte keystream.
- JLE GENERATE_KEYSTREAM_64
- CMPQ Len, $128 // If 64 < Len <= 128 -> gen. only 128 byte keystream.
- JLE GENERATE_KEYSTREAM_128
- CMPQ Len, $192 // If Len > 192 -> repeat, otherwise Len > 128 && Len <= 192 -> gen. 192 byte keystream
- JG GENERATE_KEYSTREAM_256
-
-GENERATE_KEYSTREAM_192:
- MOVO X0, X12
- MOVO X1, X13
- MOVO X2, X14
- MOVO X3, X15
- MOVO X0, X8
- MOVO X1, X9
- MOVO X2, X10
- MOVO X3, X11
- PADDQ 4*16(Stack), X11
- MOVO X0, X4
- MOVO X1, X5
- MOVO X2, X6
- MOVO X11, X7
- PADDQ 4*16(Stack), X7
- MOVQ Rounds, Tmp0
-
-CHACHA_LOOP_192:
- CHACHA_QROUND_SSE2(X12, X13, X14, X15, X0)
- CHACHA_QROUND_SSE2(X8, X9, X10, X11, X0)
- CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
- CHACHA_SHUFFLE_SSE(X13, X14, X15)
- CHACHA_SHUFFLE_SSE(X9, X10, X11)
- CHACHA_SHUFFLE_SSE(X5, X6, X7)
- CHACHA_QROUND_SSE2(X12, X13, X14, X15, X0)
- CHACHA_QROUND_SSE2(X8, X9, X10, X11, X0)
- CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
- CHACHA_SHUFFLE_SSE(X15, X14, X13)
- CHACHA_SHUFFLE_SSE(X11, X10, X9)
- CHACHA_SHUFFLE_SSE(X7, X6, X5)
- SUBQ $2, Tmp0
- JNZ CHACHA_LOOP_192
-
- MOVO 0*16(Stack), X0 // Restore X0
- PADDL X0, X12
- PADDL X1, X13
- PADDL X2, X14
- PADDL X3, X15
- PADDQ 4*16(Stack), X3
- PADDL X0, X8
- PADDL X1, X9
- PADDL X2, X10
- PADDL X3, X11
- PADDQ 4*16(Stack), X3
- PADDL X0, X4
- PADDL X1, X5
- PADDL X2, X6
- PADDL X3, X7
- PADDQ 4*16(Stack), X3
-
- XOR_SSE(Dst, Src, 0, X12, X13, X14, X15, X0)
- XOR_SSE(Dst, Src, 64, X8, X9, X10, X11, X0)
- MOVO 0*16(Stack), X0 // Restore X0
- ADDQ $128, Dst
- ADDQ $128, Src
- SUBQ $128, Len
-
- CMPQ Len, $64
- JL BUFFER_KEYSTREAM
-
- XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X8)
- ADDQ $64, Dst
- ADDQ $64, Src
- SUBQ $64, Len
- JZ DONE
- CMPQ Len, $64 // If Len <= 64 -> gen. only 64 byte keystream.
- JLE GENERATE_KEYSTREAM_64
-
-GENERATE_KEYSTREAM_128:
- MOVO X0, X8
- MOVO X1, X9
- MOVO X2, X10
- MOVO X3, X11
- MOVO X0, X4
- MOVO X1, X5
- MOVO X2, X6
- MOVO X3, X7
- PADDQ 4*16(Stack), X7
- MOVQ Rounds, Tmp0
-
-CHACHA_LOOP_128:
- CHACHA_QROUND_SSE2(X8, X9, X10, X11, X12)
- CHACHA_QROUND_SSE2(X4, X5, X6, X7, X12)
- CHACHA_SHUFFLE_SSE(X9, X10, X11)
- CHACHA_SHUFFLE_SSE(X5, X6, X7)
- CHACHA_QROUND_SSE2(X8, X9, X10, X11, X12)
- CHACHA_QROUND_SSE2(X4, X5, X6, X7, X12)
- CHACHA_SHUFFLE_SSE(X11, X10, X9)
- CHACHA_SHUFFLE_SSE(X7, X6, X5)
- SUBQ $2, Tmp0
- JNZ CHACHA_LOOP_128
-
- PADDL X0, X8
- PADDL X1, X9
- PADDL X2, X10
- PADDL X3, X11
- PADDQ 4*16(Stack), X3
- PADDL X0, X4
- PADDL X1, X5
- PADDL X2, X6
- PADDL X3, X7
- PADDQ 4*16(Stack), X3
-
- XOR_SSE(Dst, Src, 0, X8, X9, X10, X11, X12)
- ADDQ $64, Dst
- ADDQ $64, Src
- SUBQ $64, Len
-
- CMPQ Len, $64
- JL BUFFER_KEYSTREAM
-
- XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X8)
- ADDQ $64, Dst
- ADDQ $64, Src
- SUBQ $64, Len
- JZ DONE // If Len == 0 -> DONE, otherwise Len <= 64 -> gen 64 byte keystream
-
-GENERATE_KEYSTREAM_64:
- MOVO X0, X4
- MOVO X1, X5
- MOVO X2, X6
- MOVO X3, X7
- MOVQ Rounds, Tmp0
-
-CHACHA_LOOP_64:
- CHACHA_QROUND_SSE2(X4, X5, X6, X7, X8)
- CHACHA_SHUFFLE_SSE(X5, X6, X7)
- CHACHA_QROUND_SSE2(X4, X5, X6, X7, X8)
- CHACHA_SHUFFLE_SSE(X7, X6, X5)
- SUBQ $2, Tmp0
- JNZ CHACHA_LOOP_64
-
- PADDL X0, X4
- PADDL X1, X5
- PADDL X2, X6
- PADDL X3, X7
- PADDQ 4*16(Stack), X3
-
- CMPQ Len, $64
- JL BUFFER_KEYSTREAM
-
- XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X8)
- ADDQ $64, Src
- ADDQ $64, Dst
- SUBQ $64, Len
- JMP DONE // jump directly to DONE - there is no keystream to buffer, Len == 0 always true.
-
-BUFFER_KEYSTREAM:
- MOVOU X4, 0*16(Buffer)
- MOVOU X5, 1*16(Buffer)
- MOVOU X6, 2*16(Buffer)
- MOVOU X7, 3*16(Buffer)
- MOVQ Len, Tmp0
- FINALIZE(Dst, Src, Buffer, Tmp0, Tmp1, Tmp2)
-
-DONE:
- MOVQ SavedSP, Stack // Restore stack pointer
- MOVOU X3, 3*16(State)
- MOVQ Len, ret+72(FP)
- RET
-
-// func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
-TEXT ·xorKeyStreamSSSE3(SB), 4, $144-80
- MOVQ dst_base+0(FP), Dst
- MOVQ src_base+24(FP), Src
- MOVQ block+48(FP), Buffer
- MOVQ state+56(FP), State
- MOVQ rounds+64(FP), Rounds
- MOVQ src_len+32(FP), Len
-
- MOVOU 0*16(State), X0
- MOVOU 1*16(State), X1
- MOVOU 2*16(State), X2
- MOVOU 3*16(State), X3
-
- MOVQ Stack, SavedSP
- ADDQ $16, Stack
- ANDQ $-16, Stack
-
- TESTQ Len, Len
- JZ DONE
-
- MOVOU ·one<>(SB), X4
- MOVOU ·rol16<>(SB), X5
- MOVOU ·rol8<>(SB), X6
- MOVO X0, 0*16(Stack)
- MOVO X1, 1*16(Stack)
- MOVO X2, 2*16(Stack)
- MOVO X3, 3*16(Stack)
- MOVO X4, 4*16(Stack)
- MOVO X5, 6*16(Stack)
- MOVO X6, 7*16(Stack)
-
- CMPQ Len, $64
- JLE GENERATE_KEYSTREAM_64
- CMPQ Len, $128
- JLE GENERATE_KEYSTREAM_128
- CMPQ Len, $192
- JLE GENERATE_KEYSTREAM_192
-
-GENERATE_KEYSTREAM_256:
- MOVO X0, X12
- MOVO X1, X13
- MOVO X2, X14
- MOVO X3, X15
- PADDQ 4*16(Stack), X15
- MOVO X0, X8
- MOVO X1, X9
- MOVO X2, X10
- MOVO X15, X11
- PADDQ 4*16(Stack), X11
- MOVO X0, X4
- MOVO X1, X5
- MOVO X2, X6
- MOVO X11, X7
- PADDQ 4*16(Stack), X7
- MOVQ Rounds, Tmp0
-
- MOVO X3, 3*16(Stack) // Save X3
-
-CHACHA_LOOP_256:
- MOVO X4, 5*16(Stack)
- CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, 6*16(Stack), 7*16(Stack))
- CHACHA_QROUND_SSSE3(X12, X13, X14, X15, X4, 6*16(Stack), 7*16(Stack))
- MOVO 5*16(Stack), X4
- MOVO X0, 5*16(Stack)
- CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X0, 6*16(Stack), 7*16(Stack))
- CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X0, 6*16(Stack), 7*16(Stack))
- MOVO 5*16(Stack), X0
- CHACHA_SHUFFLE_SSE(X1, X2, X3)
- CHACHA_SHUFFLE_SSE(X13, X14, X15)
- CHACHA_SHUFFLE_SSE(X9, X10, X11)
- CHACHA_SHUFFLE_SSE(X5, X6, X7)
- MOVO X4, 5*16(Stack)
- CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, 6*16(Stack), 7*16(Stack))
- CHACHA_QROUND_SSSE3(X12, X13, X14, X15, X4, 6*16(Stack), 7*16(Stack))
- MOVO 5*16(Stack), X4
- MOVO X0, 5*16(Stack)
- CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X0, 6*16(Stack), 7*16(Stack))
- CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X0, 6*16(Stack), 7*16(Stack))
- MOVO 5*16(Stack), X0
- CHACHA_SHUFFLE_SSE(X3, X2, X1)
- CHACHA_SHUFFLE_SSE(X15, X14, X13)
- CHACHA_SHUFFLE_SSE(X11, X10, X9)
- CHACHA_SHUFFLE_SSE(X7, X6, X5)
- SUBQ $2, Tmp0
- JNZ CHACHA_LOOP_256
-
- PADDL 0*16(Stack), X0
- PADDL 1*16(Stack), X1
- PADDL 2*16(Stack), X2
- PADDL 3*16(Stack), X3
- MOVO X4, 5*16(Stack) // Save X4
- XOR_SSE(Dst, Src, 0, X0, X1, X2, X3, X4)
- MOVO 5*16(Stack), X4 // Restore X4
-
- MOVO 0*16(Stack), X0
- MOVO 1*16(Stack), X1
- MOVO 2*16(Stack), X2
- MOVO 3*16(Stack), X3
- PADDQ 4*16(Stack), X3
-
- PADDL X0, X12
- PADDL X1, X13
- PADDL X2, X14
- PADDL X3, X15
- PADDQ 4*16(Stack), X3
- PADDL X0, X8
- PADDL X1, X9
- PADDL X2, X10
- PADDL X3, X11
- PADDQ 4*16(Stack), X3
- PADDL X0, X4
- PADDL X1, X5
- PADDL X2, X6
- PADDL X3, X7
- PADDQ 4*16(Stack), X3
-
- XOR_SSE(Dst, Src, 64, X12, X13, X14, X15, X0)
- XOR_SSE(Dst, Src, 128, X8, X9, X10, X11, X0)
- MOVO 0*16(Stack), X0 // Restore X0
- ADDQ $192, Dst
- ADDQ $192, Src
- SUBQ $192, Len
-
- CMPQ Len, $64
- JL BUFFER_KEYSTREAM
-
- XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X8)
- ADDQ $64, Dst
- ADDQ $64, Src
- SUBQ $64, Len
- JZ DONE
- CMPQ Len, $64 // If Len <= 64 -> gen. only 64 byte keystream.
- JLE GENERATE_KEYSTREAM_64
- CMPQ Len, $128 // If 64 < Len <= 128 -> gen. only 128 byte keystream.
- JLE GENERATE_KEYSTREAM_128
- CMPQ Len, $192 // If Len > 192 -> repeat, otherwise Len > 128 && Len <= 192 -> gen. 192 byte keystream
- JG GENERATE_KEYSTREAM_256
-
-GENERATE_KEYSTREAM_192:
- MOVO X0, X12
- MOVO X1, X13
- MOVO X2, X14
- MOVO X3, X15
- MOVO X0, X8
- MOVO X1, X9
- MOVO X2, X10
- MOVO X3, X11
- PADDQ 4*16(Stack), X11
- MOVO X0, X4
- MOVO X1, X5
- MOVO X2, X6
- MOVO X11, X7
- PADDQ 4*16(Stack), X7
- MOVQ Rounds, Tmp0
-
- MOVO 6*16(Stack), X1 // Load 16 bit rotate-left constant
- MOVO 7*16(Stack), X2 // Load 8 bit rotate-left constant
-
-CHACHA_LOOP_192:
- CHACHA_QROUND_SSSE3(X12, X13, X14, X15, X0, X1, X2)
- CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X0, X1, X2)
- CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X0, X1, X2)
- CHACHA_SHUFFLE_SSE(X13, X14, X15)
- CHACHA_SHUFFLE_SSE(X9, X10, X11)
- CHACHA_SHUFFLE_SSE(X5, X6, X7)
- CHACHA_QROUND_SSSE3(X12, X13, X14, X15, X0, X1, X2)
- CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X0, X1, X2)
- CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X0, X1, X2)
- CHACHA_SHUFFLE_SSE(X15, X14, X13)
- CHACHA_SHUFFLE_SSE(X11, X10, X9)
- CHACHA_SHUFFLE_SSE(X7, X6, X5)
- SUBQ $2, Tmp0
- JNZ CHACHA_LOOP_192
-
- MOVO 0*16(Stack), X0 // Restore X0
- MOVO 1*16(Stack), X1 // Restore X1
- MOVO 2*16(Stack), X2 // Restore X2
- PADDL X0, X12
- PADDL X1, X13
- PADDL X2, X14
- PADDL X3, X15
- PADDQ 4*16(Stack), X3
- PADDL X0, X8
- PADDL X1, X9
- PADDL X2, X10
- PADDL X3, X11
- PADDQ 4*16(Stack), X3
- PADDL X0, X4
- PADDL X1, X5
- PADDL X2, X6
- PADDL X3, X7
- PADDQ 4*16(Stack), X3
-
- XOR_SSE(Dst, Src, 0, X12, X13, X14, X15, X0)
- XOR_SSE(Dst, Src, 64, X8, X9, X10, X11, X0)
- MOVO 0*16(Stack), X0 // Restore X0
- ADDQ $128, Dst
- ADDQ $128, Src
- SUBQ $128, Len
-
- CMPQ Len, $64
- JL BUFFER_KEYSTREAM
-
- XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X8)
- ADDQ $64, Dst
- ADDQ $64, Src
- SUBQ $64, Len
- JZ DONE
- CMPQ Len, $64 // If Len <= 64 -> gen. only 64 byte keystream.
- JLE GENERATE_KEYSTREAM_64
-
-GENERATE_KEYSTREAM_128:
- MOVO X0, X8
- MOVO X1, X9
- MOVO X2, X10
- MOVO X3, X11
- MOVO X0, X4
- MOVO X1, X5
- MOVO X2, X6
- MOVO X3, X7
- PADDQ 4*16(Stack), X7
- MOVQ Rounds, Tmp0
-
- MOVO 6*16(Stack), X13 // Load 16 bit rotate-left constant
- MOVO 7*16(Stack), X14 // Load 8 bit rotate-left constant
-
-CHACHA_LOOP_128:
- CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X12, X13, X14)
- CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X12, X13, X14)
- CHACHA_SHUFFLE_SSE(X9, X10, X11)
- CHACHA_SHUFFLE_SSE(X5, X6, X7)
- CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X12, X13, X14)
- CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X12, X13, X14)
- CHACHA_SHUFFLE_SSE(X11, X10, X9)
- CHACHA_SHUFFLE_SSE(X7, X6, X5)
- SUBQ $2, Tmp0
- JNZ CHACHA_LOOP_128
-
- PADDL X0, X8
- PADDL X1, X9
- PADDL X2, X10
- PADDL X3, X11
- PADDQ 4*16(Stack), X3
- PADDL X0, X4
- PADDL X1, X5
- PADDL X2, X6
- PADDL X3, X7
- PADDQ 4*16(Stack), X3
-
- XOR_SSE(Dst, Src, 0, X8, X9, X10, X11, X12)
- ADDQ $64, Dst
- ADDQ $64, Src
- SUBQ $64, Len
-
- CMPQ Len, $64
- JL BUFFER_KEYSTREAM
-
- XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X8)
- ADDQ $64, Dst
- ADDQ $64, Src
- SUBQ $64, Len
- JZ DONE // If Len == 0 -> DONE, otherwise Len <= 64 -> gen 64 byte keystream
-
-GENERATE_KEYSTREAM_64:
- MOVO X0, X4
- MOVO X1, X5
- MOVO X2, X6
- MOVO X3, X7
- MOVQ Rounds, Tmp0
-
- MOVO 6*16(Stack), X9 // Load 16 bit rotate-left constant
- MOVO 7*16(Stack), X10 // Load 8 bit rotate-left constant
-
-CHACHA_LOOP_64:
- CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10)
- CHACHA_SHUFFLE_SSE(X5, X6, X7)
- CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10)
- CHACHA_SHUFFLE_SSE(X7, X6, X5)
- SUBQ $2, Tmp0
- JNZ CHACHA_LOOP_64
-
- PADDL X0, X4
- PADDL X1, X5
- PADDL X2, X6
- PADDL X3, X7
- PADDQ 4*16(Stack), X3
-
- CMPQ Len, $64
- JL BUFFER_KEYSTREAM
-
- XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X8)
- ADDQ $64, Src
- ADDQ $64, Dst
- SUBQ $64, Len
- JMP DONE // jump directly to DONE - there is no keystream to buffer, Len == 0 always true.
-
-BUFFER_KEYSTREAM:
- MOVOU X4, 0*16(Buffer)
- MOVOU X5, 1*16(Buffer)
- MOVOU X6, 2*16(Buffer)
- MOVOU X7, 3*16(Buffer)
- MOVQ Len, Tmp0
- FINALIZE(Dst, Src, Buffer, Tmp0, Tmp1, Tmp2)
-
-DONE:
- MOVQ SavedSP, Stack // Restore stack pointer
- MOVOU X3, 3*16(State)
- MOVQ Len, ret+72(FP)
- RET
-
-// func xorKeyStreamAVX(dst, src []byte, block, state *[64]byte, rounds int) int
-TEXT ·xorKeyStreamAVX(SB), 4, $144-80
- MOVQ dst_base+0(FP), Dst
- MOVQ src_base+24(FP), Src
- MOVQ block+48(FP), Buffer
- MOVQ state+56(FP), State
- MOVQ rounds+64(FP), Rounds
- MOVQ src_len+32(FP), Len
-
- VMOVDQU 0*16(State), X0
- VMOVDQU 1*16(State), X1
- VMOVDQU 2*16(State), X2
- VMOVDQU 3*16(State), X3
-
- MOVQ Stack, SavedSP
- ADDQ $16, Stack
- ANDQ $-16, Stack
-
- TESTQ Len, Len
- JZ DONE
-
- VMOVDQU ·one<>(SB), X4
- VMOVDQU ·rol16<>(SB), X5
- VMOVDQU ·rol8<>(SB), X6
- VMOVDQA X0, 0*16(Stack)
- VMOVDQA X1, 1*16(Stack)
- VMOVDQA X2, 2*16(Stack)
- VMOVDQA X3, 3*16(Stack)
- VMOVDQA X4, 4*16(Stack)
- VMOVDQA X5, 6*16(Stack)
- VMOVDQA X6, 7*16(Stack)
-
- CMPQ Len, $64
- JLE GENERATE_KEYSTREAM_64
- CMPQ Len, $128
- JLE GENERATE_KEYSTREAM_128
- CMPQ Len, $192
- JLE GENERATE_KEYSTREAM_192
-
-GENERATE_KEYSTREAM_256:
- VMOVDQA X0, X12
- VMOVDQA X1, X13
- VMOVDQA X2, X14
- VMOVDQA X3, X15
- VPADDQ 4*16(Stack), X15, X15
- VMOVDQA X0, X8
- VMOVDQA X1, X9
- VMOVDQA X2, X10
- VMOVDQA X15, X11
- VPADDQ 4*16(Stack), X11, X11
- VMOVDQA X0, X4
- VMOVDQA X1, X5
- VMOVDQA X2, X6
- VMOVDQA X11, X7
- VPADDQ 4*16(Stack), X7, X7
- MOVQ Rounds, Tmp0
-
- VMOVDQA X3, 3*16(Stack) // Save X3
-
-CHACHA_LOOP_256:
- VMOVDQA X4, 5*16(Stack)
- CHACHA_QROUND_AVX(X0, X1, X2, X3, X4, 6*16(Stack), 7*16(Stack))
- CHACHA_QROUND_AVX(X12, X13, X14, X15, X4, 6*16(Stack), 7*16(Stack))
- VMOVDQA 5*16(Stack), X4
- VMOVDQA X0, 5*16(Stack)
- CHACHA_QROUND_AVX(X8, X9, X10, X11, X0, 6*16(Stack), 7*16(Stack))
- CHACHA_QROUND_AVX(X4, X5, X6, X7, X0, 6*16(Stack), 7*16(Stack))
- VMOVDQA 5*16(Stack), X0
- CHACHA_SHUFFLE_AVX(X1, X2, X3)
- CHACHA_SHUFFLE_AVX(X13, X14, X15)
- CHACHA_SHUFFLE_AVX(X9, X10, X11)
- CHACHA_SHUFFLE_AVX(X5, X6, X7)
- VMOVDQA X4, 5*16(Stack)
- CHACHA_QROUND_AVX(X0, X1, X2, X3, X4, 6*16(Stack), 7*16(Stack))
- CHACHA_QROUND_AVX(X12, X13, X14, X15, X4, 6*16(Stack), 7*16(Stack))
- VMOVDQA 5*16(Stack), X4
- VMOVDQA X0, 5*16(Stack)
- CHACHA_QROUND_AVX(X8, X9, X10, X11, X0, 6*16(Stack), 7*16(Stack))
- CHACHA_QROUND_AVX(X4, X5, X6, X7, X0, 6*16(Stack), 7*16(Stack))
- VMOVDQA 5*16(Stack), X0
- CHACHA_SHUFFLE_AVX(X3, X2, X1)
- CHACHA_SHUFFLE_AVX(X15, X14, X13)
- CHACHA_SHUFFLE_AVX(X11, X10, X9)
- CHACHA_SHUFFLE_AVX(X7, X6, X5)
- SUBQ $2, Tmp0
- JNZ CHACHA_LOOP_256
-
- VPADDD 0*16(Stack), X0, X0
- VPADDD 1*16(Stack), X1, X1
- VPADDD 2*16(Stack), X2, X2
- VPADDD 3*16(Stack), X3, X3
- VMOVDQA X4, 5*16(Stack) // Save X4
- XOR_AVX(Dst, Src, 0, X0, X1, X2, X3, X4)
- VMOVDQA 5*16(Stack), X4 // Restore X4
-
- VMOVDQA 0*16(Stack), X0
- VMOVDQA 1*16(Stack), X1
- VMOVDQA 2*16(Stack), X2
- VMOVDQA 3*16(Stack), X3
- VPADDQ 4*16(Stack), X3, X3
-
- VPADDD X0, X12, X12
- VPADDD X1, X13, X13
- VPADDD X2, X14, X14
- VPADDD X3, X15, X15
- VPADDQ 4*16(Stack), X3, X3
- VPADDD X0, X8, X8
- VPADDD X1, X9, X9
- VPADDD X2, X10, X10
- VPADDD X3, X11, X11
- VPADDQ 4*16(Stack), X3, X3
- VPADDD X0, X4, X4
- VPADDD X1, X5, X5
- VPADDD X2, X6, X6
- VPADDD X3, X7, X7
- VPADDQ 4*16(Stack), X3, X3
-
- XOR_AVX(Dst, Src, 64, X12, X13, X14, X15, X0)
- XOR_AVX(Dst, Src, 128, X8, X9, X10, X11, X0)
- VMOVDQA 0*16(Stack), X0 // Restore X0
- ADDQ $192, Dst
- ADDQ $192, Src
- SUBQ $192, Len
-
- CMPQ Len, $64
- JL BUFFER_KEYSTREAM
-
- XOR_AVX(Dst, Src, 0, X4, X5, X6, X7, X8)
- ADDQ $64, Dst
- ADDQ $64, Src
- SUBQ $64, Len
- JZ DONE
- CMPQ Len, $64 // If Len <= 64 -> gen. only 64 byte keystream.
- JLE GENERATE_KEYSTREAM_64
- CMPQ Len, $128 // If 64 < Len <= 128 -> gen. only 128 byte keystream.
- JLE GENERATE_KEYSTREAM_128
- CMPQ Len, $192 // If Len > 192 -> repeat, otherwise Len > 128 && Len <= 192 -> gen. 192 byte keystream
- JG GENERATE_KEYSTREAM_256
-
-GENERATE_KEYSTREAM_192:
- VMOVDQA X0, X12
- VMOVDQA X1, X13
- VMOVDQA X2, X14
- VMOVDQA X3, X15
- VMOVDQA X0, X8
- VMOVDQA X1, X9
- VMOVDQA X2, X10
- VMOVDQA X3, X11
- VPADDQ 4*16(Stack), X11, X11
- VMOVDQA X0, X4
- VMOVDQA X1, X5
- VMOVDQA X2, X6
- VMOVDQA X11, X7
- VPADDQ 4*16(Stack), X7, X7
- MOVQ Rounds, Tmp0
-
- VMOVDQA 6*16(Stack), X1 // Load 16 bit rotate-left constant
- VMOVDQA 7*16(Stack), X2 // Load 8 bit rotate-left constant
-
-CHACHA_LOOP_192:
- CHACHA_QROUND_AVX(X12, X13, X14, X15, X0, X1, X2)
- CHACHA_QROUND_AVX(X8, X9, X10, X11, X0, X1, X2)
- CHACHA_QROUND_AVX(X4, X5, X6, X7, X0, X1, X2)
- CHACHA_SHUFFLE_AVX(X13, X14, X15)
- CHACHA_SHUFFLE_AVX(X9, X10, X11)
- CHACHA_SHUFFLE_AVX(X5, X6, X7)
- CHACHA_QROUND_AVX(X12, X13, X14, X15, X0, X1, X2)
- CHACHA_QROUND_AVX(X8, X9, X10, X11, X0, X1, X2)
- CHACHA_QROUND_AVX(X4, X5, X6, X7, X0, X1, X2)
- CHACHA_SHUFFLE_AVX(X15, X14, X13)
- CHACHA_SHUFFLE_AVX(X11, X10, X9)
- CHACHA_SHUFFLE_AVX(X7, X6, X5)
- SUBQ $2, Tmp0
- JNZ CHACHA_LOOP_192
-
- VMOVDQA 0*16(Stack), X0 // Restore X0
- VMOVDQA 1*16(Stack), X1 // Restore X1
- VMOVDQA 2*16(Stack), X2 // Restore X2
- VPADDD X0, X12, X12
- VPADDD X1, X13, X13
- VPADDD X2, X14, X14
- VPADDD X3, X15, X15
- VPADDQ 4*16(Stack), X3, X3
- VPADDD X0, X8, X8
- VPADDD X1, X9, X9
- VPADDD X2, X10, X10
- VPADDD X3, X11, X11
- VPADDQ 4*16(Stack), X3, X3
- VPADDD X0, X4, X4
- VPADDD X1, X5, X5
- VPADDD X2, X6, X6
- VPADDD X3, X7, X7
- VPADDQ 4*16(Stack), X3, X3
-
- XOR_AVX(Dst, Src, 0, X12, X13, X14, X15, X0)
- XOR_AVX(Dst, Src, 64, X8, X9, X10, X11, X0)
- VMOVDQA 0*16(Stack), X0 // Restore X0
- ADDQ $128, Dst
- ADDQ $128, Src
- SUBQ $128, Len
-
- CMPQ Len, $64
- JL BUFFER_KEYSTREAM
-
- XOR_AVX(Dst, Src, 0, X4, X5, X6, X7, X8)
- ADDQ $64, Dst
- ADDQ $64, Src
- SUBQ $64, Len
- JZ DONE
- CMPQ Len, $64 // If Len <= 64 -> gen. only 64 byte keystream.
- JLE GENERATE_KEYSTREAM_64
-
-GENERATE_KEYSTREAM_128:
- VMOVDQA X0, X8
- VMOVDQA X1, X9
- VMOVDQA X2, X10
- VMOVDQA X3, X11
- VMOVDQA X0, X4
- VMOVDQA X1, X5
- VMOVDQA X2, X6
- VMOVDQA X3, X7
- VPADDQ 4*16(Stack), X7, X7
- MOVQ Rounds, Tmp0
-
- VMOVDQA 6*16(Stack), X13 // Load 16 bit rotate-left constant
- VMOVDQA 7*16(Stack), X14 // Load 8 bit rotate-left constant
-
-CHACHA_LOOP_128:
- CHACHA_QROUND_AVX(X8, X9, X10, X11, X12, X13, X14)
- CHACHA_QROUND_AVX(X4, X5, X6, X7, X12, X13, X14)
- CHACHA_SHUFFLE_AVX(X9, X10, X11)
- CHACHA_SHUFFLE_AVX(X5, X6, X7)
- CHACHA_QROUND_AVX(X8, X9, X10, X11, X12, X13, X14)
- CHACHA_QROUND_AVX(X4, X5, X6, X7, X12, X13, X14)
- CHACHA_SHUFFLE_AVX(X11, X10, X9)
- CHACHA_SHUFFLE_AVX(X7, X6, X5)
- SUBQ $2, Tmp0
- JNZ CHACHA_LOOP_128
-
- VPADDD X0, X8, X8
- VPADDD X1, X9, X9
- VPADDD X2, X10, X10
- VPADDD X3, X11, X11
- VPADDQ 4*16(Stack), X3, X3
- VPADDD X0, X4, X4
- VPADDD X1, X5, X5
- VPADDD X2, X6, X6
- VPADDD X3, X7, X7
- VPADDQ 4*16(Stack), X3, X3
-
- XOR_AVX(Dst, Src, 0, X8, X9, X10, X11, X12)
- ADDQ $64, Dst
- ADDQ $64, Src
- SUBQ $64, Len
-
- CMPQ Len, $64
- JL BUFFER_KEYSTREAM
-
- XOR_AVX(Dst, Src, 0, X4, X5, X6, X7, X8)
- ADDQ $64, Dst
- ADDQ $64, Src
- SUBQ $64, Len
- JZ DONE // If Len == 0 -> DONE, otherwise Len <= 64 -> gen 64 byte keystream
-
-GENERATE_KEYSTREAM_64:
- VMOVDQA X0, X4
- VMOVDQA X1, X5
- VMOVDQA X2, X6
- VMOVDQA X3, X7
- MOVQ Rounds, Tmp0
-
- VMOVDQA 6*16(Stack), X9 // Load 16 bit rotate-left constant
- VMOVDQA 7*16(Stack), X10 // Load 8 bit rotate-left constant
-
-CHACHA_LOOP_64:
- CHACHA_QROUND_AVX(X4, X5, X6, X7, X8, X9, X10)
- CHACHA_SHUFFLE_AVX(X5, X6, X7)
- CHACHA_QROUND_AVX(X4, X5, X6, X7, X8, X9, X10)
- CHACHA_SHUFFLE_AVX(X7, X6, X5)
- SUBQ $2, Tmp0
- JNZ CHACHA_LOOP_64
-
- VPADDD X0, X4, X4
- VPADDD X1, X5, X5
- VPADDD X2, X6, X6
- VPADDD X3, X7, X7
- VPADDQ 4*16(Stack), X3, X3
-
- CMPQ Len, $64
- JL BUFFER_KEYSTREAM
-
- XOR_AVX(Dst, Src, 0, X4, X5, X6, X7, X8)
- ADDQ $64, Src
- ADDQ $64, Dst
- SUBQ $64, Len
- JMP DONE // jump directly to DONE - there is no keystream to buffer, Len == 0 always true.
-
-BUFFER_KEYSTREAM:
- VMOVDQU X4, 0*16(Buffer)
- VMOVDQU X5, 1*16(Buffer)
- VMOVDQU X6, 2*16(Buffer)
- VMOVDQU X7, 3*16(Buffer)
- MOVQ Len, Tmp0
- FINALIZE(Dst, Src, Buffer, Tmp0, Tmp1, Tmp2)
-
-DONE:
- MOVQ SavedSP, Stack // Restore stack pointer
- VMOVDQU X3, 3*16(State)
- VZEROUPPER
- MOVQ Len, ret+72(FP)
- RET
-
-#undef Dst
-#undef Src
-#undef Len
-#undef Rounds
-#undef Buffer
-#undef State
-#undef Stack
-#undef SavedSP
-#undef Tmp0
-#undef Tmp1
-#undef Tmp2
diff --git a/vendor/github.com/aead/chacha20/chacha/chacha_generic.go b/vendor/github.com/aead/chacha20/chacha/chacha_generic.go
deleted file mode 100644
index 8832d5b..0000000
--- a/vendor/github.com/aead/chacha20/chacha/chacha_generic.go
+++ /dev/null
@@ -1,319 +0,0 @@
-// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
-// Use of this source code is governed by a license that can be
-// found in the LICENSE file.
-
-package chacha
-
-import "encoding/binary"
-
-var sigma = [4]uint32{0x61707865, 0x3320646e, 0x79622d32, 0x6b206574}
-
-func xorKeyStreamGeneric(dst, src []byte, block, state *[64]byte, rounds int) int {
- for len(src) >= 64 {
- chachaGeneric(block, state, rounds)
-
- for i, v := range block {
- dst[i] = src[i] ^ v
- }
- src = src[64:]
- dst = dst[64:]
- }
-
- n := len(src)
- if n > 0 {
- chachaGeneric(block, state, rounds)
- for i, v := range src {
- dst[i] = v ^ block[i]
- }
- }
- return n
-}
-
-func chachaGeneric(dst *[64]byte, state *[64]byte, rounds int) {
- v00 := binary.LittleEndian.Uint32(state[0:])
- v01 := binary.LittleEndian.Uint32(state[4:])
- v02 := binary.LittleEndian.Uint32(state[8:])
- v03 := binary.LittleEndian.Uint32(state[12:])
- v04 := binary.LittleEndian.Uint32(state[16:])
- v05 := binary.LittleEndian.Uint32(state[20:])
- v06 := binary.LittleEndian.Uint32(state[24:])
- v07 := binary.LittleEndian.Uint32(state[28:])
- v08 := binary.LittleEndian.Uint32(state[32:])
- v09 := binary.LittleEndian.Uint32(state[36:])
- v10 := binary.LittleEndian.Uint32(state[40:])
- v11 := binary.LittleEndian.Uint32(state[44:])
- v12 := binary.LittleEndian.Uint32(state[48:])
- v13 := binary.LittleEndian.Uint32(state[52:])
- v14 := binary.LittleEndian.Uint32(state[56:])
- v15 := binary.LittleEndian.Uint32(state[60:])
-
- s00, s01, s02, s03, s04, s05, s06, s07 := v00, v01, v02, v03, v04, v05, v06, v07
- s08, s09, s10, s11, s12, s13, s14, s15 := v08, v09, v10, v11, v12, v13, v14, v15
-
- for i := 0; i < rounds; i += 2 {
- v00 += v04
- v12 ^= v00
- v12 = (v12 << 16) | (v12 >> 16)
- v08 += v12
- v04 ^= v08
- v04 = (v04 << 12) | (v04 >> 20)
- v00 += v04
- v12 ^= v00
- v12 = (v12 << 8) | (v12 >> 24)
- v08 += v12
- v04 ^= v08
- v04 = (v04 << 7) | (v04 >> 25)
- v01 += v05
- v13 ^= v01
- v13 = (v13 << 16) | (v13 >> 16)
- v09 += v13
- v05 ^= v09
- v05 = (v05 << 12) | (v05 >> 20)
- v01 += v05
- v13 ^= v01
- v13 = (v13 << 8) | (v13 >> 24)
- v09 += v13
- v05 ^= v09
- v05 = (v05 << 7) | (v05 >> 25)
- v02 += v06
- v14 ^= v02
- v14 = (v14 << 16) | (v14 >> 16)
- v10 += v14
- v06 ^= v10
- v06 = (v06 << 12) | (v06 >> 20)
- v02 += v06
- v14 ^= v02
- v14 = (v14 << 8) | (v14 >> 24)
- v10 += v14
- v06 ^= v10
- v06 = (v06 << 7) | (v06 >> 25)
- v03 += v07
- v15 ^= v03
- v15 = (v15 << 16) | (v15 >> 16)
- v11 += v15
- v07 ^= v11
- v07 = (v07 << 12) | (v07 >> 20)
- v03 += v07
- v15 ^= v03
- v15 = (v15 << 8) | (v15 >> 24)
- v11 += v15
- v07 ^= v11
- v07 = (v07 << 7) | (v07 >> 25)
- v00 += v05
- v15 ^= v00
- v15 = (v15 << 16) | (v15 >> 16)
- v10 += v15
- v05 ^= v10
- v05 = (v05 << 12) | (v05 >> 20)
- v00 += v05
- v15 ^= v00
- v15 = (v15 << 8) | (v15 >> 24)
- v10 += v15
- v05 ^= v10
- v05 = (v05 << 7) | (v05 >> 25)
- v01 += v06
- v12 ^= v01
- v12 = (v12 << 16) | (v12 >> 16)
- v11 += v12
- v06 ^= v11
- v06 = (v06 << 12) | (v06 >> 20)
- v01 += v06
- v12 ^= v01
- v12 = (v12 << 8) | (v12 >> 24)
- v11 += v12
- v06 ^= v11
- v06 = (v06 << 7) | (v06 >> 25)
- v02 += v07
- v13 ^= v02
- v13 = (v13 << 16) | (v13 >> 16)
- v08 += v13
- v07 ^= v08
- v07 = (v07 << 12) | (v07 >> 20)
- v02 += v07
- v13 ^= v02
- v13 = (v13 << 8) | (v13 >> 24)
- v08 += v13
- v07 ^= v08
- v07 = (v07 << 7) | (v07 >> 25)
- v03 += v04
- v14 ^= v03
- v14 = (v14 << 16) | (v14 >> 16)
- v09 += v14
- v04 ^= v09
- v04 = (v04 << 12) | (v04 >> 20)
- v03 += v04
- v14 ^= v03
- v14 = (v14 << 8) | (v14 >> 24)
- v09 += v14
- v04 ^= v09
- v04 = (v04 << 7) | (v04 >> 25)
- }
-
- v00 += s00
- v01 += s01
- v02 += s02
- v03 += s03
- v04 += s04
- v05 += s05
- v06 += s06
- v07 += s07
- v08 += s08
- v09 += s09
- v10 += s10
- v11 += s11
- v12 += s12
- v13 += s13
- v14 += s14
- v15 += s15
-
- s12++
- binary.LittleEndian.PutUint32(state[48:], s12)
- if s12 == 0 { // indicates overflow
- s13++
- binary.LittleEndian.PutUint32(state[52:], s13)
- }
-
- binary.LittleEndian.PutUint32(dst[0:], v00)
- binary.LittleEndian.PutUint32(dst[4:], v01)
- binary.LittleEndian.PutUint32(dst[8:], v02)
- binary.LittleEndian.PutUint32(dst[12:], v03)
- binary.LittleEndian.PutUint32(dst[16:], v04)
- binary.LittleEndian.PutUint32(dst[20:], v05)
- binary.LittleEndian.PutUint32(dst[24:], v06)
- binary.LittleEndian.PutUint32(dst[28:], v07)
- binary.LittleEndian.PutUint32(dst[32:], v08)
- binary.LittleEndian.PutUint32(dst[36:], v09)
- binary.LittleEndian.PutUint32(dst[40:], v10)
- binary.LittleEndian.PutUint32(dst[44:], v11)
- binary.LittleEndian.PutUint32(dst[48:], v12)
- binary.LittleEndian.PutUint32(dst[52:], v13)
- binary.LittleEndian.PutUint32(dst[56:], v14)
- binary.LittleEndian.PutUint32(dst[60:], v15)
-}
-
-func hChaCha20Generic(out *[32]byte, nonce *[16]byte, key *[32]byte) {
- v00 := sigma[0]
- v01 := sigma[1]
- v02 := sigma[2]
- v03 := sigma[3]
- v04 := binary.LittleEndian.Uint32(key[0:])
- v05 := binary.LittleEndian.Uint32(key[4:])
- v06 := binary.LittleEndian.Uint32(key[8:])
- v07 := binary.LittleEndian.Uint32(key[12:])
- v08 := binary.LittleEndian.Uint32(key[16:])
- v09 := binary.LittleEndian.Uint32(key[20:])
- v10 := binary.LittleEndian.Uint32(key[24:])
- v11 := binary.LittleEndian.Uint32(key[28:])
- v12 := binary.LittleEndian.Uint32(nonce[0:])
- v13 := binary.LittleEndian.Uint32(nonce[4:])
- v14 := binary.LittleEndian.Uint32(nonce[8:])
- v15 := binary.LittleEndian.Uint32(nonce[12:])
-
- for i := 0; i < 20; i += 2 {
- v00 += v04
- v12 ^= v00
- v12 = (v12 << 16) | (v12 >> 16)
- v08 += v12
- v04 ^= v08
- v04 = (v04 << 12) | (v04 >> 20)
- v00 += v04
- v12 ^= v00
- v12 = (v12 << 8) | (v12 >> 24)
- v08 += v12
- v04 ^= v08
- v04 = (v04 << 7) | (v04 >> 25)
- v01 += v05
- v13 ^= v01
- v13 = (v13 << 16) | (v13 >> 16)
- v09 += v13
- v05 ^= v09
- v05 = (v05 << 12) | (v05 >> 20)
- v01 += v05
- v13 ^= v01
- v13 = (v13 << 8) | (v13 >> 24)
- v09 += v13
- v05 ^= v09
- v05 = (v05 << 7) | (v05 >> 25)
- v02 += v06
- v14 ^= v02
- v14 = (v14 << 16) | (v14 >> 16)
- v10 += v14
- v06 ^= v10
- v06 = (v06 << 12) | (v06 >> 20)
- v02 += v06
- v14 ^= v02
- v14 = (v14 << 8) | (v14 >> 24)
- v10 += v14
- v06 ^= v10
- v06 = (v06 << 7) | (v06 >> 25)
- v03 += v07
- v15 ^= v03
- v15 = (v15 << 16) | (v15 >> 16)
- v11 += v15
- v07 ^= v11
- v07 = (v07 << 12) | (v07 >> 20)
- v03 += v07
- v15 ^= v03
- v15 = (v15 << 8) | (v15 >> 24)
- v11 += v15
- v07 ^= v11
- v07 = (v07 << 7) | (v07 >> 25)
- v00 += v05
- v15 ^= v00
- v15 = (v15 << 16) | (v15 >> 16)
- v10 += v15
- v05 ^= v10
- v05 = (v05 << 12) | (v05 >> 20)
- v00 += v05
- v15 ^= v00
- v15 = (v15 << 8) | (v15 >> 24)
- v10 += v15
- v05 ^= v10
- v05 = (v05 << 7) | (v05 >> 25)
- v01 += v06
- v12 ^= v01
- v12 = (v12 << 16) | (v12 >> 16)
- v11 += v12
- v06 ^= v11
- v06 = (v06 << 12) | (v06 >> 20)
- v01 += v06
- v12 ^= v01
- v12 = (v12 << 8) | (v12 >> 24)
- v11 += v12
- v06 ^= v11
- v06 = (v06 << 7) | (v06 >> 25)
- v02 += v07
- v13 ^= v02
- v13 = (v13 << 16) | (v13 >> 16)
- v08 += v13
- v07 ^= v08
- v07 = (v07 << 12) | (v07 >> 20)
- v02 += v07
- v13 ^= v02
- v13 = (v13 << 8) | (v13 >> 24)
- v08 += v13
- v07 ^= v08
- v07 = (v07 << 7) | (v07 >> 25)
- v03 += v04
- v14 ^= v03
- v14 = (v14 << 16) | (v14 >> 16)
- v09 += v14
- v04 ^= v09
- v04 = (v04 << 12) | (v04 >> 20)
- v03 += v04
- v14 ^= v03
- v14 = (v14 << 8) | (v14 >> 24)
- v09 += v14
- v04 ^= v09
- v04 = (v04 << 7) | (v04 >> 25)
- }
-
- binary.LittleEndian.PutUint32(out[0:], v00)
- binary.LittleEndian.PutUint32(out[4:], v01)
- binary.LittleEndian.PutUint32(out[8:], v02)
- binary.LittleEndian.PutUint32(out[12:], v03)
- binary.LittleEndian.PutUint32(out[16:], v12)
- binary.LittleEndian.PutUint32(out[20:], v13)
- binary.LittleEndian.PutUint32(out[24:], v14)
- binary.LittleEndian.PutUint32(out[28:], v15)
-}
diff --git a/vendor/github.com/aead/chacha20/chacha/chacha_ref.go b/vendor/github.com/aead/chacha20/chacha/chacha_ref.go
deleted file mode 100644
index 526877c..0000000
--- a/vendor/github.com/aead/chacha20/chacha/chacha_ref.go
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
-// Use of this source code is governed by a license that can be
-// found in the LICENSE file.
-
-// +build !amd64,!386 gccgo appengine nacl
-
-package chacha
-
-import "encoding/binary"
-
-func init() {
- useSSE2 = false
- useSSSE3 = false
- useAVX = false
- useAVX2 = false
-}
-
-func initialize(state *[64]byte, key []byte, nonce *[16]byte) {
- binary.LittleEndian.PutUint32(state[0:], sigma[0])
- binary.LittleEndian.PutUint32(state[4:], sigma[1])
- binary.LittleEndian.PutUint32(state[8:], sigma[2])
- binary.LittleEndian.PutUint32(state[12:], sigma[3])
- copy(state[16:], key[:])
- copy(state[48:], nonce[:])
-}
-
-func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
- return xorKeyStreamGeneric(dst, src, block, state, rounds)
-}
-
-func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
- hChaCha20Generic(out, nonce, key)
-}
diff --git a/vendor/github.com/aead/chacha20/chacha/const.s b/vendor/github.com/aead/chacha20/chacha/const.s
deleted file mode 100644
index c7a94a4..0000000
--- a/vendor/github.com/aead/chacha20/chacha/const.s
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright (c) 2018 Andreas Auernhammer. All rights reserved.
-// Use of this source code is governed by a license that can be
-// found in the LICENSE file.
-
-// +build 386,!gccgo,!appengine,!nacl amd64,!gccgo,!appengine,!nacl
-
-#include "textflag.h"
-
-DATA ·sigma<>+0x00(SB)/4, $0x61707865
-DATA ·sigma<>+0x04(SB)/4, $0x3320646e
-DATA ·sigma<>+0x08(SB)/4, $0x79622d32
-DATA ·sigma<>+0x0C(SB)/4, $0x6b206574
-GLOBL ·sigma<>(SB), (NOPTR+RODATA), $16 // The 4 ChaCha initialization constants
-
-// SSE2/SSE3/AVX constants
-
-DATA ·one<>+0x00(SB)/8, $1
-DATA ·one<>+0x08(SB)/8, $0
-GLOBL ·one<>(SB), (NOPTR+RODATA), $16 // The constant 1 as 128 bit value
-
-DATA ·rol16<>+0x00(SB)/8, $0x0504070601000302
-DATA ·rol16<>+0x08(SB)/8, $0x0D0C0F0E09080B0A
-GLOBL ·rol16<>(SB), (NOPTR+RODATA), $16 // The PSHUFB 16 bit left rotate constant
-
-DATA ·rol8<>+0x00(SB)/8, $0x0605040702010003
-DATA ·rol8<>+0x08(SB)/8, $0x0E0D0C0F0A09080B
-GLOBL ·rol8<>(SB), (NOPTR+RODATA), $16 // The PSHUFB 8 bit left rotate constant
-
-// AVX2 constants
-
-DATA ·one_AVX2<>+0x00(SB)/8, $0
-DATA ·one_AVX2<>+0x08(SB)/8, $0
-DATA ·one_AVX2<>+0x10(SB)/8, $1
-DATA ·one_AVX2<>+0x18(SB)/8, $0
-GLOBL ·one_AVX2<>(SB), (NOPTR+RODATA), $32 // The constant 1 as 256 bit value
-
-DATA ·two_AVX2<>+0x00(SB)/8, $2
-DATA ·two_AVX2<>+0x08(SB)/8, $0
-DATA ·two_AVX2<>+0x10(SB)/8, $2
-DATA ·two_AVX2<>+0x18(SB)/8, $0
-GLOBL ·two_AVX2<>(SB), (NOPTR+RODATA), $32
-
-DATA ·rol16_AVX2<>+0x00(SB)/8, $0x0504070601000302
-DATA ·rol16_AVX2<>+0x08(SB)/8, $0x0D0C0F0E09080B0A
-DATA ·rol16_AVX2<>+0x10(SB)/8, $0x0504070601000302
-DATA ·rol16_AVX2<>+0x18(SB)/8, $0x0D0C0F0E09080B0A
-GLOBL ·rol16_AVX2<>(SB), (NOPTR+RODATA), $32 // The VPSHUFB 16 bit left rotate constant
-
-DATA ·rol8_AVX2<>+0x00(SB)/8, $0x0605040702010003
-DATA ·rol8_AVX2<>+0x08(SB)/8, $0x0E0D0C0F0A09080B
-DATA ·rol8_AVX2<>+0x10(SB)/8, $0x0605040702010003
-DATA ·rol8_AVX2<>+0x18(SB)/8, $0x0E0D0C0F0A09080B
-GLOBL ·rol8_AVX2<>(SB), (NOPTR+RODATA), $32 // The VPSHUFB 8 bit left rotate constant
diff --git a/vendor/github.com/aead/chacha20/chacha/macro.s b/vendor/github.com/aead/chacha20/chacha/macro.s
deleted file mode 100644
index 780108f..0000000
--- a/vendor/github.com/aead/chacha20/chacha/macro.s
+++ /dev/null
@@ -1,163 +0,0 @@
-// Copyright (c) 2018 Andreas Auernhammer. All rights reserved.
-// Use of this source code is governed by a license that can be
-// found in the LICENSE file.
-
-// +build 386,!gccgo,!appengine,!nacl amd64,!gccgo,!appengine,!nacl
-
-// ROTL_SSE rotates all 4 32 bit values of the XMM register v
-// left by n bits using SSE2 instructions (0 <= n <= 32).
-// The XMM register t is used as a temp. register.
-#define ROTL_SSE(n, t, v) \
- MOVO v, t; \
- PSLLL $n, t; \
- PSRLL $(32-n), v; \
- PXOR t, v
-
-// ROTL_AVX rotates all 4/8 32 bit values of the AVX/AVX2 register v
-// left by n bits using AVX/AVX2 instructions (0 <= n <= 32).
-// The AVX/AVX2 register t is used as a temp. register.
-#define ROTL_AVX(n, t, v) \
- VPSLLD $n, v, t; \
- VPSRLD $(32-n), v, v; \
- VPXOR v, t, v
-
-// CHACHA_QROUND_SSE2 performs a ChaCha quarter-round using the
-// 4 XMM registers v0, v1, v2 and v3. It uses only ROTL_SSE2 for
-// rotations. The XMM register t is used as a temp. register.
-#define CHACHA_QROUND_SSE2(v0, v1, v2, v3, t) \
- PADDL v1, v0; \
- PXOR v0, v3; \
- ROTL_SSE(16, t, v3); \
- PADDL v3, v2; \
- PXOR v2, v1; \
- ROTL_SSE(12, t, v1); \
- PADDL v1, v0; \
- PXOR v0, v3; \
- ROTL_SSE(8, t, v3); \
- PADDL v3, v2; \
- PXOR v2, v1; \
- ROTL_SSE(7, t, v1)
-
-// CHACHA_QROUND_SSSE3 performs a ChaCha quarter-round using the
-// 4 XMM registers v0, v1, v2 and v3. It uses PSHUFB for 8/16 bit
-// rotations. The XMM register t is used as a temp. register.
-//
-// r16 holds the PSHUFB constant for a 16 bit left rotate.
-// r8 holds the PSHUFB constant for a 8 bit left rotate.
-#define CHACHA_QROUND_SSSE3(v0, v1, v2, v3, t, r16, r8) \
- PADDL v1, v0; \
- PXOR v0, v3; \
- PSHUFB r16, v3; \
- PADDL v3, v2; \
- PXOR v2, v1; \
- ROTL_SSE(12, t, v1); \
- PADDL v1, v0; \
- PXOR v0, v3; \
- PSHUFB r8, v3; \
- PADDL v3, v2; \
- PXOR v2, v1; \
- ROTL_SSE(7, t, v1)
-
-// CHACHA_QROUND_AVX performs a ChaCha quarter-round using the
-// 4 AVX/AVX2 registers v0, v1, v2 and v3. It uses VPSHUFB for 8/16 bit
-// rotations. The AVX/AVX2 register t is used as a temp. register.
-//
-// r16 holds the VPSHUFB constant for a 16 bit left rotate.
-// r8 holds the VPSHUFB constant for a 8 bit left rotate.
-#define CHACHA_QROUND_AVX(v0, v1, v2, v3, t, r16, r8) \
- VPADDD v0, v1, v0; \
- VPXOR v3, v0, v3; \
- VPSHUFB r16, v3, v3; \
- VPADDD v2, v3, v2; \
- VPXOR v1, v2, v1; \
- ROTL_AVX(12, t, v1); \
- VPADDD v0, v1, v0; \
- VPXOR v3, v0, v3; \
- VPSHUFB r8, v3, v3; \
- VPADDD v2, v3, v2; \
- VPXOR v1, v2, v1; \
- ROTL_AVX(7, t, v1)
-
-// CHACHA_SHUFFLE_SSE performs a ChaCha shuffle using the
-// 3 XMM registers v1, v2 and v3. The inverse shuffle is
-// performed by switching v1 and v3: CHACHA_SHUFFLE_SSE(v3, v2, v1).
-#define CHACHA_SHUFFLE_SSE(v1, v2, v3) \
- PSHUFL $0x39, v1, v1; \
- PSHUFL $0x4E, v2, v2; \
- PSHUFL $0x93, v3, v3
-
-// CHACHA_SHUFFLE_AVX performs a ChaCha shuffle using the
-// 3 AVX/AVX2 registers v1, v2 and v3. The inverse shuffle is
-// performed by switching v1 and v3: CHACHA_SHUFFLE_AVX(v3, v2, v1).
-#define CHACHA_SHUFFLE_AVX(v1, v2, v3) \
- VPSHUFD $0x39, v1, v1; \
- VPSHUFD $0x4E, v2, v2; \
- VPSHUFD $0x93, v3, v3
-
-// XOR_SSE extracts 4x16 byte vectors from src at
-// off, xors all vectors with the corresponding XMM
-// register (v0 - v3) and writes the result to dst
-// at off.
-// The XMM register t is used as a temp. register.
-#define XOR_SSE(dst, src, off, v0, v1, v2, v3, t) \
- MOVOU 0+off(src), t; \
- PXOR v0, t; \
- MOVOU t, 0+off(dst); \
- MOVOU 16+off(src), t; \
- PXOR v1, t; \
- MOVOU t, 16+off(dst); \
- MOVOU 32+off(src), t; \
- PXOR v2, t; \
- MOVOU t, 32+off(dst); \
- MOVOU 48+off(src), t; \
- PXOR v3, t; \
- MOVOU t, 48+off(dst)
-
-// XOR_AVX extracts 4x16 byte vectors from src at
-// off, xors all vectors with the corresponding AVX
-// register (v0 - v3) and writes the result to dst
-// at off.
-// The XMM register t is used as a temp. register.
-#define XOR_AVX(dst, src, off, v0, v1, v2, v3, t) \
- VPXOR 0+off(src), v0, t; \
- VMOVDQU t, 0+off(dst); \
- VPXOR 16+off(src), v1, t; \
- VMOVDQU t, 16+off(dst); \
- VPXOR 32+off(src), v2, t; \
- VMOVDQU t, 32+off(dst); \
- VPXOR 48+off(src), v3, t; \
- VMOVDQU t, 48+off(dst)
-
-#define XOR_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \
- VMOVDQU (0+off)(src), t0; \
- VPERM2I128 $32, v1, v0, t1; \
- VPXOR t0, t1, t0; \
- VMOVDQU t0, (0+off)(dst); \
- VMOVDQU (32+off)(src), t0; \
- VPERM2I128 $32, v3, v2, t1; \
- VPXOR t0, t1, t0; \
- VMOVDQU t0, (32+off)(dst); \
- VMOVDQU (64+off)(src), t0; \
- VPERM2I128 $49, v1, v0, t1; \
- VPXOR t0, t1, t0; \
- VMOVDQU t0, (64+off)(dst); \
- VMOVDQU (96+off)(src), t0; \
- VPERM2I128 $49, v3, v2, t1; \
- VPXOR t0, t1, t0; \
- VMOVDQU t0, (96+off)(dst)
-
-#define XOR_UPPER_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \
- VMOVDQU (0+off)(src), t0; \
- VPERM2I128 $32, v1, v0, t1; \
- VPXOR t0, t1, t0; \
- VMOVDQU t0, (0+off)(dst); \
- VMOVDQU (32+off)(src), t0; \
- VPERM2I128 $32, v3, v2, t1; \
- VPXOR t0, t1, t0; \
- VMOVDQU t0, (32+off)(dst); \
-
-#define EXTRACT_LOWER(dst, v0, v1, v2, v3, t0) \
- VPERM2I128 $49, v1, v0, t0; \
- VMOVDQU t0, 0(dst); \
- VPERM2I128 $49, v3, v2, t0; \
- VMOVDQU t0, 32(dst)
diff --git a/vendor/github.com/creack/pty/.gitignore b/vendor/github.com/creack/pty/.gitignore
deleted file mode 100644
index 1f0a99f..0000000
--- a/vendor/github.com/creack/pty/.gitignore
+++ /dev/null
@@ -1,4 +0,0 @@
-[568].out
-_go*
-_test*
-_obj
diff --git a/vendor/github.com/creack/pty/Dockerfile.golang b/vendor/github.com/creack/pty/Dockerfile.golang
deleted file mode 100644
index 2ee82a3..0000000
--- a/vendor/github.com/creack/pty/Dockerfile.golang
+++ /dev/null
@@ -1,17 +0,0 @@
-ARG GOVERSION=1.14
-FROM golang:${GOVERSION}
-
-# Set base env.
-ARG GOOS=linux
-ARG GOARCH=amd64
-ENV GOOS=${GOOS} GOARCH=${GOARCH} CGO_ENABLED=0 GOFLAGS='-v -ldflags=-s -ldflags=-w'
-
-# Pre compile the stdlib for 386/arm (32bits).
-RUN go build -a std
-
-# Add the code to the image.
-WORKDIR pty
-ADD . .
-
-# Build the lib.
-RUN go build
diff --git a/vendor/github.com/creack/pty/Dockerfile.riscv b/vendor/github.com/creack/pty/Dockerfile.riscv
deleted file mode 100644
index 7a30c94..0000000
--- a/vendor/github.com/creack/pty/Dockerfile.riscv
+++ /dev/null
@@ -1,23 +0,0 @@
-# NOTE: Using 1.13 as a base to build the RISCV compiler, the resulting version is based on go1.6.
-FROM golang:1.13
-
-# Clone and complie a riscv compatible version of the go compiler.
-RUN git clone https://review.gerrithub.io/riscv/riscv-go /riscv-go
-# riscvdev branch HEAD as of 2019-06-29.
-RUN cd /riscv-go && git checkout 04885fddd096d09d4450726064d06dd107e374bf
-ENV PATH=/riscv-go/misc/riscv:/riscv-go/bin:$PATH
-RUN cd /riscv-go/src && GOROOT_BOOTSTRAP=$(go env GOROOT) ./make.bash
-ENV GOROOT=/riscv-go
-
-# Set the base env.
-ENV GOOS=linux GOARCH=riscv CGO_ENABLED=0 GOFLAGS='-v -ldflags=-s -ldflags=-w'
-
-# Pre compile the stdlib.
-RUN go build -a std
-
-# Add the code to the image.
-WORKDIR pty
-ADD . .
-
-# Build the lib.
-RUN go build
diff --git a/vendor/github.com/creack/pty/LICENSE b/vendor/github.com/creack/pty/LICENSE
deleted file mode 100644
index 6b7558b..0000000
--- a/vendor/github.com/creack/pty/LICENSE
+++ /dev/null
@@ -1,23 +0,0 @@
-Copyright (c) 2011 Keith Rarick
-
-Permission is hereby granted, free of charge, to any person
-obtaining a copy of this software and associated
-documentation files (the "Software"), to deal in the
-Software without restriction, including without limitation
-the rights to use, copy, modify, merge, publish, distribute,
-sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so,
-subject to the following conditions:
-
-The above copyright notice and this permission notice shall
-be included in all copies or substantial portions of the
-Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
-KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
-WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
-PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
-OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
-OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/vendor/github.com/creack/pty/README.md b/vendor/github.com/creack/pty/README.md
deleted file mode 100644
index a4fe767..0000000
--- a/vendor/github.com/creack/pty/README.md
+++ /dev/null
@@ -1,107 +0,0 @@
-# pty
-
-Pty is a Go package for using unix pseudo-terminals.
-
-## Install
-
-```sh
-go get github.com/creack/pty
-```
-
-## Examples
-
-Note that those examples are for demonstration purpose only, to showcase how to use the library. They are not meant to be used in any kind of production environment.
-
-### Command
-
-```go
-package main
-
-import (
- "io"
- "os"
- "os/exec"
-
- "github.com/creack/pty"
-)
-
-func main() {
- c := exec.Command("grep", "--color=auto", "bar")
- f, err := pty.Start(c)
- if err != nil {
- panic(err)
- }
-
- go func() {
- f.Write([]byte("foo\n"))
- f.Write([]byte("bar\n"))
- f.Write([]byte("baz\n"))
- f.Write([]byte{4}) // EOT
- }()
- io.Copy(os.Stdout, f)
-}
-```
-
-### Shell
-
-```go
-package main
-
-import (
- "io"
- "log"
- "os"
- "os/exec"
- "os/signal"
- "syscall"
-
- "github.com/creack/pty"
- "golang.org/x/term"
-)
-
-func test() error {
- // Create arbitrary command.
- c := exec.Command("bash")
-
- // Start the command with a pty.
- ptmx, err := pty.Start(c)
- if err != nil {
- return err
- }
- // Make sure to close the pty at the end.
- defer func() { _ = ptmx.Close() }() // Best effort.
-
- // Handle pty size.
- ch := make(chan os.Signal, 1)
- signal.Notify(ch, syscall.SIGWINCH)
- go func() {
- for range ch {
- if err := pty.InheritSize(os.Stdin, ptmx); err != nil {
- log.Printf("error resizing pty: %s", err)
- }
- }
- }()
- ch <- syscall.SIGWINCH // Initial resize.
- defer func() { signal.Stop(ch); close(ch) }() // Cleanup signals when done.
-
- // Set stdin in raw mode.
- oldState, err := term.MakeRaw(int(os.Stdin.Fd()))
- if err != nil {
- panic(err)
- }
- defer func() { _ = term.Restore(int(os.Stdin.Fd()), oldState) }() // Best effort.
-
- // Copy stdin to the pty and the pty to stdout.
- // NOTE: The goroutine will keep reading until the next keystroke before returning.
- go func() { _, _ = io.Copy(ptmx, os.Stdin) }()
- _, _ = io.Copy(os.Stdout, ptmx)
-
- return nil
-}
-
-func main() {
- if err := test(); err != nil {
- log.Fatal(err)
- }
-}
-```
diff --git a/vendor/github.com/creack/pty/asm_solaris_amd64.s b/vendor/github.com/creack/pty/asm_solaris_amd64.s
deleted file mode 100644
index 7fbef8e..0000000
--- a/vendor/github.com/creack/pty/asm_solaris_amd64.s
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gc
-//+build gc
-
-#include "textflag.h"
-
-//
-// System calls for amd64, Solaris are implemented in runtime/syscall_solaris.go
-//
-
-TEXT ·sysvicall6(SB),NOSPLIT,$0-88
- JMP syscall·sysvicall6(SB)
-
-TEXT ·rawSysvicall6(SB),NOSPLIT,$0-88
- JMP syscall·rawSysvicall6(SB)
diff --git a/vendor/github.com/creack/pty/doc.go b/vendor/github.com/creack/pty/doc.go
deleted file mode 100644
index 3c8b324..0000000
--- a/vendor/github.com/creack/pty/doc.go
+++ /dev/null
@@ -1,16 +0,0 @@
-// Package pty provides functions for working with Unix terminals.
-package pty
-
-import (
- "errors"
- "os"
-)
-
-// ErrUnsupported is returned if a function is not
-// available on the current platform.
-var ErrUnsupported = errors.New("unsupported")
-
-// Open a pty and its corresponding tty.
-func Open() (pty, tty *os.File, err error) {
- return open()
-}
diff --git a/vendor/github.com/creack/pty/ioctl.go b/vendor/github.com/creack/pty/ioctl.go
deleted file mode 100644
index 3cabedd..0000000
--- a/vendor/github.com/creack/pty/ioctl.go
+++ /dev/null
@@ -1,19 +0,0 @@
-//go:build !windows && !solaris && !aix
-// +build !windows,!solaris,!aix
-
-package pty
-
-import "syscall"
-
-const (
- TIOCGWINSZ = syscall.TIOCGWINSZ
- TIOCSWINSZ = syscall.TIOCSWINSZ
-)
-
-func ioctl(fd, cmd, ptr uintptr) error {
- _, _, e := syscall.Syscall(syscall.SYS_IOCTL, fd, cmd, ptr)
- if e != 0 {
- return e
- }
- return nil
-}
diff --git a/vendor/github.com/creack/pty/ioctl_bsd.go b/vendor/github.com/creack/pty/ioctl_bsd.go
deleted file mode 100644
index db3bf84..0000000
--- a/vendor/github.com/creack/pty/ioctl_bsd.go
+++ /dev/null
@@ -1,40 +0,0 @@
-//go:build darwin || dragonfly || freebsd || netbsd || openbsd
-// +build darwin dragonfly freebsd netbsd openbsd
-
-package pty
-
-// from
-const (
- _IOC_VOID uintptr = 0x20000000
- _IOC_OUT uintptr = 0x40000000
- _IOC_IN uintptr = 0x80000000
- _IOC_IN_OUT uintptr = _IOC_OUT | _IOC_IN
- _IOC_DIRMASK = _IOC_VOID | _IOC_OUT | _IOC_IN
-
- _IOC_PARAM_SHIFT = 13
- _IOC_PARAM_MASK = (1 << _IOC_PARAM_SHIFT) - 1
-)
-
-func _IOC_PARM_LEN(ioctl uintptr) uintptr {
- return (ioctl >> 16) & _IOC_PARAM_MASK
-}
-
-func _IOC(inout uintptr, group byte, ioctl_num uintptr, param_len uintptr) uintptr {
- return inout | (param_len&_IOC_PARAM_MASK)<<16 | uintptr(group)<<8 | ioctl_num
-}
-
-func _IO(group byte, ioctl_num uintptr) uintptr {
- return _IOC(_IOC_VOID, group, ioctl_num, 0)
-}
-
-func _IOR(group byte, ioctl_num uintptr, param_len uintptr) uintptr {
- return _IOC(_IOC_OUT, group, ioctl_num, param_len)
-}
-
-func _IOW(group byte, ioctl_num uintptr, param_len uintptr) uintptr {
- return _IOC(_IOC_IN, group, ioctl_num, param_len)
-}
-
-func _IOWR(group byte, ioctl_num uintptr, param_len uintptr) uintptr {
- return _IOC(_IOC_IN_OUT, group, ioctl_num, param_len)
-}
diff --git a/vendor/github.com/creack/pty/ioctl_solaris.go b/vendor/github.com/creack/pty/ioctl_solaris.go
deleted file mode 100644
index bff22da..0000000
--- a/vendor/github.com/creack/pty/ioctl_solaris.go
+++ /dev/null
@@ -1,48 +0,0 @@
-//go:build solaris
-// +build solaris
-
-package pty
-
-import (
- "syscall"
- "unsafe"
-)
-
-//go:cgo_import_dynamic libc_ioctl ioctl "libc.so"
-//go:linkname procioctl libc_ioctl
-var procioctl uintptr
-
-const (
- // see /usr/include/sys/stropts.h
- I_PUSH = uintptr((int32('S')<<8 | 002))
- I_STR = uintptr((int32('S')<<8 | 010))
- I_FIND = uintptr((int32('S')<<8 | 013))
-
- // see /usr/include/sys/ptms.h
- ISPTM = (int32('P') << 8) | 1
- UNLKPT = (int32('P') << 8) | 2
- PTSSTTY = (int32('P') << 8) | 3
- ZONEPT = (int32('P') << 8) | 4
- OWNERPT = (int32('P') << 8) | 5
-
- // see /usr/include/sys/termios.h
- TIOCSWINSZ = (uint32('T') << 8) | 103
- TIOCGWINSZ = (uint32('T') << 8) | 104
-)
-
-type strioctl struct {
- icCmd int32
- icTimeout int32
- icLen int32
- icDP unsafe.Pointer
-}
-
-// Defined in asm_solaris_amd64.s.
-func sysvicall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno)
-
-func ioctl(fd, cmd, ptr uintptr) error {
- if _, _, errno := sysvicall6(uintptr(unsafe.Pointer(&procioctl)), 3, fd, cmd, ptr, 0, 0, 0); errno != 0 {
- return errno
- }
- return nil
-}
diff --git a/vendor/github.com/creack/pty/ioctl_unsupported.go b/vendor/github.com/creack/pty/ioctl_unsupported.go
deleted file mode 100644
index 2449a27..0000000
--- a/vendor/github.com/creack/pty/ioctl_unsupported.go
+++ /dev/null
@@ -1,13 +0,0 @@
-//go:build aix
-// +build aix
-
-package pty
-
-const (
- TIOCGWINSZ = 0
- TIOCSWINSZ = 0
-)
-
-func ioctl(fd, cmd, ptr uintptr) error {
- return ErrUnsupported
-}
diff --git a/vendor/github.com/creack/pty/mktypes.bash b/vendor/github.com/creack/pty/mktypes.bash
deleted file mode 100644
index 7f71bda..0000000
--- a/vendor/github.com/creack/pty/mktypes.bash
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env bash
-
-GOOSARCH="${GOOS}_${GOARCH}"
-case "$GOOSARCH" in
-_* | *_ | _)
- echo 'undefined $GOOS_$GOARCH:' "$GOOSARCH" 1>&2
- exit 1
- ;;
-esac
-
-GODEFS="go tool cgo -godefs"
-
-$GODEFS types.go |gofmt > ztypes_$GOARCH.go
-
-case $GOOS in
-freebsd|dragonfly|netbsd|openbsd)
- $GODEFS types_$GOOS.go |gofmt > ztypes_$GOOSARCH.go
- ;;
-esac
diff --git a/vendor/github.com/creack/pty/pty_darwin.go b/vendor/github.com/creack/pty/pty_darwin.go
deleted file mode 100644
index 9bdd71d..0000000
--- a/vendor/github.com/creack/pty/pty_darwin.go
+++ /dev/null
@@ -1,68 +0,0 @@
-//go:build darwin
-// +build darwin
-
-package pty
-
-import (
- "errors"
- "os"
- "syscall"
- "unsafe"
-)
-
-func open() (pty, tty *os.File, err error) {
- pFD, err := syscall.Open("/dev/ptmx", syscall.O_RDWR|syscall.O_CLOEXEC, 0)
- if err != nil {
- return nil, nil, err
- }
- p := os.NewFile(uintptr(pFD), "/dev/ptmx")
- // In case of error after this point, make sure we close the ptmx fd.
- defer func() {
- if err != nil {
- _ = p.Close() // Best effort.
- }
- }()
-
- sname, err := ptsname(p)
- if err != nil {
- return nil, nil, err
- }
-
- if err := grantpt(p); err != nil {
- return nil, nil, err
- }
-
- if err := unlockpt(p); err != nil {
- return nil, nil, err
- }
-
- t, err := os.OpenFile(sname, os.O_RDWR|syscall.O_NOCTTY, 0)
- if err != nil {
- return nil, nil, err
- }
- return p, t, nil
-}
-
-func ptsname(f *os.File) (string, error) {
- n := make([]byte, _IOC_PARM_LEN(syscall.TIOCPTYGNAME))
-
- err := ioctl(f.Fd(), syscall.TIOCPTYGNAME, uintptr(unsafe.Pointer(&n[0])))
- if err != nil {
- return "", err
- }
-
- for i, c := range n {
- if c == 0 {
- return string(n[:i]), nil
- }
- }
- return "", errors.New("TIOCPTYGNAME string not NUL-terminated")
-}
-
-func grantpt(f *os.File) error {
- return ioctl(f.Fd(), syscall.TIOCPTYGRANT, 0)
-}
-
-func unlockpt(f *os.File) error {
- return ioctl(f.Fd(), syscall.TIOCPTYUNLK, 0)
-}
diff --git a/vendor/github.com/creack/pty/pty_dragonfly.go b/vendor/github.com/creack/pty/pty_dragonfly.go
deleted file mode 100644
index aa916aa..0000000
--- a/vendor/github.com/creack/pty/pty_dragonfly.go
+++ /dev/null
@@ -1,83 +0,0 @@
-//go:build dragonfly
-// +build dragonfly
-
-package pty
-
-import (
- "errors"
- "os"
- "strings"
- "syscall"
- "unsafe"
-)
-
-// same code as pty_darwin.go
-func open() (pty, tty *os.File, err error) {
- p, err := os.OpenFile("/dev/ptmx", os.O_RDWR, 0)
- if err != nil {
- return nil, nil, err
- }
- // In case of error after this point, make sure we close the ptmx fd.
- defer func() {
- if err != nil {
- _ = p.Close() // Best effort.
- }
- }()
-
- sname, err := ptsname(p)
- if err != nil {
- return nil, nil, err
- }
-
- if err := grantpt(p); err != nil {
- return nil, nil, err
- }
-
- if err := unlockpt(p); err != nil {
- return nil, nil, err
- }
-
- t, err := os.OpenFile(sname, os.O_RDWR, 0)
- if err != nil {
- return nil, nil, err
- }
- return p, t, nil
-}
-
-func grantpt(f *os.File) error {
- _, err := isptmaster(f.Fd())
- return err
-}
-
-func unlockpt(f *os.File) error {
- _, err := isptmaster(f.Fd())
- return err
-}
-
-func isptmaster(fd uintptr) (bool, error) {
- err := ioctl(fd, syscall.TIOCISPTMASTER, 0)
- return err == nil, err
-}
-
-var (
- emptyFiodgnameArg fiodgnameArg
- ioctl_FIODNAME = _IOW('f', 120, unsafe.Sizeof(emptyFiodgnameArg))
-)
-
-func ptsname(f *os.File) (string, error) {
- name := make([]byte, _C_SPECNAMELEN)
- fa := fiodgnameArg{Name: (*byte)(unsafe.Pointer(&name[0])), Len: _C_SPECNAMELEN, Pad_cgo_0: [4]byte{0, 0, 0, 0}}
-
- err := ioctl(f.Fd(), ioctl_FIODNAME, uintptr(unsafe.Pointer(&fa)))
- if err != nil {
- return "", err
- }
-
- for i, c := range name {
- if c == 0 {
- s := "/dev/" + string(name[:i])
- return strings.Replace(s, "ptm", "pts", -1), nil
- }
- }
- return "", errors.New("TIOCPTYGNAME string not NUL-terminated")
-}
diff --git a/vendor/github.com/creack/pty/pty_freebsd.go b/vendor/github.com/creack/pty/pty_freebsd.go
deleted file mode 100644
index bcd3b6f..0000000
--- a/vendor/github.com/creack/pty/pty_freebsd.go
+++ /dev/null
@@ -1,81 +0,0 @@
-//go:build freebsd
-// +build freebsd
-
-package pty
-
-import (
- "errors"
- "os"
- "syscall"
- "unsafe"
-)
-
-func posixOpenpt(oflag int) (fd int, err error) {
- r0, _, e1 := syscall.Syscall(syscall.SYS_POSIX_OPENPT, uintptr(oflag), 0, 0)
- fd = int(r0)
- if e1 != 0 {
- err = e1
- }
- return fd, err
-}
-
-func open() (pty, tty *os.File, err error) {
- fd, err := posixOpenpt(syscall.O_RDWR | syscall.O_CLOEXEC)
- if err != nil {
- return nil, nil, err
- }
- p := os.NewFile(uintptr(fd), "/dev/pts")
- // In case of error after this point, make sure we close the pts fd.
- defer func() {
- if err != nil {
- _ = p.Close() // Best effort.
- }
- }()
-
- sname, err := ptsname(p)
- if err != nil {
- return nil, nil, err
- }
-
- t, err := os.OpenFile("/dev/"+sname, os.O_RDWR, 0)
- if err != nil {
- return nil, nil, err
- }
- return p, t, nil
-}
-
-func isptmaster(fd uintptr) (bool, error) {
- err := ioctl(fd, syscall.TIOCPTMASTER, 0)
- return err == nil, err
-}
-
-var (
- emptyFiodgnameArg fiodgnameArg
- ioctlFIODGNAME = _IOW('f', 120, unsafe.Sizeof(emptyFiodgnameArg))
-)
-
-func ptsname(f *os.File) (string, error) {
- master, err := isptmaster(f.Fd())
- if err != nil {
- return "", err
- }
- if !master {
- return "", syscall.EINVAL
- }
-
- const n = _C_SPECNAMELEN + 1
- var (
- buf = make([]byte, n)
- arg = fiodgnameArg{Len: n, Buf: (*byte)(unsafe.Pointer(&buf[0]))}
- )
- if err := ioctl(f.Fd(), ioctlFIODGNAME, uintptr(unsafe.Pointer(&arg))); err != nil {
- return "", err
- }
-
- for i, c := range buf {
- if c == 0 {
- return string(buf[:i]), nil
- }
- }
- return "", errors.New("FIODGNAME string not NUL-terminated")
-}
diff --git a/vendor/github.com/creack/pty/pty_linux.go b/vendor/github.com/creack/pty/pty_linux.go
deleted file mode 100644
index a3b368f..0000000
--- a/vendor/github.com/creack/pty/pty_linux.go
+++ /dev/null
@@ -1,54 +0,0 @@
-//go:build linux
-// +build linux
-
-package pty
-
-import (
- "os"
- "strconv"
- "syscall"
- "unsafe"
-)
-
-func open() (pty, tty *os.File, err error) {
- p, err := os.OpenFile("/dev/ptmx", os.O_RDWR, 0)
- if err != nil {
- return nil, nil, err
- }
- // In case of error after this point, make sure we close the ptmx fd.
- defer func() {
- if err != nil {
- _ = p.Close() // Best effort.
- }
- }()
-
- sname, err := ptsname(p)
- if err != nil {
- return nil, nil, err
- }
-
- if err := unlockpt(p); err != nil {
- return nil, nil, err
- }
-
- t, err := os.OpenFile(sname, os.O_RDWR|syscall.O_NOCTTY, 0) //nolint:gosec // Expected Open from a variable.
- if err != nil {
- return nil, nil, err
- }
- return p, t, nil
-}
-
-func ptsname(f *os.File) (string, error) {
- var n _C_uint
- err := ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&n))) //nolint:gosec // Expected unsafe pointer for Syscall call.
- if err != nil {
- return "", err
- }
- return "/dev/pts/" + strconv.Itoa(int(n)), nil
-}
-
-func unlockpt(f *os.File) error {
- var u _C_int
- // use TIOCSPTLCK with a pointer to zero to clear the lock
- return ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&u))) //nolint:gosec // Expected unsafe pointer for Syscall call.
-}
diff --git a/vendor/github.com/creack/pty/pty_netbsd.go b/vendor/github.com/creack/pty/pty_netbsd.go
deleted file mode 100644
index 2b20d94..0000000
--- a/vendor/github.com/creack/pty/pty_netbsd.go
+++ /dev/null
@@ -1,69 +0,0 @@
-//go:build netbsd
-// +build netbsd
-
-package pty
-
-import (
- "errors"
- "os"
- "syscall"
- "unsafe"
-)
-
-func open() (pty, tty *os.File, err error) {
- p, err := os.OpenFile("/dev/ptmx", os.O_RDWR, 0)
- if err != nil {
- return nil, nil, err
- }
- // In case of error after this point, make sure we close the ptmx fd.
- defer func() {
- if err != nil {
- _ = p.Close() // Best effort.
- }
- }()
-
- sname, err := ptsname(p)
- if err != nil {
- return nil, nil, err
- }
-
- if err := grantpt(p); err != nil {
- return nil, nil, err
- }
-
- // In NetBSD unlockpt() does nothing, so it isn't called here.
-
- t, err := os.OpenFile(sname, os.O_RDWR|syscall.O_NOCTTY, 0)
- if err != nil {
- return nil, nil, err
- }
- return p, t, nil
-}
-
-func ptsname(f *os.File) (string, error) {
- /*
- * from ptsname(3): The ptsname() function is equivalent to:
- * struct ptmget pm;
- * ioctl(fd, TIOCPTSNAME, &pm) == -1 ? NULL : pm.sn;
- */
- var ptm ptmget
- if err := ioctl(f.Fd(), uintptr(ioctl_TIOCPTSNAME), uintptr(unsafe.Pointer(&ptm))); err != nil {
- return "", err
- }
- name := make([]byte, len(ptm.Sn))
- for i, c := range ptm.Sn {
- name[i] = byte(c)
- if c == 0 {
- return string(name[:i]), nil
- }
- }
- return "", errors.New("TIOCPTSNAME string not NUL-terminated")
-}
-
-func grantpt(f *os.File) error {
- /*
- * from grantpt(3): Calling grantpt() is equivalent to:
- * ioctl(fd, TIOCGRANTPT, 0);
- */
- return ioctl(f.Fd(), uintptr(ioctl_TIOCGRANTPT), 0)
-}
diff --git a/vendor/github.com/creack/pty/pty_openbsd.go b/vendor/github.com/creack/pty/pty_openbsd.go
deleted file mode 100644
index 031367a..0000000
--- a/vendor/github.com/creack/pty/pty_openbsd.go
+++ /dev/null
@@ -1,36 +0,0 @@
-//go:build openbsd
-// +build openbsd
-
-package pty
-
-import (
- "os"
- "syscall"
- "unsafe"
-)
-
-func open() (pty, tty *os.File, err error) {
- /*
- * from ptm(4):
- * The PTMGET command allocates a free pseudo terminal, changes its
- * ownership to the caller, revokes the access privileges for all previous
- * users, opens the file descriptors for the pty and tty devices and
- * returns them to the caller in struct ptmget.
- */
-
- p, err := os.OpenFile("/dev/ptm", os.O_RDWR|syscall.O_CLOEXEC, 0)
- if err != nil {
- return nil, nil, err
- }
- defer p.Close()
-
- var ptm ptmget
- if err := ioctl(p.Fd(), uintptr(ioctl_PTMGET), uintptr(unsafe.Pointer(&ptm))); err != nil {
- return nil, nil, err
- }
-
- pty = os.NewFile(uintptr(ptm.Cfd), "/dev/ptm")
- tty = os.NewFile(uintptr(ptm.Sfd), "/dev/ptm")
-
- return pty, tty, nil
-}
diff --git a/vendor/github.com/creack/pty/pty_solaris.go b/vendor/github.com/creack/pty/pty_solaris.go
deleted file mode 100644
index 37f933e..0000000
--- a/vendor/github.com/creack/pty/pty_solaris.go
+++ /dev/null
@@ -1,152 +0,0 @@
-//go:build solaris
-// +build solaris
-
-package pty
-
-/* based on:
-http://src.illumos.org/source/xref/illumos-gate/usr/src/lib/libc/port/gen/pt.c
-*/
-
-import (
- "errors"
- "os"
- "strconv"
- "syscall"
- "unsafe"
-)
-
-func open() (pty, tty *os.File, err error) {
- ptmxfd, err := syscall.Open("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY, 0)
- if err != nil {
- return nil, nil, err
- }
- p := os.NewFile(uintptr(ptmxfd), "/dev/ptmx")
- // In case of error after this point, make sure we close the ptmx fd.
- defer func() {
- if err != nil {
- _ = p.Close() // Best effort.
- }
- }()
-
- sname, err := ptsname(p)
- if err != nil {
- return nil, nil, err
- }
-
- if err := grantpt(p); err != nil {
- return nil, nil, err
- }
-
- if err := unlockpt(p); err != nil {
- return nil, nil, err
- }
-
- ptsfd, err := syscall.Open(sname, os.O_RDWR|syscall.O_NOCTTY, 0)
- if err != nil {
- return nil, nil, err
- }
- t := os.NewFile(uintptr(ptsfd), sname)
-
- // In case of error after this point, make sure we close the pts fd.
- defer func() {
- if err != nil {
- _ = t.Close() // Best effort.
- }
- }()
-
- // pushing terminal driver STREAMS modules as per pts(7)
- for _, mod := range []string{"ptem", "ldterm", "ttcompat"} {
- if err := streamsPush(t, mod); err != nil {
- return nil, nil, err
- }
- }
-
- return p, t, nil
-}
-
-func ptsname(f *os.File) (string, error) {
- dev, err := ptsdev(f.Fd())
- if err != nil {
- return "", err
- }
- fn := "/dev/pts/" + strconv.FormatInt(int64(dev), 10)
-
- if err := syscall.Access(fn, 0); err != nil {
- return "", err
- }
- return fn, nil
-}
-
-func unlockpt(f *os.File) error {
- istr := strioctl{
- icCmd: UNLKPT,
- icTimeout: 0,
- icLen: 0,
- icDP: nil,
- }
- return ioctl(f.Fd(), I_STR, uintptr(unsafe.Pointer(&istr)))
-}
-
-func minor(x uint64) uint64 { return x & 0377 }
-
-func ptsdev(fd uintptr) (uint64, error) {
- istr := strioctl{
- icCmd: ISPTM,
- icTimeout: 0,
- icLen: 0,
- icDP: nil,
- }
-
- if err := ioctl(fd, I_STR, uintptr(unsafe.Pointer(&istr))); err != nil {
- return 0, err
- }
- var status syscall.Stat_t
- if err := syscall.Fstat(int(fd), &status); err != nil {
- return 0, err
- }
- return uint64(minor(status.Rdev)), nil
-}
-
-type ptOwn struct {
- rUID int32
- rGID int32
-}
-
-func grantpt(f *os.File) error {
- if _, err := ptsdev(f.Fd()); err != nil {
- return err
- }
- pto := ptOwn{
- rUID: int32(os.Getuid()),
- // XXX should first attempt to get gid of DEFAULT_TTY_GROUP="tty"
- rGID: int32(os.Getgid()),
- }
- istr := strioctl{
- icCmd: OWNERPT,
- icTimeout: 0,
- icLen: int32(unsafe.Sizeof(strioctl{})),
- icDP: unsafe.Pointer(&pto),
- }
- if err := ioctl(f.Fd(), I_STR, uintptr(unsafe.Pointer(&istr))); err != nil {
- return errors.New("access denied")
- }
- return nil
-}
-
-// streamsPush pushes STREAMS modules if not already done so.
-func streamsPush(f *os.File, mod string) error {
- buf := []byte(mod)
-
- // XXX I_FIND is not returning an error when the module
- // is already pushed even though truss reports a return
- // value of 1. A bug in the Go Solaris syscall interface?
- // XXX without this we are at risk of the issue
- // https://www.illumos.org/issues/9042
- // but since we are not using libc or XPG4.2, we should not be
- // double-pushing modules
-
- if err := ioctl(f.Fd(), I_FIND, uintptr(unsafe.Pointer(&buf[0]))); err != nil {
- return nil
- }
- return ioctl(f.Fd(), I_PUSH, uintptr(unsafe.Pointer(&buf[0])))
-}
diff --git a/vendor/github.com/creack/pty/pty_unsupported.go b/vendor/github.com/creack/pty/pty_unsupported.go
deleted file mode 100644
index c771020..0000000
--- a/vendor/github.com/creack/pty/pty_unsupported.go
+++ /dev/null
@@ -1,12 +0,0 @@
-//go:build !linux && !darwin && !freebsd && !dragonfly && !netbsd && !openbsd && !solaris
-// +build !linux,!darwin,!freebsd,!dragonfly,!netbsd,!openbsd,!solaris
-
-package pty
-
-import (
- "os"
-)
-
-func open() (pty, tty *os.File, err error) {
- return nil, nil, ErrUnsupported
-}
diff --git a/vendor/github.com/creack/pty/run.go b/vendor/github.com/creack/pty/run.go
deleted file mode 100644
index 4755366..0000000
--- a/vendor/github.com/creack/pty/run.go
+++ /dev/null
@@ -1,57 +0,0 @@
-package pty
-
-import (
- "os"
- "os/exec"
- "syscall"
-)
-
-// Start assigns a pseudo-terminal tty os.File to c.Stdin, c.Stdout,
-// and c.Stderr, calls c.Start, and returns the File of the tty's
-// corresponding pty.
-//
-// Starts the process in a new session and sets the controlling terminal.
-func Start(cmd *exec.Cmd) (*os.File, error) {
- return StartWithSize(cmd, nil)
-}
-
-// StartWithAttrs assigns a pseudo-terminal tty os.File to c.Stdin, c.Stdout,
-// and c.Stderr, calls c.Start, and returns the File of the tty's
-// corresponding pty.
-//
-// This will resize the pty to the specified size before starting the command if a size is provided.
-// The `attrs` parameter overrides the one set in c.SysProcAttr.
-//
-// This should generally not be needed. Used in some edge cases where it is needed to create a pty
-// without a controlling terminal.
-func StartWithAttrs(c *exec.Cmd, sz *Winsize, attrs *syscall.SysProcAttr) (*os.File, error) {
- pty, tty, err := Open()
- if err != nil {
- return nil, err
- }
- defer func() { _ = tty.Close() }() // Best effort.
-
- if sz != nil {
- if err := Setsize(pty, sz); err != nil {
- _ = pty.Close() // Best effort.
- return nil, err
- }
- }
- if c.Stdout == nil {
- c.Stdout = tty
- }
- if c.Stderr == nil {
- c.Stderr = tty
- }
- if c.Stdin == nil {
- c.Stdin = tty
- }
-
- c.SysProcAttr = attrs
-
- if err := c.Start(); err != nil {
- _ = pty.Close() // Best effort.
- return nil, err
- }
- return pty, err
-}
diff --git a/vendor/github.com/creack/pty/start.go b/vendor/github.com/creack/pty/start.go
deleted file mode 100644
index 9b51635..0000000
--- a/vendor/github.com/creack/pty/start.go
+++ /dev/null
@@ -1,25 +0,0 @@
-//go:build !windows
-// +build !windows
-
-package pty
-
-import (
- "os"
- "os/exec"
- "syscall"
-)
-
-// StartWithSize assigns a pseudo-terminal tty os.File to c.Stdin, c.Stdout,
-// and c.Stderr, calls c.Start, and returns the File of the tty's
-// corresponding pty.
-//
-// This will resize the pty to the specified size before starting the command.
-// Starts the process in a new session and sets the controlling terminal.
-func StartWithSize(cmd *exec.Cmd, ws *Winsize) (*os.File, error) {
- if cmd.SysProcAttr == nil {
- cmd.SysProcAttr = &syscall.SysProcAttr{}
- }
- cmd.SysProcAttr.Setsid = true
- cmd.SysProcAttr.Setctty = true
- return StartWithAttrs(cmd, ws, cmd.SysProcAttr)
-}
diff --git a/vendor/github.com/creack/pty/start_windows.go b/vendor/github.com/creack/pty/start_windows.go
deleted file mode 100644
index 7e9530b..0000000
--- a/vendor/github.com/creack/pty/start_windows.go
+++ /dev/null
@@ -1,19 +0,0 @@
-//go:build windows
-// +build windows
-
-package pty
-
-import (
- "os"
- "os/exec"
-)
-
-// StartWithSize assigns a pseudo-terminal tty os.File to c.Stdin, c.Stdout,
-// and c.Stderr, calls c.Start, and returns the File of the tty's
-// corresponding pty.
-//
-// This will resize the pty to the specified size before starting the command.
-// Starts the process in a new session and sets the controlling terminal.
-func StartWithSize(cmd *exec.Cmd, ws *Winsize) (*os.File, error) {
- return nil, ErrUnsupported
-}
diff --git a/vendor/github.com/creack/pty/test_crosscompile.sh b/vendor/github.com/creack/pty/test_crosscompile.sh
deleted file mode 100644
index 47e8b10..0000000
--- a/vendor/github.com/creack/pty/test_crosscompile.sh
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/env sh
-
-# Test script checking that all expected os/arch compile properly.
-# Does not actually test the logic, just the compilation so we make sure we don't break code depending on the lib.
-
-echo2() {
- echo $@ >&2
-}
-
-trap end 0
-end() {
- [ "$?" = 0 ] && echo2 "Pass." || (echo2 "Fail."; exit 1)
-}
-
-cross() {
- os=$1
- shift
- echo2 "Build for $os."
- for arch in $@; do
- echo2 " - $os/$arch"
- GOOS=$os GOARCH=$arch go build
- done
- echo2
-}
-
-set -e
-
-cross linux amd64 386 arm arm64 ppc64 ppc64le s390x mips mipsle mips64 mips64le
-cross darwin amd64 arm64
-cross freebsd amd64 386 arm arm64
-cross netbsd amd64 386 arm arm64
-cross openbsd amd64 386 arm arm64
-cross dragonfly amd64
-cross solaris amd64
-
-# Not expected to work but should still compile.
-cross windows amd64 386 arm
-
-# TODO: Fix compilation error on openbsd/arm.
-# TODO: Merge the solaris PR.
-
-# Some os/arch require a different compiler. Run in docker.
-if ! hash docker; then
- # If docker is not present, stop here.
- return
-fi
-
-echo2 "Build for linux."
-echo2 " - linux/riscv"
-docker build -t creack-pty-test -f Dockerfile.riscv .
-
-# Golang dropped support for darwin 32bits since go1.15. Make sure the lib still compile with go1.14 on those archs.
-echo2 "Build for darwin (32bits)."
-echo2 " - darwin/386"
-docker build -t creack-pty-test -f Dockerfile.golang --build-arg=GOVERSION=1.14 --build-arg=GOOS=darwin --build-arg=GOARCH=386 .
-echo2 " - darwin/arm"
-docker build -t creack-pty-test -f Dockerfile.golang --build-arg=GOVERSION=1.14 --build-arg=GOOS=darwin --build-arg=GOARCH=arm .
-
-# Run a single test for an old go version. Would be best with go1.0, but not available on Dockerhub.
-# Using 1.6 as it is the base version for the RISCV compiler.
-# Would also be better to run all the tests, not just one, need to refactor this file to allow for specifc archs per version.
-echo2 "Build for linux - go1.6."
-echo2 " - linux/amd64"
-docker build -t creack-pty-test -f Dockerfile.golang --build-arg=GOVERSION=1.6 --build-arg=GOOS=linux --build-arg=GOARCH=amd64 .
diff --git a/vendor/github.com/creack/pty/winsize.go b/vendor/github.com/creack/pty/winsize.go
deleted file mode 100644
index 57323f4..0000000
--- a/vendor/github.com/creack/pty/winsize.go
+++ /dev/null
@@ -1,27 +0,0 @@
-package pty
-
-import "os"
-
-// InheritSize applies the terminal size of pty to tty. This should be run
-// in a signal handler for syscall.SIGWINCH to automatically resize the tty when
-// the pty receives a window size change notification.
-func InheritSize(pty, tty *os.File) error {
- size, err := GetsizeFull(pty)
- if err != nil {
- return err
- }
- if err := Setsize(tty, size); err != nil {
- return err
- }
- return nil
-}
-
-// Getsize returns the number of rows (lines) and cols (positions
-// in each line) in terminal t.
-func Getsize(t *os.File) (rows, cols int, err error) {
- ws, err := GetsizeFull(t)
- if err != nil {
- return 0, 0, err
- }
- return int(ws.Rows), int(ws.Cols), nil
-}
diff --git a/vendor/github.com/creack/pty/winsize_unix.go b/vendor/github.com/creack/pty/winsize_unix.go
deleted file mode 100644
index 5d99c3d..0000000
--- a/vendor/github.com/creack/pty/winsize_unix.go
+++ /dev/null
@@ -1,35 +0,0 @@
-//go:build !windows
-// +build !windows
-
-package pty
-
-import (
- "os"
- "syscall"
- "unsafe"
-)
-
-// Winsize describes the terminal size.
-type Winsize struct {
- Rows uint16 // ws_row: Number of rows (in cells)
- Cols uint16 // ws_col: Number of columns (in cells)
- X uint16 // ws_xpixel: Width in pixels
- Y uint16 // ws_ypixel: Height in pixels
-}
-
-// Setsize resizes t to s.
-func Setsize(t *os.File, ws *Winsize) error {
- //nolint:gosec // Expected unsafe pointer for Syscall call.
- return ioctl(t.Fd(), syscall.TIOCSWINSZ, uintptr(unsafe.Pointer(ws)))
-}
-
-// GetsizeFull returns the full terminal size description.
-func GetsizeFull(t *os.File) (size *Winsize, err error) {
- var ws Winsize
-
- //nolint:gosec // Expected unsafe pointer for Syscall call.
- if err := ioctl(t.Fd(), syscall.TIOCGWINSZ, uintptr(unsafe.Pointer(&ws))); err != nil {
- return nil, err
- }
- return &ws, nil
-}
diff --git a/vendor/github.com/creack/pty/winsize_unsupported.go b/vendor/github.com/creack/pty/winsize_unsupported.go
deleted file mode 100644
index 0d21099..0000000
--- a/vendor/github.com/creack/pty/winsize_unsupported.go
+++ /dev/null
@@ -1,23 +0,0 @@
-//go:build windows
-// +build windows
-
-package pty
-
-import (
- "os"
-)
-
-// Winsize is a dummy struct to enable compilation on unsupported platforms.
-type Winsize struct {
- Rows, Cols, X, Y uint16
-}
-
-// Setsize resizes t to s.
-func Setsize(*os.File, *Winsize) error {
- return ErrUnsupported
-}
-
-// GetsizeFull returns the full terminal size description.
-func GetsizeFull(*os.File) (*Winsize, error) {
- return nil, ErrUnsupported
-}
diff --git a/vendor/github.com/creack/pty/ztypes_386.go b/vendor/github.com/creack/pty/ztypes_386.go
deleted file mode 100644
index d126f4a..0000000
--- a/vendor/github.com/creack/pty/ztypes_386.go
+++ /dev/null
@@ -1,12 +0,0 @@
-//go:build 386
-// +build 386
-
-// Created by cgo -godefs - DO NOT EDIT
-// cgo -godefs types.go
-
-package pty
-
-type (
- _C_int int32
- _C_uint uint32
-)
diff --git a/vendor/github.com/creack/pty/ztypes_amd64.go b/vendor/github.com/creack/pty/ztypes_amd64.go
deleted file mode 100644
index 6c4a767..0000000
--- a/vendor/github.com/creack/pty/ztypes_amd64.go
+++ /dev/null
@@ -1,12 +0,0 @@
-//go:build amd64
-// +build amd64
-
-// Created by cgo -godefs - DO NOT EDIT
-// cgo -godefs types.go
-
-package pty
-
-type (
- _C_int int32
- _C_uint uint32
-)
diff --git a/vendor/github.com/creack/pty/ztypes_arm.go b/vendor/github.com/creack/pty/ztypes_arm.go
deleted file mode 100644
index de6fe16..0000000
--- a/vendor/github.com/creack/pty/ztypes_arm.go
+++ /dev/null
@@ -1,12 +0,0 @@
-//go:build arm
-// +build arm
-
-// Created by cgo -godefs - DO NOT EDIT
-// cgo -godefs types.go
-
-package pty
-
-type (
- _C_int int32
- _C_uint uint32
-)
diff --git a/vendor/github.com/creack/pty/ztypes_arm64.go b/vendor/github.com/creack/pty/ztypes_arm64.go
deleted file mode 100644
index c4f315c..0000000
--- a/vendor/github.com/creack/pty/ztypes_arm64.go
+++ /dev/null
@@ -1,12 +0,0 @@
-//go:build arm64
-// +build arm64
-
-// Created by cgo -godefs - DO NOT EDIT
-// cgo -godefs types.go
-
-package pty
-
-type (
- _C_int int32
- _C_uint uint32
-)
diff --git a/vendor/github.com/creack/pty/ztypes_dragonfly_amd64.go b/vendor/github.com/creack/pty/ztypes_dragonfly_amd64.go
deleted file mode 100644
index 183c421..0000000
--- a/vendor/github.com/creack/pty/ztypes_dragonfly_amd64.go
+++ /dev/null
@@ -1,17 +0,0 @@
-//go:build amd64 && dragonfly
-// +build amd64,dragonfly
-
-// Created by cgo -godefs - DO NOT EDIT
-// cgo -godefs types_dragonfly.go
-
-package pty
-
-const (
- _C_SPECNAMELEN = 0x3f
-)
-
-type fiodgnameArg struct {
- Name *byte
- Len uint32
- Pad_cgo_0 [4]byte
-}
diff --git a/vendor/github.com/creack/pty/ztypes_freebsd_386.go b/vendor/github.com/creack/pty/ztypes_freebsd_386.go
deleted file mode 100644
index d80dbf7..0000000
--- a/vendor/github.com/creack/pty/ztypes_freebsd_386.go
+++ /dev/null
@@ -1,16 +0,0 @@
-//go:build 386 && freebsd
-// +build 386,freebsd
-
-// Created by cgo -godefs - DO NOT EDIT
-// cgo -godefs types_freebsd.go
-
-package pty
-
-const (
- _C_SPECNAMELEN = 0x3f
-)
-
-type fiodgnameArg struct {
- Len int32
- Buf *byte
-}
diff --git a/vendor/github.com/creack/pty/ztypes_freebsd_amd64.go b/vendor/github.com/creack/pty/ztypes_freebsd_amd64.go
deleted file mode 100644
index bfab4e4..0000000
--- a/vendor/github.com/creack/pty/ztypes_freebsd_amd64.go
+++ /dev/null
@@ -1,17 +0,0 @@
-//go:build amd64 && freebsd
-// +build amd64,freebsd
-
-// Created by cgo -godefs - DO NOT EDIT
-// cgo -godefs types_freebsd.go
-
-package pty
-
-const (
- _C_SPECNAMELEN = 0x3f
-)
-
-type fiodgnameArg struct {
- Len int32
- Pad_cgo_0 [4]byte
- Buf *byte
-}
diff --git a/vendor/github.com/creack/pty/ztypes_freebsd_arm.go b/vendor/github.com/creack/pty/ztypes_freebsd_arm.go
deleted file mode 100644
index 3a8aeae..0000000
--- a/vendor/github.com/creack/pty/ztypes_freebsd_arm.go
+++ /dev/null
@@ -1,16 +0,0 @@
-//go:build arm && freebsd
-// +build arm,freebsd
-
-// Created by cgo -godefs - DO NOT EDIT
-// cgo -godefs types_freebsd.go
-
-package pty
-
-const (
- _C_SPECNAMELEN = 0x3f
-)
-
-type fiodgnameArg struct {
- Len int32
- Buf *byte
-}
diff --git a/vendor/github.com/creack/pty/ztypes_freebsd_arm64.go b/vendor/github.com/creack/pty/ztypes_freebsd_arm64.go
deleted file mode 100644
index a839249..0000000
--- a/vendor/github.com/creack/pty/ztypes_freebsd_arm64.go
+++ /dev/null
@@ -1,16 +0,0 @@
-//go:build arm64 && freebsd
-// +build arm64,freebsd
-
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs types_freebsd.go
-
-package pty
-
-const (
- _C_SPECNAMELEN = 0xff
-)
-
-type fiodgnameArg struct {
- Len int32
- Buf *byte
-}
diff --git a/vendor/github.com/creack/pty/ztypes_freebsd_ppc64.go b/vendor/github.com/creack/pty/ztypes_freebsd_ppc64.go
deleted file mode 100644
index 5fa102f..0000000
--- a/vendor/github.com/creack/pty/ztypes_freebsd_ppc64.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Created by cgo -godefs - DO NOT EDIT
-// cgo -godefs types_freebsd.go
-
-package pty
-
-const (
- _C_SPECNAMELEN = 0x3f
-)
-
-type fiodgnameArg struct {
- Len int32
- Pad_cgo_0 [4]byte
- Buf *byte
-}
diff --git a/vendor/github.com/creack/pty/ztypes_loong64.go b/vendor/github.com/creack/pty/ztypes_loong64.go
deleted file mode 100644
index 3beb5c1..0000000
--- a/vendor/github.com/creack/pty/ztypes_loong64.go
+++ /dev/null
@@ -1,12 +0,0 @@
-//go:build loong64
-// +build loong64
-
-// Created by cgo -godefs - DO NOT EDIT
-// cgo -godefs types.go
-
-package pty
-
-type (
- _C_int int32
- _C_uint uint32
-)
diff --git a/vendor/github.com/creack/pty/ztypes_mipsx.go b/vendor/github.com/creack/pty/ztypes_mipsx.go
deleted file mode 100644
index 2812779..0000000
--- a/vendor/github.com/creack/pty/ztypes_mipsx.go
+++ /dev/null
@@ -1,13 +0,0 @@
-//go:build (mips || mipsle || mips64 || mips64le) && linux
-// +build mips mipsle mips64 mips64le
-// +build linux
-
-// Created by cgo -godefs - DO NOT EDIT
-// cgo -godefs types.go
-
-package pty
-
-type (
- _C_int int32
- _C_uint uint32
-)
diff --git a/vendor/github.com/creack/pty/ztypes_netbsd_32bit_int.go b/vendor/github.com/creack/pty/ztypes_netbsd_32bit_int.go
deleted file mode 100644
index 2ab7c45..0000000
--- a/vendor/github.com/creack/pty/ztypes_netbsd_32bit_int.go
+++ /dev/null
@@ -1,17 +0,0 @@
-//go:build (386 || amd64 || arm || arm64) && netbsd
-// +build 386 amd64 arm arm64
-// +build netbsd
-
-package pty
-
-type ptmget struct {
- Cfd int32
- Sfd int32
- Cn [1024]int8
- Sn [1024]int8
-}
-
-var (
- ioctl_TIOCPTSNAME = 0x48087448
- ioctl_TIOCGRANTPT = 0x20007447
-)
diff --git a/vendor/github.com/creack/pty/ztypes_openbsd_32bit_int.go b/vendor/github.com/creack/pty/ztypes_openbsd_32bit_int.go
deleted file mode 100644
index 1eb0948..0000000
--- a/vendor/github.com/creack/pty/ztypes_openbsd_32bit_int.go
+++ /dev/null
@@ -1,14 +0,0 @@
-//go:build (386 || amd64 || arm || arm64 || mips64) && openbsd
-// +build 386 amd64 arm arm64 mips64
-// +build openbsd
-
-package pty
-
-type ptmget struct {
- Cfd int32
- Sfd int32
- Cn [16]int8
- Sn [16]int8
-}
-
-var ioctl_PTMGET = 0x40287401
diff --git a/vendor/github.com/creack/pty/ztypes_ppc64.go b/vendor/github.com/creack/pty/ztypes_ppc64.go
deleted file mode 100644
index bbb3da8..0000000
--- a/vendor/github.com/creack/pty/ztypes_ppc64.go
+++ /dev/null
@@ -1,12 +0,0 @@
-//go:build ppc64
-// +build ppc64
-
-// Created by cgo -godefs - DO NOT EDIT
-// cgo -godefs types.go
-
-package pty
-
-type (
- _C_int int32
- _C_uint uint32
-)
diff --git a/vendor/github.com/creack/pty/ztypes_ppc64le.go b/vendor/github.com/creack/pty/ztypes_ppc64le.go
deleted file mode 100644
index 8a4fac3..0000000
--- a/vendor/github.com/creack/pty/ztypes_ppc64le.go
+++ /dev/null
@@ -1,12 +0,0 @@
-//go:build ppc64le
-// +build ppc64le
-
-// Created by cgo -godefs - DO NOT EDIT
-// cgo -godefs types.go
-
-package pty
-
-type (
- _C_int int32
- _C_uint uint32
-)
diff --git a/vendor/github.com/creack/pty/ztypes_riscvx.go b/vendor/github.com/creack/pty/ztypes_riscvx.go
deleted file mode 100644
index dc5da90..0000000
--- a/vendor/github.com/creack/pty/ztypes_riscvx.go
+++ /dev/null
@@ -1,12 +0,0 @@
-//go:build riscv || riscv64
-// +build riscv riscv64
-
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs types.go
-
-package pty
-
-type (
- _C_int int32
- _C_uint uint32
-)
diff --git a/vendor/github.com/creack/pty/ztypes_s390x.go b/vendor/github.com/creack/pty/ztypes_s390x.go
deleted file mode 100644
index 3433be7..0000000
--- a/vendor/github.com/creack/pty/ztypes_s390x.go
+++ /dev/null
@@ -1,12 +0,0 @@
-//go:build s390x
-// +build s390x
-
-// Created by cgo -godefs - DO NOT EDIT
-// cgo -godefs types.go
-
-package pty
-
-type (
- _C_int int32
- _C_uint uint32
-)
diff --git a/vendor/github.com/jameskeane/bcrypt/.gitignore b/vendor/github.com/jameskeane/bcrypt/.gitignore
deleted file mode 100644
index c1e1c06..0000000
--- a/vendor/github.com/jameskeane/bcrypt/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-6.out
-*.6
-_obj
-_test
-_testmain.go
diff --git a/vendor/github.com/jameskeane/bcrypt/LICENSE b/vendor/github.com/jameskeane/bcrypt/LICENSE
deleted file mode 100644
index 555bb71..0000000
--- a/vendor/github.com/jameskeane/bcrypt/LICENSE
+++ /dev/null
@@ -1,29 +0,0 @@
-Copyright (c) 2011 James Keane . All rights reserved.
-Copyright (c) 2006 Damien Miller .
-Copyright (c) 2011 ZooWar.com, All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
- * Neither the name of weekendlogic nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/jameskeane/bcrypt/README b/vendor/github.com/jameskeane/bcrypt/README
deleted file mode 100644
index a4d638a..0000000
--- a/vendor/github.com/jameskeane/bcrypt/README
+++ /dev/null
@@ -1,46 +0,0 @@
-Installation:
- goinstall github.com/jameskeane/bcrypt
-
-Example use:
- package main
-
- import (
- "fmt"
- "github.com/jameskeane/bcrypt"
- )
-
- var password = "WyWihatdyd?frub1"
- var bad_password = "just a wild guess"
-
- func main() {
- // generate a random salt with default rounds of complexity
- salt, _ := bcrypt.Salt()
-
- // generate a random salt with 10 rounds of complexity
- salt, _ = bcrypt.Salt(10)
-
- // hash and verify a password with random salt
- hash, _ := bcrypt.Hash(password)
- if bcrypt.Match(password, hash) {
- fmt.Println("They match")
- }
-
- // hash and verify a password with a static salt
- hash, _ = bcrypt.Hash(password, salt)
- if bcrypt.Match(password, hash) {
- fmt.Println("They match")
- }
-
- // verify a random password fails to match the hashed password
- if !bcrypt.Match(bad_password, hash) {
- fmt.Println("They don't match")
- }
- }
-
-Todo:
- grep 'TODO' * -r
-
-Notes:
- * This library is derived from jBcrypt by Damien Miller
- * bcrypt_test.go is from ZooWar.com
-
diff --git a/vendor/github.com/jameskeane/bcrypt/bcrypt.go b/vendor/github.com/jameskeane/bcrypt/bcrypt.go
deleted file mode 100644
index fe84921..0000000
--- a/vendor/github.com/jameskeane/bcrypt/bcrypt.go
+++ /dev/null
@@ -1,190 +0,0 @@
-package bcrypt
-
-import (
- "bytes"
- "crypto/rand"
- "crypto/subtle"
- "encoding/base64"
- "errors"
- "strconv"
- "strings"
-)
-
-var (
- InvalidRounds = errors.New("bcrypt: Invalid rounds parameter")
- InvalidSalt = errors.New("bcrypt: Invalid salt supplied")
-)
-
-const (
- MaxRounds = 31
- MinRounds = 4
- DefaultRounds = 12
- SaltLen = 16
- BlowfishRounds = 16
-)
-
-var enc = base64.NewEncoding("./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789")
-
-// Helper function to build the bcrypt hash string
-// payload takes :
-// * []byte -> which it base64 encodes it (trims padding "=") and writes it to the buffer
-// * string -> which it writes straight to the buffer
-func build_bcrypt_str(minor byte, rounds uint, payload ...interface{}) []byte {
- rs := bytes.NewBuffer(make([]byte, 0, 61))
- rs.WriteString("$2")
- if minor >= 'a' {
- rs.WriteByte(minor)
- }
-
- rs.WriteByte('$')
- if rounds < 10 {
- rs.WriteByte('0')
- }
-
- rs.WriteString(strconv.FormatUint(uint64(rounds), 10))
- rs.WriteByte('$')
- for _, p := range payload {
- if pb, ok := p.([]byte); ok {
- rs.WriteString(strings.TrimRight(enc.EncodeToString(pb), "="))
- } else if ps, ok := p.(string); ok {
- rs.WriteString(ps)
- }
- }
- return rs.Bytes()
-}
-
-// Salt generation
-func Salt(rounds ...int) (string, error) {
- rb, err := SaltBytes(rounds...)
- return string(rb), err
-}
-
-func SaltBytes(rounds ...int) (salt []byte, err error) {
- r := DefaultRounds
- if len(rounds) > 0 {
- r = rounds[0]
- if r < MinRounds || r > MaxRounds {
- return nil, InvalidRounds
- }
- }
-
- rnd := make([]byte, SaltLen)
- read, err := rand.Read(rnd)
- if read != SaltLen || err != nil {
- return nil, err
- }
-
- return build_bcrypt_str('a', uint(r), rnd), nil
-}
-
-func consume(r *bytes.Buffer, b byte) bool {
- got, err := r.ReadByte()
- if err != nil {
- return false
- }
- if got != b {
- r.UnreadByte()
- return false
- }
-
- return true
-}
-
-func Hash(password string, salt ...string) (ps string, err error) {
- var s []byte
- var pb []byte
-
- if len(salt) == 0 {
- s, err = SaltBytes()
- if err != nil {
- return
- }
- } else if len(salt) > 0 {
- s = []byte(salt[0])
- }
-
- pb, err = HashBytes([]byte(password), s)
- return string(pb), err
-}
-
-func HashBytes(password []byte, salt ...[]byte) (hash []byte, err error) {
- var s []byte
-
- if len(salt) == 0 {
- s, err = SaltBytes()
- if err != nil {
- return
- }
- } else if len(salt) > 0 {
- s = salt[0]
- }
-
- // TODO: use a regex? I hear go has bad regex performance a simple FSM seems faster
- // "^\\$2([a-z]?)\\$([0-3][0-9])\\$([\\./A-Za-z0-9]{22}+)"
-
- // Ok, extract the required information
- minor := byte(0)
- sr := bytes.NewBuffer(s)
-
- if !consume(sr, '$') || !consume(sr, '2') {
- return nil, InvalidSalt
- }
-
- if !consume(sr, '$') {
- minor, _ = sr.ReadByte()
- if minor != 'a' || !consume(sr, '$') {
- return nil, InvalidSalt
- }
- }
-
- rounds_bytes := make([]byte, 2)
- read, err := sr.Read(rounds_bytes)
- if err != nil || read != 2 {
- return nil, InvalidSalt
- }
-
- if !consume(sr, '$') {
- return nil, InvalidSalt
- }
-
- var rounds64 uint64
- rounds64, err = strconv.ParseUint(string(rounds_bytes), 10, 0)
- if err != nil {
- return nil, InvalidSalt
- }
-
- rounds := uint(rounds64)
-
- // TODO: can't we use base64.NewDecoder(enc, sr) ?
- salt_bytes := make([]byte, 22)
- read, err = sr.Read(salt_bytes)
- if err != nil || read != 22 {
- return nil, InvalidSalt
- }
-
- var saltb []byte
- // encoding/base64 expects 4 byte blocks padded, since bcrypt uses only 22 bytes we need to go up
- saltb, err = enc.DecodeString(string(salt_bytes) + "==")
- if err != nil {
- return nil, err
- }
-
- // cipher expects null terminated input (go initializes everything with zero values so this works)
- password_term := make([]byte, len(password)+1)
- copy(password_term, password)
-
- hashed := crypt_raw(password_term, saltb[:SaltLen], rounds)
- return build_bcrypt_str(minor, rounds, string(salt_bytes), hashed[:len(bf_crypt_ciphertext)*4-1]), nil
-}
-
-func Match(password, hash string) bool {
- return MatchBytes([]byte(password), []byte(hash))
-}
-
-func MatchBytes(password []byte, hash []byte) bool {
- h, err := HashBytes(password, hash)
- if err != nil {
- return false
- }
- return subtle.ConstantTimeCompare(h, hash) == 1
-}
diff --git a/vendor/github.com/jameskeane/bcrypt/cipher.go b/vendor/github.com/jameskeane/bcrypt/cipher.go
deleted file mode 100644
index 00724e4..0000000
--- a/vendor/github.com/jameskeane/bcrypt/cipher.go
+++ /dev/null
@@ -1,415 +0,0 @@
-package bcrypt
-
-var p_orig = [18]uint{
- 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,
- 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
- 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,
- 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917,
- 0x9216d5d9, 0x8979fb1b,
-}
-
-var s_orig = [1024]uint{
- 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7,
- 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99,
- 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16,
- 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e,
- 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee,
- 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013,
- 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef,
- 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e,
- 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60,
- 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440,
- 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce,
- 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a,
- 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e,
- 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677,
- 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193,
- 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032,
- 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88,
- 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239,
- 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e,
- 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0,
- 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3,
- 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98,
- 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88,
- 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe,
- 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6,
- 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d,
- 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b,
- 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7,
- 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba,
- 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463,
- 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f,
- 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09,
- 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3,
- 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb,
- 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279,
- 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8,
- 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab,
- 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82,
- 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db,
- 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573,
- 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0,
- 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b,
- 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790,
- 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8,
- 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4,
- 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0,
- 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7,
- 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c,
- 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad,
- 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1,
- 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299,
- 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9,
- 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477,
- 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf,
- 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49,
- 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af,
- 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa,
- 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5,
- 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41,
- 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915,
- 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400,
- 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915,
- 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664,
- 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a,
- 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623,
- 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266,
- 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1,
- 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e,
- 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6,
- 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1,
- 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e,
- 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1,
- 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737,
- 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8,
- 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff,
- 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd,
- 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701,
- 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7,
- 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41,
- 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331,
- 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf,
- 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af,
- 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e,
- 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87,
- 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c,
- 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2,
- 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16,
- 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd,
- 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b,
- 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509,
- 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e,
- 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3,
- 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f,
- 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a,
- 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4,
- 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960,
- 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66,
- 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28,
- 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802,
- 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84,
- 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510,
- 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf,
- 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14,
- 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e,
- 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50,
- 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7,
- 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8,
- 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281,
- 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99,
- 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696,
- 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128,
- 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73,
- 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0,
- 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0,
- 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105,
- 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250,
- 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3,
- 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285,
- 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00,
- 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061,
- 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb,
- 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e,
- 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735,
- 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc,
- 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9,
- 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340,
- 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20,
- 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7,
- 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934,
- 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068,
- 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af,
- 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840,
- 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45,
- 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504,
- 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a,
- 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb,
- 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee,
- 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6,
- 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42,
- 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b,
- 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2,
- 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb,
- 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527,
- 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b,
- 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33,
- 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c,
- 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3,
- 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc,
- 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17,
- 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564,
- 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b,
- 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115,
- 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922,
- 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728,
- 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0,
- 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e,
- 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37,
- 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d,
- 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804,
- 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b,
- 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3,
- 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb,
- 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d,
- 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c,
- 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350,
- 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9,
- 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a,
- 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe,
- 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d,
- 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc,
- 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f,
- 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61,
- 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2,
- 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9,
- 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2,
- 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c,
- 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e,
- 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633,
- 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10,
- 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169,
- 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52,
- 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027,
- 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5,
- 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62,
- 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634,
- 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76,
- 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24,
- 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc,
- 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4,
- 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c,
- 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837,
- 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0,
- 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b,
- 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe,
- 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b,
- 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4,
- 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8,
- 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6,
- 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304,
- 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22,
- 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4,
- 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6,
- 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9,
- 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59,
- 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593,
- 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51,
- 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28,
- 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c,
- 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b,
- 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28,
- 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c,
- 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd,
- 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a,
- 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319,
- 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb,
- 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f,
- 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991,
- 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32,
- 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680,
- 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166,
- 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae,
- 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb,
- 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5,
- 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47,
- 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370,
- 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d,
- 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84,
- 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048,
- 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8,
- 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd,
- 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9,
- 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7,
- 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38,
- 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f,
- 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c,
- 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525,
- 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1,
- 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442,
- 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964,
- 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e,
- 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8,
- 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d,
- 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f,
- 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299,
- 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02,
- 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc,
- 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614,
- 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a,
- 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6,
- 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b,
- 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0,
- 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060,
- 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e,
- 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9,
- 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f,
- 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6,
-}
-
-var bf_crypt_ciphertext = [6]uint{
- 0x4f727068, 0x65616e42, 0x65686f6c,
- 0x64657253, 0x63727944, 0x6f756274,
-}
-
-
-type cipher struct {
- P [18]uint
- S [1024]uint
- data [6]uint
-}
-
-func (c *cipher) encipher(lr []uint, off int) {
- l := lr[off] ^ c.P[0]
- r := lr[off+1]
-
- for i := 0; i <= BlowfishRounds-2; i += 2{
- // Feistel substitution on left and right word respectively
- r ^= (((c.S[(l>>24)&0xff] + c.S[0x100|((l>>16)&0xff)]) ^ c.S[0x200|((l>>8)&0xff)]) + c.S[0x300|(l&0xff)]) ^ c.P[i+1]
- l ^= (((c.S[(r>>24)&0xff] + c.S[0x100|((r>>16)&0xff)]) ^ c.S[0x200|((r>>8)&0xff)]) + c.S[0x300|(r&0xff)]) ^ c.P[i+2]
- }
-
- lr[off] = r ^ c.P[BlowfishRounds+1]
- lr[off+1] = l
-}
-/**
- * Cycically extract a word of key material
- * @param data the string to extract the data from
- * @param off the current offset into the data
- * @return the next word of material from data and the next offset into the data
- */
-func streamtoword(data []byte, off int) (uint, int) {
- var word uint
- for i := 0; i < 4; i++ {
- word = (word << 8) | uint(data[off]&0xff)
- off = (off + 1) % len(data)
- }
-
- return word, off
-}
-
-/**
- * Key the Blowfish cipher
- * @param key an array containing the key
- */
-func (c *cipher) key(key []byte) {
- var word uint
- off := 0
- lr := []uint{0, 0}
- plen := len(c.P)
- slen := len(c.S)
-
- for i := 0; i < plen; i++ {
- word, off = streamtoword(key, off)
- c.P[i] = c.P[i] ^ word
- }
-
- for i := 0; i < plen; i += 2 {
- c.encipher(lr, 0)
- c.P[i] = lr[0]
- c.P[i+1] = lr[1]
- }
-
- for i := 0; i < slen; i += 2 {
- c.encipher(lr, 0)
- c.S[i] = lr[0]
- c.S[i+1] = lr[1]
- }
-}
-
-/**
- * Perform the "enhanced key schedule" step described by
- * Provos and Mazieres in "A Future-Adaptable Password Scheme"
- * http://www.openbsd.org/papers/bcrypt-paper.ps
- * @param data salt information
- * @param key password information
- */
-func (c *cipher) ekskey(data []byte, key []byte) {
- var word uint
- koff := 0
- doff := 0
- lr := []uint{0, 0}
- plen := len(c.P)
- slen := len(c.S)
-
- for i := 0; i < plen; i++ {
- word, koff = streamtoword(key, koff)
- c.P[i] = c.P[i] ^ word
- }
-
- for i := 0; i < plen; i += 2 {
- word, doff = streamtoword(data, doff)
- lr[0] ^= word
- word, doff = streamtoword(data, doff)
- lr[1] ^= word
- c.encipher(lr, 0)
- c.P[i] = lr[0]
- c.P[i+1] = lr[1]
- }
-
- for i := 0; i < slen; i += 2 {
- word, doff = streamtoword(data, doff)
- lr[0] ^= word
- word, doff = streamtoword(data, doff)
- lr[1] ^= word
- c.encipher(lr, 0)
- c.S[i] = lr[0]
- c.S[i+1] = lr[1]
- }
-}
-
-/**
- * Perform the central password hashing step in the
- * bcrypt scheme
- * @param password the password to hash
- * @param salt the binary salt to hash with the password
- * @param log_rounds the binary logarithm of the number
- * of rounds of hashing to apply
- * @return an array containing the binary hashed password
- */
-func crypt_raw(password []byte, salt []byte, log_rounds uint) []byte {
- c := &cipher{P:p_orig, S:s_orig, data:bf_crypt_ciphertext}
-
- rounds := 1 << log_rounds
- c.ekskey(salt, password)
- for i := 0; i < rounds; i++ {
- c.key(password)
- c.key(salt)
- }
-
- for i := 0; i < 64; i++ {
- for j := 0; j < (6 >> 1); j++ {
- c.encipher(c.data[:], j<<1)
- }
- }
-
- ret := make([]byte, 24)
- for i := 0; i < 6; i++ {
- k := i<<2
- ret[k] = (byte)((c.data[i] >> 24) & 0xff)
- ret[k+1] = (byte)((c.data[i] >> 16) & 0xff)
- ret[k+2] = (byte)((c.data[i] >> 8) & 0xff)
- ret[k+3] = (byte)(c.data[i] & 0xff)
- }
- return ret
-}
diff --git a/vendor/github.com/klauspost/cpuid/v2/.gitignore b/vendor/github.com/klauspost/cpuid/v2/.gitignore
deleted file mode 100644
index daf913b..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/.gitignore
+++ /dev/null
@@ -1,24 +0,0 @@
-# Compiled Object files, Static and Dynamic libs (Shared Objects)
-*.o
-*.a
-*.so
-
-# Folders
-_obj
-_test
-
-# Architecture specific extensions/prefixes
-*.[568vq]
-[568vq].out
-
-*.cgo1.go
-*.cgo2.c
-_cgo_defun.c
-_cgo_gotypes.go
-_cgo_export.*
-
-_testmain.go
-
-*.exe
-*.test
-*.prof
diff --git a/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml b/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml
deleted file mode 100644
index 944cc00..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml
+++ /dev/null
@@ -1,74 +0,0 @@
-# This is an example goreleaser.yaml file with some sane defaults.
-# Make sure to check the documentation at http://goreleaser.com
-
-builds:
- -
- id: "cpuid"
- binary: cpuid
- main: ./cmd/cpuid/main.go
- env:
- - CGO_ENABLED=0
- flags:
- - -ldflags=-s -w
- goos:
- - aix
- - linux
- - freebsd
- - netbsd
- - windows
- - darwin
- goarch:
- - 386
- - amd64
- - arm64
- goarm:
- - 7
-
-archives:
- -
- id: cpuid
- name_template: "cpuid-{{ .Os }}_{{ .Arch }}_{{ .Version }}"
- replacements:
- aix: AIX
- darwin: OSX
- linux: Linux
- windows: Windows
- 386: i386
- amd64: x86_64
- freebsd: FreeBSD
- netbsd: NetBSD
- format_overrides:
- - goos: windows
- format: zip
- files:
- - LICENSE
-checksum:
- name_template: 'checksums.txt'
-snapshot:
- name_template: "{{ .Tag }}-next"
-changelog:
- sort: asc
- filters:
- exclude:
- - '^doc:'
- - '^docs:'
- - '^test:'
- - '^tests:'
- - '^Update\sREADME.md'
-
-nfpms:
- -
- file_name_template: "cpuid_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
- vendor: Klaus Post
- homepage: https://github.com/klauspost/cpuid
- maintainer: Klaus Post
- description: CPUID Tool
- license: BSD 3-Clause
- formats:
- - deb
- - rpm
- replacements:
- darwin: Darwin
- linux: Linux
- freebsd: FreeBSD
- amd64: x86_64
diff --git a/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt b/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt
deleted file mode 100644
index 2ef4714..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-Developer Certificate of Origin
-Version 1.1
-
-Copyright (C) 2015- Klaus Post & Contributors.
-Email: klauspost@gmail.com
-
-Everyone is permitted to copy and distribute verbatim copies of this
-license document, but changing it is not allowed.
-
-
-Developer's Certificate of Origin 1.1
-
-By making a contribution to this project, I certify that:
-
-(a) The contribution was created in whole or in part by me and I
- have the right to submit it under the open source license
- indicated in the file; or
-
-(b) The contribution is based upon previous work that, to the best
- of my knowledge, is covered under an appropriate open source
- license and I have the right under that license to submit that
- work with modifications, whether created in whole or in part
- by me, under the same open source license (unless I am
- permitted to submit under a different license), as indicated
- in the file; or
-
-(c) The contribution was provided directly to me by some other
- person who certified (a), (b) or (c) and I have not modified
- it.
-
-(d) I understand and agree that this project and the contribution
- are public and that a record of the contribution (including all
- personal information I submit with it, including my sign-off) is
- maintained indefinitely and may be redistributed consistent with
- this project or the open source license(s) involved.
diff --git a/vendor/github.com/klauspost/cpuid/v2/LICENSE b/vendor/github.com/klauspost/cpuid/v2/LICENSE
deleted file mode 100644
index 5cec7ee..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/LICENSE
+++ /dev/null
@@ -1,22 +0,0 @@
-The MIT License (MIT)
-
-Copyright (c) 2015 Klaus Post
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
diff --git a/vendor/github.com/klauspost/cpuid/v2/README.md b/vendor/github.com/klauspost/cpuid/v2/README.md
deleted file mode 100644
index ea7df3d..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/README.md
+++ /dev/null
@@ -1,258 +0,0 @@
-# cpuid
-Package cpuid provides information about the CPU running the current program.
-
-CPU features are detected on startup, and kept for fast access through the life of the application.
-Currently x86 / x64 (AMD64/i386) and ARM (ARM64) is supported, and no external C (cgo) code is used, which should make the library very easy to use.
-
-You can access the CPU information by accessing the shared CPU variable of the cpuid library.
-
-Package home: https://github.com/klauspost/cpuid
-
-[![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2)
-[![Build Status][3]][4]
-
-[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master
-[4]: https://travis-ci.org/klauspost/cpuid
-
-## installing
-
-`go get -u github.com/klauspost/cpuid/v2` using modules.
-
-Drop `v2` for others.
-
-## example
-
-```Go
-package main
-
-import (
- "fmt"
- "strings"
-
- . "github.com/klauspost/cpuid/v2"
-)
-
-func main() {
- // Print basic CPU information:
- fmt.Println("Name:", CPU.BrandName)
- fmt.Println("PhysicalCores:", CPU.PhysicalCores)
- fmt.Println("ThreadsPerCore:", CPU.ThreadsPerCore)
- fmt.Println("LogicalCores:", CPU.LogicalCores)
- fmt.Println("Family", CPU.Family, "Model:", CPU.Model, "Vendor ID:", CPU.VendorID)
- fmt.Println("Features:", strings.Join(CPU.FeatureSet(), ","))
- fmt.Println("Cacheline bytes:", CPU.CacheLine)
- fmt.Println("L1 Data Cache:", CPU.Cache.L1D, "bytes")
- fmt.Println("L1 Instruction Cache:", CPU.Cache.L1I, "bytes")
- fmt.Println("L2 Cache:", CPU.Cache.L2, "bytes")
- fmt.Println("L3 Cache:", CPU.Cache.L3, "bytes")
- fmt.Println("Frequency", CPU.Hz, "hz")
-
- // Test if we have these specific features:
- if CPU.Supports(SSE, SSE2) {
- fmt.Println("We have Streaming SIMD 2 Extensions")
- }
-}
-```
-
-Sample output:
-```
->go run main.go
-Name: AMD Ryzen 9 3950X 16-Core Processor
-PhysicalCores: 16
-ThreadsPerCore: 2
-LogicalCores: 32
-Family 23 Model: 113 Vendor ID: AMD
-Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CMOV,CX16,F16C,FMA3,HTT,HYPERVISOR,LZCNT,MMX,MMXEXT,NX,POPCNT,RDRAND,RDSEED,RDTSCP,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3
-Cacheline bytes: 64
-L1 Data Cache: 32768 bytes
-L1 Instruction Cache: 32768 bytes
-L2 Cache: 524288 bytes
-L3 Cache: 16777216 bytes
-Frequency 0 hz
-We have Streaming SIMD 2 Extensions
-```
-
-# usage
-
-The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features.
-A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler.
-
-Note that for some cpu/os combinations some features will not be detected.
-`amd64` has rather good support and should work reliably on all platforms.
-
-Note that hypervisors may not pass through all CPU features.
-
-## arm64 feature detection
-
-Not all operating systems provide ARM features directly
-and there is no safe way to do so for the rest.
-
-Currently `arm64/linux` and `arm64/freebsd` should be quite reliable.
-`arm64/darwin` adds features expected from the M1 processor, but a lot remains undetected.
-
-A `DetectARM()` can be used if you are able to control your deployment,
-it will detect CPU features, but may crash if the OS doesn't intercept the calls.
-A `-cpu.arm` flag for detecting unsafe ARM features can be added. See below.
-
-Note that currently only features are detected on ARM,
-no additional information is currently available.
-
-## flags
-
-It is possible to add flags that affects cpu detection.
-
-For this the `Flags()` command is provided.
-
-This must be called *before* `flag.Parse()` AND after the flags have been parsed `Detect()` must be called.
-
-This means that any detection used in `init()` functions will not contain these flags.
-
-Example:
-
-```Go
-package main
-
-import (
- "flag"
- "fmt"
- "strings"
-
- "github.com/klauspost/cpuid/v2"
-)
-
-func main() {
- cpuid.Flags()
- flag.Parse()
- cpuid.Detect()
-
- // Test if we have these specific features:
- if cpuid.CPU.Supports(cpuid.SSE, cpuid.SSE2) {
- fmt.Println("We have Streaming SIMD 2 Extensions")
- }
-}
-```
-
-## commandline
-
-Download as binary from: https://github.com/klauspost/cpuid/releases
-
-Install from source:
-
-`go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest`
-
-### Example
-
-```
-λ cpuid
-Name: AMD Ryzen 9 3950X 16-Core Processor
-Vendor String: AuthenticAMD
-Vendor ID: AMD
-PhysicalCores: 16
-Threads Per Core: 2
-Logical Cores: 32
-CPU Family 23 Model: 113
-Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CLZERO,CMOV,CMPXCHG8,CPBOOST,CX16,F16C,FMA3,FXSR,FXSROPT,HTT,HYPERVISOR,LAHF,LZCNT,MCAOVERFLOW,MMX,MMXEXT,MOVBE,NX,OSXSAVE,POPCNT,RDRAND,RDSEED,RDTSCP,SCE,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3,SUCCOR,X87,XSAVE
-Microarchitecture level: 3
-Cacheline bytes: 64
-L1 Instruction Cache: 32768 bytes
-L1 Data Cache: 32768 bytes
-L2 Cache: 524288 bytes
-L3 Cache: 16777216 bytes
-
-```
-### JSON Output:
-
-```
-λ cpuid --json
-{
- "BrandName": "AMD Ryzen 9 3950X 16-Core Processor",
- "VendorID": 2,
- "VendorString": "AuthenticAMD",
- "PhysicalCores": 16,
- "ThreadsPerCore": 2,
- "LogicalCores": 32,
- "Family": 23,
- "Model": 113,
- "CacheLine": 64,
- "Hz": 0,
- "BoostFreq": 0,
- "Cache": {
- "L1I": 32768,
- "L1D": 32768,
- "L2": 524288,
- "L3": 16777216
- },
- "SGX": {
- "Available": false,
- "LaunchControl": false,
- "SGX1Supported": false,
- "SGX2Supported": false,
- "MaxEnclaveSizeNot64": 0,
- "MaxEnclaveSize64": 0,
- "EPCSections": null
- },
- "Features": [
- "ADX",
- "AESNI",
- "AVX",
- "AVX2",
- "BMI1",
- "BMI2",
- "CLMUL",
- "CLZERO",
- "CMOV",
- "CMPXCHG8",
- "CPBOOST",
- "CX16",
- "F16C",
- "FMA3",
- "FXSR",
- "FXSROPT",
- "HTT",
- "HYPERVISOR",
- "LAHF",
- "LZCNT",
- "MCAOVERFLOW",
- "MMX",
- "MMXEXT",
- "MOVBE",
- "NX",
- "OSXSAVE",
- "POPCNT",
- "RDRAND",
- "RDSEED",
- "RDTSCP",
- "SCE",
- "SHA",
- "SSE",
- "SSE2",
- "SSE3",
- "SSE4",
- "SSE42",
- "SSE4A",
- "SSSE3",
- "SUCCOR",
- "X87",
- "XSAVE"
- ],
- "X64Level": 3
-}
-```
-
-### Check CPU microarch level
-
-```
-λ cpuid --check-level=3
-2022/03/18 17:04:40 AMD Ryzen 9 3950X 16-Core Processor
-2022/03/18 17:04:40 Microarchitecture level 3 is supported. Max level is 3.
-Exit Code 0
-
-λ cpuid --check-level=4
-2022/03/18 17:06:18 AMD Ryzen 9 3950X 16-Core Processor
-2022/03/18 17:06:18 Microarchitecture level 4 not supported. Max level is 3.
-Exit Code 1
-```
-
-# license
-
-This code is published under an MIT license. See LICENSE file for more information.
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
deleted file mode 100644
index 27f3325..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid.go
+++ /dev/null
@@ -1,1291 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-// Package cpuid provides information about the CPU running the current program.
-//
-// CPU features are detected on startup, and kept for fast access through the life of the application.
-// Currently x86 / x64 (AMD64) as well as arm64 is supported.
-//
-// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
-//
-// Package home: https://github.com/klauspost/cpuid
-package cpuid
-
-import (
- "flag"
- "fmt"
- "math"
- "math/bits"
- "os"
- "runtime"
- "strings"
-)
-
-// AMD refererence: https://www.amd.com/system/files/TechDocs/25481.pdf
-// and Processor Programming Reference (PPR)
-
-// Vendor is a representation of a CPU vendor.
-type Vendor int
-
-const (
- VendorUnknown Vendor = iota
- Intel
- AMD
- VIA
- Transmeta
- NSC
- KVM // Kernel-based Virtual Machine
- MSVM // Microsoft Hyper-V or Windows Virtual PC
- VMware
- XenHVM
- Bhyve
- Hygon
- SiS
- RDC
-
- Ampere
- ARM
- Broadcom
- Cavium
- DEC
- Fujitsu
- Infineon
- Motorola
- NVIDIA
- AMCC
- Qualcomm
- Marvell
-
- lastVendor
-)
-
-//go:generate stringer -type=FeatureID,Vendor
-
-// FeatureID is the ID of a specific cpu feature.
-type FeatureID int
-
-const (
- // Keep index -1 as unknown
- UNKNOWN = -1
-
- // Add features
- ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
- AESNI // Advanced Encryption Standard New Instructions
- AMD3DNOW // AMD 3DNOW
- AMD3DNOWEXT // AMD 3DNowExt
- AMXBF16 // Tile computational operations on BFLOAT16 numbers
- AMXINT8 // Tile computational operations on 8-bit integers
- AMXTILE // Tile architecture
- AVX // AVX functions
- AVX2 // AVX2 functions
- AVX512BF16 // AVX-512 BFLOAT16 Instructions
- AVX512BITALG // AVX-512 Bit Algorithms
- AVX512BW // AVX-512 Byte and Word Instructions
- AVX512CD // AVX-512 Conflict Detection Instructions
- AVX512DQ // AVX-512 Doubleword and Quadword Instructions
- AVX512ER // AVX-512 Exponential and Reciprocal Instructions
- AVX512F // AVX-512 Foundation
- AVX512FP16 // AVX-512 FP16 Instructions
- AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions
- AVX512PF // AVX-512 Prefetch Instructions
- AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions
- AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2
- AVX512VL // AVX-512 Vector Length Extensions
- AVX512VNNI // AVX-512 Vector Neural Network Instructions
- AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
- AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
- AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one
- AVXVNNI // AVX (VEX encoded) VNNI neural network instructions
- BMI1 // Bit Manipulation Instruction Set 1
- BMI2 // Bit Manipulation Instruction Set 2
- CETIBT // Intel CET Indirect Branch Tracking
- CETSS // Intel CET Shadow Stack
- CLDEMOTE // Cache Line Demote
- CLMUL // Carry-less Multiplication
- CLZERO // CLZERO instruction supported
- CMOV // i686 CMOV
- CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB
- CMPXCHG8 // CMPXCHG8 instruction
- CPBOOST // Core Performance Boost
- CX16 // CMPXCHG16B Instruction
- ENQCMD // Enqueue Command
- ERMS // Enhanced REP MOVSB/STOSB
- F16C // Half-precision floating-point conversion
- FMA3 // Intel FMA 3. Does not imply AVX.
- FMA4 // Bulldozer FMA4 functions
- FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9
- FXSROPT // FXSAVE/FXRSTOR optimizations
- GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
- HLE // Hardware Lock Elision
- HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
- HTT // Hyperthreading (enabled)
- HWA // Hardware assert supported. Indicates support for MSRC001_10
- HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors
- IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
- IBS // Instruction Based Sampling (AMD)
- IBSBRNTRGT // Instruction Based Sampling Feature (AMD)
- IBSFETCHSAM // Instruction Based Sampling Feature (AMD)
- IBSFFV // Instruction Based Sampling Feature (AMD)
- IBSOPCNT // Instruction Based Sampling Feature (AMD)
- IBSOPCNTEXT // Instruction Based Sampling Feature (AMD)
- IBSOPSAM // Instruction Based Sampling Feature (AMD)
- IBSRDWROPCNT // Instruction Based Sampling Feature (AMD)
- IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD)
- IBS_PREVENTHOST // Disallowing IBS use by the host supported
- INT_WBINVD // WBINVD/WBNOINVD are interruptible.
- INVLPGB // NVLPGB and TLBSYNC instruction supported
- LAHF // LAHF/SAHF in long mode
- LAM // If set, CPU supports Linear Address Masking
- LBRVIRT // LBR virtualization
- LZCNT // LZCNT instruction
- MCAOVERFLOW // MCA overflow recovery support.
- MCOMMIT // MCOMMIT instruction supported
- MMX // standard MMX
- MMXEXT // SSE integer functions or AMD MMX ext
- MOVBE // MOVBE instruction (big-endian)
- MOVDIR64B // Move 64 Bytes as Direct Store
- MOVDIRI // Move Doubleword as Direct Store
- MOVSB_ZL // Fast Zero-Length MOVSB
- MPX // Intel MPX (Memory Protection Extensions)
- MSRIRC // Instruction Retired Counter MSR available
- MSR_PAGEFLUSH // Page Flush MSR available
- NRIPS // Indicates support for NRIP save on VMEXIT
- NX // NX (No-Execute) bit
- OSXSAVE // XSAVE enabled by OS
- PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption
- POPCNT // POPCNT instruction
- RDPRU // RDPRU instruction supported
- RDRAND // RDRAND instruction is available
- RDSEED // RDSEED instruction is available
- RDTSCP // RDTSCP Instruction
- RTM // Restricted Transactional Memory
- RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort.
- SERIALIZE // Serialize Instruction Execution
- SEV // AMD Secure Encrypted Virtualization supported
- SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host
- SEV_ALTERNATIVE // AMD SEV Alternate Injection supported
- SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests
- SEV_ES // AMD SEV Encrypted State supported
- SEV_RESTRICTED // AMD SEV Restricted Injection supported
- SEV_SNP // AMD SEV Secure Nested Paging supported
- SGX // Software Guard Extensions
- SGXLC // Software Guard Extensions Launch Control
- SHA // Intel SHA Extensions
- SME // AMD Secure Memory Encryption supported
- SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced
- SSE // SSE functions
- SSE2 // P4 SSE functions
- SSE3 // Prescott SSE3 functions
- SSE4 // Penryn SSE4.1 functions
- SSE42 // Nehalem SSE4.2 functions
- SSE4A // AMD Barcelona microarchitecture SSE4a instructions
- SSSE3 // Conroe SSSE3 functions
- STIBP // Single Thread Indirect Branch Predictors
- STOSB_SHORT // Fast short STOSB
- SUCCOR // Software uncorrectable error containment and recovery capability.
- SVM // AMD Secure Virtual Machine
- SVMDA // Indicates support for the SVM decode assists.
- SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
- SVML // AMD SVM lock. Indicates support for SVM-Lock.
- SVMNP // AMD SVM nested paging
- SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter
- SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
- SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
- SYSEE // SYSENTER and SYSEXIT instructions
- TBM // AMD Trailing Bit Manipulation
- TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
- TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
- TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
- TSXLDTRK // Intel TSX Suspend Load Address Tracking
- VAES // Vector AES. AVX(512) versions requires additional checks.
- VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits.
- VMPL // AMD VM Permission Levels supported
- VMSA_REGPROT // AMD VMSA Register Protection supported
- VMX // Virtual Machine Extensions
- VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
- VTE // AMD Virtual Transparent Encryption supported
- WAITPKG // TPAUSE, UMONITOR, UMWAIT
- WBNOINVD // Write Back and Do Not Invalidate Cache
- X87 // FPU
- XGETBV1 // Supports XGETBV with ECX = 1
- XOP // Bulldozer XOP functions
- XSAVE // XSAVE, XRESTOR, XSETBV, XGETBV
- XSAVEC // Supports XSAVEC and the compacted form of XRSTOR.
- XSAVEOPT // XSAVEOPT available
- XSAVES // Supports XSAVES/XRSTORS and IA32_XSS
-
- // ARM features:
- AESARM // AES instructions
- ARMCPUID // Some CPU ID registers readable at user-level
- ASIMD // Advanced SIMD
- ASIMDDP // SIMD Dot Product
- ASIMDHP // Advanced SIMD half-precision floating point
- ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
- ATOMICS // Large System Extensions (LSE)
- CRC32 // CRC32/CRC32C instructions
- DCPOP // Data cache clean to Point of Persistence (DC CVAP)
- EVTSTRM // Generic timer
- FCMA // Floatin point complex number addition and multiplication
- FP // Single-precision and double-precision floating point
- FPHP // Half-precision floating point
- GPA // Generic Pointer Authentication
- JSCVT // Javascript-style double->int convert (FJCVTZS)
- LRCPC // Weaker release consistency (LDAPR, etc)
- PMULL // Polynomial Multiply instructions (PMULL/PMULL2)
- SHA1 // SHA-1 instructions (SHA1C, etc)
- SHA2 // SHA-2 instructions (SHA256H, etc)
- SHA3 // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
- SHA512 // SHA512 instructions
- SM3 // SM3 instructions
- SM4 // SM4 instructions
- SVE // Scalable Vector Extension
- // Keep it last. It automatically defines the size of []flagSet
- lastID
-
- firstID FeatureID = UNKNOWN + 1
-)
-
-// CPUInfo contains information about the detected system CPU.
-type CPUInfo struct {
- BrandName string // Brand name reported by the CPU
- VendorID Vendor // Comparable CPU vendor ID
- VendorString string // Raw vendor string.
- featureSet flagSet // Features of the CPU
- PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable.
- ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable.
- LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
- Family int // CPU family number
- Model int // CPU model number
- Stepping int // CPU stepping info
- CacheLine int // Cache line size in bytes. Will be 0 if undetectable.
- Hz int64 // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
- BoostFreq int64 // Max clock speed, if known, 0 otherwise
- Cache struct {
- L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
- L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
- L2 int // L2 Cache (per core or shared). Will be -1 if undetected
- L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
- }
- SGX SGXSupport
- maxFunc uint32
- maxExFunc uint32
-}
-
-var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
-var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-var xgetbv func(index uint32) (eax, edx uint32)
-var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
-var darwinHasAVX512 = func() bool { return false }
-
-// CPU contains information about the CPU as detected on startup,
-// or when Detect last was called.
-//
-// Use this as the primary entry point to you data.
-var CPU CPUInfo
-
-func init() {
- initCPU()
- Detect()
-}
-
-// Detect will re-detect current CPU info.
-// This will replace the content of the exported CPU variable.
-//
-// Unless you expect the CPU to change while you are running your program
-// you should not need to call this function.
-// If you call this, you must ensure that no other goroutine is accessing the
-// exported CPU variable.
-func Detect() {
- // Set defaults
- CPU.ThreadsPerCore = 1
- CPU.Cache.L1I = -1
- CPU.Cache.L1D = -1
- CPU.Cache.L2 = -1
- CPU.Cache.L3 = -1
- safe := true
- if detectArmFlag != nil {
- safe = !*detectArmFlag
- }
- addInfo(&CPU, safe)
- if displayFeats != nil && *displayFeats {
- fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ","))
- // Exit with non-zero so tests will print value.
- os.Exit(1)
- }
- if disableFlag != nil {
- s := strings.Split(*disableFlag, ",")
- for _, feat := range s {
- feat := ParseFeature(strings.TrimSpace(feat))
- if feat != UNKNOWN {
- CPU.featureSet.unset(feat)
- }
- }
- }
-}
-
-// DetectARM will detect ARM64 features.
-// This is NOT done automatically since it can potentially crash
-// if the OS does not handle the command.
-// If in the future this can be done safely this function may not
-// do anything.
-func DetectARM() {
- addInfo(&CPU, false)
-}
-
-var detectArmFlag *bool
-var displayFeats *bool
-var disableFlag *string
-
-// Flags will enable flags.
-// This must be called *before* flag.Parse AND
-// Detect must be called after the flags have been parsed.
-// Note that this means that any detection used in init() functions
-// will not contain these flags.
-func Flags() {
- disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list")
- displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits")
- detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash")
-}
-
-// Supports returns whether the CPU supports all of the requested features.
-func (c CPUInfo) Supports(ids ...FeatureID) bool {
- for _, id := range ids {
- if !c.featureSet.inSet(id) {
- return false
- }
- }
- return true
-}
-
-// Has allows for checking a single feature.
-// Should be inlined by the compiler.
-func (c CPUInfo) Has(id FeatureID) bool {
- return c.featureSet.inSet(id)
-}
-
-// AnyOf returns whether the CPU supports one or more of the requested features.
-func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
- for _, id := range ids {
- if c.featureSet.inSet(id) {
- return true
- }
- }
- return false
-}
-
-// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
-var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)
-var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
-var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
-var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
-
-// X64Level returns the microarchitecture level detected on the CPU.
-// If features are lacking or non x64 mode, 0 is returned.
-// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
-func (c CPUInfo) X64Level() int {
- if c.featureSet.hasSet(level4Features) {
- return 4
- }
- if c.featureSet.hasSet(level3Features) {
- return 3
- }
- if c.featureSet.hasSet(level2Features) {
- return 2
- }
- if c.featureSet.hasSet(level1Features) {
- return 1
- }
- return 0
-}
-
-// Disable will disable one or several features.
-func (c *CPUInfo) Disable(ids ...FeatureID) bool {
- for _, id := range ids {
- c.featureSet.unset(id)
- }
- return true
-}
-
-// Enable will disable one or several features even if they were undetected.
-// This is of course not recommended for obvious reasons.
-func (c *CPUInfo) Enable(ids ...FeatureID) bool {
- for _, id := range ids {
- c.featureSet.set(id)
- }
- return true
-}
-
-// IsVendor returns true if vendor is recognized as Intel
-func (c CPUInfo) IsVendor(v Vendor) bool {
- return c.VendorID == v
-}
-
-// FeatureSet returns all available features as strings.
-func (c CPUInfo) FeatureSet() []string {
- s := make([]string, 0, c.featureSet.nEnabled())
- s = append(s, c.featureSet.Strings()...)
- return s
-}
-
-// RTCounter returns the 64-bit time-stamp counter
-// Uses the RDTSCP instruction. The value 0 is returned
-// if the CPU does not support the instruction.
-func (c CPUInfo) RTCounter() uint64 {
- if !c.Supports(RDTSCP) {
- return 0
- }
- a, _, _, d := rdtscpAsm()
- return uint64(a) | (uint64(d) << 32)
-}
-
-// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
-// This variable is OS dependent, but on Linux contains information
-// about the current cpu/core the code is running on.
-// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
-func (c CPUInfo) Ia32TscAux() uint32 {
- if !c.Supports(RDTSCP) {
- return 0
- }
- _, _, ecx, _ := rdtscpAsm()
- return ecx
-}
-
-// LogicalCPU will return the Logical CPU the code is currently executing on.
-// This is likely to change when the OS re-schedules the running thread
-// to another CPU.
-// If the current core cannot be detected, -1 will be returned.
-func (c CPUInfo) LogicalCPU() int {
- if c.maxFunc < 1 {
- return -1
- }
- _, ebx, _, _ := cpuid(1)
- return int(ebx >> 24)
-}
-
-// frequencies tries to compute the clock speed of the CPU. If leaf 15 is
-// supported, use it, otherwise parse the brand string. Yes, really.
-func (c *CPUInfo) frequencies() {
- c.Hz, c.BoostFreq = 0, 0
- mfi := maxFunctionID()
- if mfi >= 0x15 {
- eax, ebx, ecx, _ := cpuid(0x15)
- if eax != 0 && ebx != 0 && ecx != 0 {
- c.Hz = (int64(ecx) * int64(ebx)) / int64(eax)
- }
- }
- if mfi >= 0x16 {
- a, b, _, _ := cpuid(0x16)
- // Base...
- if a&0xffff > 0 {
- c.Hz = int64(a&0xffff) * 1_000_000
- }
- // Boost...
- if b&0xffff > 0 {
- c.BoostFreq = int64(b&0xffff) * 1_000_000
- }
- }
- if c.Hz > 0 {
- return
- }
-
- // computeHz determines the official rated speed of a CPU from its brand
- // string. This insanity is *actually the official documented way to do
- // this according to Intel*, prior to leaf 0x15 existing. The official
- // documentation only shows this working for exactly `x.xx` or `xxxx`
- // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
- // sizes.
- model := c.BrandName
- hz := strings.LastIndex(model, "Hz")
- if hz < 3 {
- return
- }
- var multiplier int64
- switch model[hz-1] {
- case 'M':
- multiplier = 1000 * 1000
- case 'G':
- multiplier = 1000 * 1000 * 1000
- case 'T':
- multiplier = 1000 * 1000 * 1000 * 1000
- }
- if multiplier == 0 {
- return
- }
- freq := int64(0)
- divisor := int64(0)
- decimalShift := int64(1)
- var i int
- for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
- if model[i] >= '0' && model[i] <= '9' {
- freq += int64(model[i]-'0') * decimalShift
- decimalShift *= 10
- } else if model[i] == '.' {
- if divisor != 0 {
- return
- }
- divisor = decimalShift
- } else {
- return
- }
- }
- // we didn't find a space
- if i < 0 {
- return
- }
- if divisor != 0 {
- c.Hz = (freq * multiplier) / divisor
- return
- }
- c.Hz = freq * multiplier
-}
-
-// VM Will return true if the cpu id indicates we are in
-// a virtual machine.
-func (c CPUInfo) VM() bool {
- return CPU.featureSet.inSet(HYPERVISOR)
-}
-
-// flags contains detected cpu features and characteristics
-type flags uint64
-
-// log2(bits_in_uint64)
-const flagBitsLog2 = 6
-const flagBits = 1 << flagBitsLog2
-const flagMask = flagBits - 1
-
-// flagSet contains detected cpu features and characteristics in an array of flags
-type flagSet [(lastID + flagMask) / flagBits]flags
-
-func (s flagSet) inSet(feat FeatureID) bool {
- return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
-}
-
-func (s *flagSet) set(feat FeatureID) {
- s[feat>>flagBitsLog2] |= 1 << (feat & flagMask)
-}
-
-// setIf will set a feature if boolean is true.
-func (s *flagSet) setIf(cond bool, features ...FeatureID) {
- if cond {
- for _, offset := range features {
- s[offset>>flagBitsLog2] |= 1 << (offset & flagMask)
- }
- }
-}
-
-func (s *flagSet) unset(offset FeatureID) {
- bit := flags(1 << (offset & flagMask))
- s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit
-}
-
-// or with another flagset.
-func (s *flagSet) or(other flagSet) {
- for i, v := range other[:] {
- s[i] |= v
- }
-}
-
-// hasSet returns whether all features are present.
-func (s flagSet) hasSet(other flagSet) bool {
- for i, v := range other[:] {
- if s[i]&v != v {
- return false
- }
- }
- return true
-}
-
-// nEnabled will return the number of enabled flags.
-func (s flagSet) nEnabled() (n int) {
- for _, v := range s[:] {
- n += bits.OnesCount64(uint64(v))
- }
- return n
-}
-
-func flagSetWith(feat ...FeatureID) flagSet {
- var res flagSet
- for _, f := range feat {
- res.set(f)
- }
- return res
-}
-
-// ParseFeature will parse the string and return the ID of the matching feature.
-// Will return UNKNOWN if not found.
-func ParseFeature(s string) FeatureID {
- s = strings.ToUpper(s)
- for i := firstID; i < lastID; i++ {
- if i.String() == s {
- return i
- }
- }
- return UNKNOWN
-}
-
-// Strings returns an array of the detected features for FlagsSet.
-func (s flagSet) Strings() []string {
- if len(s) == 0 {
- return []string{""}
- }
- r := make([]string, 0)
- for i := firstID; i < lastID; i++ {
- if s.inSet(i) {
- r = append(r, i.String())
- }
- }
- return r
-}
-
-func maxExtendedFunction() uint32 {
- eax, _, _, _ := cpuid(0x80000000)
- return eax
-}
-
-func maxFunctionID() uint32 {
- a, _, _, _ := cpuid(0)
- return a
-}
-
-func brandName() string {
- if maxExtendedFunction() >= 0x80000004 {
- v := make([]uint32, 0, 48)
- for i := uint32(0); i < 3; i++ {
- a, b, c, d := cpuid(0x80000002 + i)
- v = append(v, a, b, c, d)
- }
- return strings.Trim(string(valAsString(v...)), " ")
- }
- return "unknown"
-}
-
-func threadsPerCore() int {
- mfi := maxFunctionID()
- vend, _ := vendorID()
-
- if mfi < 0x4 || (vend != Intel && vend != AMD) {
- return 1
- }
-
- if mfi < 0xb {
- if vend != Intel {
- return 1
- }
- _, b, _, d := cpuid(1)
- if (d & (1 << 28)) != 0 {
- // v will contain logical core count
- v := (b >> 16) & 255
- if v > 1 {
- a4, _, _, _ := cpuid(4)
- // physical cores
- v2 := (a4 >> 26) + 1
- if v2 > 0 {
- return int(v) / int(v2)
- }
- }
- }
- return 1
- }
- _, b, _, _ := cpuidex(0xb, 0)
- if b&0xffff == 0 {
- if vend == AMD {
- // Workaround for AMD returning 0, assume 2 if >= Zen 2
- // It will be more correct than not.
- fam, _, _ := familyModel()
- _, _, _, d := cpuid(1)
- if (d&(1<<28)) != 0 && fam >= 23 {
- return 2
- }
- }
- return 1
- }
- return int(b & 0xffff)
-}
-
-func logicalCores() int {
- mfi := maxFunctionID()
- v, _ := vendorID()
- switch v {
- case Intel:
- // Use this on old Intel processors
- if mfi < 0xb {
- if mfi < 1 {
- return 0
- }
- // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
- // that can be assigned to logical processors in a physical package.
- // The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
- _, ebx, _, _ := cpuid(1)
- logical := (ebx >> 16) & 0xff
- return int(logical)
- }
- _, b, _, _ := cpuidex(0xb, 1)
- return int(b & 0xffff)
- case AMD, Hygon:
- _, b, _, _ := cpuid(1)
- return int((b >> 16) & 0xff)
- default:
- return 0
- }
-}
-
-func familyModel() (family, model, stepping int) {
- if maxFunctionID() < 0x1 {
- return 0, 0, 0
- }
- eax, _, _, _ := cpuid(1)
- // If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0].
- family = int((eax >> 8) & 0xf)
- extFam := family == 0x6 // Intel is 0x6, needs extended model.
- if family == 0xf {
- // Add ExtFamily
- family += int((eax >> 20) & 0xff)
- extFam = true
- }
- // If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0].
- model = int((eax >> 4) & 0xf)
- if extFam {
- // Add ExtModel
- model += int((eax >> 12) & 0xf0)
- }
- stepping = int(eax & 0xf)
- return family, model, stepping
-}
-
-func physicalCores() int {
- v, _ := vendorID()
- switch v {
- case Intel:
- return logicalCores() / threadsPerCore()
- case AMD, Hygon:
- lc := logicalCores()
- tpc := threadsPerCore()
- if lc > 0 && tpc > 0 {
- return lc / tpc
- }
-
- // The following is inaccurate on AMD EPYC 7742 64-Core Processor
- if maxExtendedFunction() >= 0x80000008 {
- _, _, c, _ := cpuid(0x80000008)
- if c&0xff > 0 {
- return int(c&0xff) + 1
- }
- }
- }
- return 0
-}
-
-// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
-var vendorMapping = map[string]Vendor{
- "AMDisbetter!": AMD,
- "AuthenticAMD": AMD,
- "CentaurHauls": VIA,
- "GenuineIntel": Intel,
- "TransmetaCPU": Transmeta,
- "GenuineTMx86": Transmeta,
- "Geode by NSC": NSC,
- "VIA VIA VIA ": VIA,
- "KVMKVMKVMKVM": KVM,
- "Microsoft Hv": MSVM,
- "VMwareVMware": VMware,
- "XenVMMXenVMM": XenHVM,
- "bhyve bhyve ": Bhyve,
- "HygonGenuine": Hygon,
- "Vortex86 SoC": SiS,
- "SiS SiS SiS ": SiS,
- "RiseRiseRise": SiS,
- "Genuine RDC": RDC,
-}
-
-func vendorID() (Vendor, string) {
- _, b, c, d := cpuid(0)
- v := string(valAsString(b, d, c))
- vend, ok := vendorMapping[v]
- if !ok {
- return VendorUnknown, v
- }
- return vend, v
-}
-
-func cacheLine() int {
- if maxFunctionID() < 0x1 {
- return 0
- }
-
- _, ebx, _, _ := cpuid(1)
- cache := (ebx & 0xff00) >> 5 // cflush size
- if cache == 0 && maxExtendedFunction() >= 0x80000006 {
- _, _, ecx, _ := cpuid(0x80000006)
- cache = ecx & 0xff // cacheline size
- }
- // TODO: Read from Cache and TLB Information
- return int(cache)
-}
-
-func (c *CPUInfo) cacheSize() {
- c.Cache.L1D = -1
- c.Cache.L1I = -1
- c.Cache.L2 = -1
- c.Cache.L3 = -1
- vendor, _ := vendorID()
- switch vendor {
- case Intel:
- if maxFunctionID() < 4 {
- return
- }
- c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0
- for i := uint32(0); ; i++ {
- eax, ebx, ecx, _ := cpuidex(4, i)
- cacheType := eax & 15
- if cacheType == 0 {
- break
- }
- cacheLevel := (eax >> 5) & 7
- coherency := int(ebx&0xfff) + 1
- partitions := int((ebx>>12)&0x3ff) + 1
- associativity := int((ebx>>22)&0x3ff) + 1
- sets := int(ecx) + 1
- size := associativity * partitions * coherency * sets
- switch cacheLevel {
- case 1:
- if cacheType == 1 {
- // 1 = Data Cache
- c.Cache.L1D = size
- } else if cacheType == 2 {
- // 2 = Instruction Cache
- c.Cache.L1I = size
- } else {
- if c.Cache.L1D < 0 {
- c.Cache.L1I = size
- }
- if c.Cache.L1I < 0 {
- c.Cache.L1I = size
- }
- }
- case 2:
- c.Cache.L2 = size
- case 3:
- c.Cache.L3 = size
- }
- }
- case AMD, Hygon:
- // Untested.
- if maxExtendedFunction() < 0x80000005 {
- return
- }
- _, _, ecx, edx := cpuid(0x80000005)
- c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
- c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
-
- if maxExtendedFunction() < 0x80000006 {
- return
- }
- _, _, ecx, _ = cpuid(0x80000006)
- c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
-
- // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
- if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) {
- return
- }
-
- // Xen Hypervisor is buggy and returns the same entry no matter ECX value.
- // Hack: When we encounter the same entry 100 times we break.
- nSame := 0
- var last uint32
- for i := uint32(0); i < math.MaxUint32; i++ {
- eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
-
- level := (eax >> 5) & 7
- cacheNumSets := ecx + 1
- cacheLineSize := 1 + (ebx & 2047)
- cachePhysPartitions := 1 + ((ebx >> 12) & 511)
- cacheNumWays := 1 + ((ebx >> 22) & 511)
-
- typ := eax & 15
- size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
- if typ == 0 {
- return
- }
-
- // Check for the same value repeated.
- comb := eax ^ ebx ^ ecx
- if comb == last {
- nSame++
- if nSame == 100 {
- return
- }
- }
- last = comb
-
- switch level {
- case 1:
- switch typ {
- case 1:
- // Data cache
- c.Cache.L1D = size
- case 2:
- // Inst cache
- c.Cache.L1I = size
- default:
- if c.Cache.L1D < 0 {
- c.Cache.L1I = size
- }
- if c.Cache.L1I < 0 {
- c.Cache.L1I = size
- }
- }
- case 2:
- c.Cache.L2 = size
- case 3:
- c.Cache.L3 = size
- }
- }
- }
-}
-
-type SGXEPCSection struct {
- BaseAddress uint64
- EPCSize uint64
-}
-
-type SGXSupport struct {
- Available bool
- LaunchControl bool
- SGX1Supported bool
- SGX2Supported bool
- MaxEnclaveSizeNot64 int64
- MaxEnclaveSize64 int64
- EPCSections []SGXEPCSection
-}
-
-func hasSGX(available, lc bool) (rval SGXSupport) {
- rval.Available = available
-
- if !available {
- return
- }
-
- rval.LaunchControl = lc
-
- a, _, _, d := cpuidex(0x12, 0)
- rval.SGX1Supported = a&0x01 != 0
- rval.SGX2Supported = a&0x02 != 0
- rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2
- rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
- rval.EPCSections = make([]SGXEPCSection, 0)
-
- for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
- eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
- leafType := eax & 0xf
-
- if leafType == 0 {
- // Invalid subleaf, stop iterating
- break
- } else if leafType == 1 {
- // EPC Section subleaf
- baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
- size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
-
- section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size}
- rval.EPCSections = append(rval.EPCSections, section)
- }
- }
-
- return
-}
-
-func support() flagSet {
- var fs flagSet
- mfi := maxFunctionID()
- vend, _ := vendorID()
- if mfi < 0x1 {
- return fs
- }
- family, model, _ := familyModel()
-
- _, _, c, d := cpuid(1)
- fs.setIf((d&(1<<0)) != 0, X87)
- fs.setIf((d&(1<<8)) != 0, CMPXCHG8)
- fs.setIf((d&(1<<11)) != 0, SYSEE)
- fs.setIf((d&(1<<15)) != 0, CMOV)
- fs.setIf((d&(1<<23)) != 0, MMX)
- fs.setIf((d&(1<<24)) != 0, FXSR)
- fs.setIf((d&(1<<25)) != 0, FXSROPT)
- fs.setIf((d&(1<<25)) != 0, SSE)
- fs.setIf((d&(1<<26)) != 0, SSE2)
- fs.setIf((c&1) != 0, SSE3)
- fs.setIf((c&(1<<5)) != 0, VMX)
- fs.setIf((c&(1<<9)) != 0, SSSE3)
- fs.setIf((c&(1<<19)) != 0, SSE4)
- fs.setIf((c&(1<<20)) != 0, SSE42)
- fs.setIf((c&(1<<25)) != 0, AESNI)
- fs.setIf((c&(1<<1)) != 0, CLMUL)
- fs.setIf(c&(1<<22) != 0, MOVBE)
- fs.setIf(c&(1<<23) != 0, POPCNT)
- fs.setIf(c&(1<<30) != 0, RDRAND)
-
- // This bit has been reserved by Intel & AMD for use by hypervisors,
- // and indicates the presence of a hypervisor.
- fs.setIf(c&(1<<31) != 0, HYPERVISOR)
- fs.setIf(c&(1<<29) != 0, F16C)
- fs.setIf(c&(1<<13) != 0, CX16)
-
- if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
- fs.setIf(threadsPerCore() > 1, HTT)
- }
- if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
- fs.setIf(threadsPerCore() > 1, HTT)
- }
- fs.setIf(c&1<<26 != 0, XSAVE)
- fs.setIf(c&1<<27 != 0, OSXSAVE)
- // Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits
- const avxCheck = 1<<26 | 1<<27 | 1<<28
- if c&avxCheck == avxCheck {
- // Check for OS support
- eax, _ := xgetbv(0)
- if (eax & 0x6) == 0x6 {
- fs.set(AVX)
- switch vend {
- case Intel:
- // Older than Haswell.
- fs.setIf(family == 6 && model < 60, AVXSLOW)
- case AMD:
- // Older than Zen 2
- fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW)
- }
- }
- }
- // FMA3 can be used with SSE registers, so no OS support is strictly needed.
- // fma3 and OSXSAVE needed.
- const fma3Check = 1<<12 | 1<<27
- fs.setIf(c&fma3Check == fma3Check, FMA3)
-
- // Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
- if mfi >= 7 {
- _, ebx, ecx, edx := cpuidex(7, 0)
- if fs.inSet(AVX) && (ebx&0x00000020) != 0 {
- fs.set(AVX2)
- }
- // CPUID.(EAX=7, ECX=0).EBX
- if (ebx & 0x00000008) != 0 {
- fs.set(BMI1)
- fs.setIf((ebx&0x00000100) != 0, BMI2)
- }
- fs.setIf(ebx&(1<<2) != 0, SGX)
- fs.setIf(ebx&(1<<4) != 0, HLE)
- fs.setIf(ebx&(1<<9) != 0, ERMS)
- fs.setIf(ebx&(1<<11) != 0, RTM)
- fs.setIf(ebx&(1<<14) != 0, MPX)
- fs.setIf(ebx&(1<<18) != 0, RDSEED)
- fs.setIf(ebx&(1<<19) != 0, ADX)
- fs.setIf(ebx&(1<<29) != 0, SHA)
-
- // CPUID.(EAX=7, ECX=0).ECX
- fs.setIf(ecx&(1<<5) != 0, WAITPKG)
- fs.setIf(ecx&(1<<7) != 0, CETSS)
- fs.setIf(ecx&(1<<8) != 0, GFNI)
- fs.setIf(ecx&(1<<9) != 0, VAES)
- fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
- fs.setIf(ecx&(1<<13) != 0, TME)
- fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
- fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
- fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
- fs.setIf(ecx&(1<<29) != 0, ENQCMD)
- fs.setIf(ecx&(1<<30) != 0, SGXLC)
-
- // CPUID.(EAX=7, ECX=0).EDX
- fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
- fs.setIf(edx&(1<<14) != 0, SERIALIZE)
- fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
- fs.setIf(edx&(1<<18) != 0, PCONFIG)
- fs.setIf(edx&(1<<20) != 0, CETIBT)
- fs.setIf(edx&(1<<26) != 0, IBPB)
- fs.setIf(edx&(1<<27) != 0, STIBP)
-
- // CPUID.(EAX=7, ECX=1)
- eax1, _, _, _ := cpuidex(7, 1)
- fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
- fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
- fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
- fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
- fs.setIf(eax1&(1<<22) != 0, HRESET)
- fs.setIf(eax1&(1<<26) != 0, LAM)
-
- // Only detect AVX-512 features if XGETBV is supported
- if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
- // Check for OS support
- eax, _ := xgetbv(0)
-
- // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
- // ZMM16-ZMM31 state are enabled by OS)
- /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
- hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3
- if runtime.GOOS == "darwin" {
- hasAVX512 = fs.inSet(AVX) && darwinHasAVX512()
- }
- if hasAVX512 {
- fs.setIf(ebx&(1<<16) != 0, AVX512F)
- fs.setIf(ebx&(1<<17) != 0, AVX512DQ)
- fs.setIf(ebx&(1<<21) != 0, AVX512IFMA)
- fs.setIf(ebx&(1<<26) != 0, AVX512PF)
- fs.setIf(ebx&(1<<27) != 0, AVX512ER)
- fs.setIf(ebx&(1<<28) != 0, AVX512CD)
- fs.setIf(ebx&(1<<30) != 0, AVX512BW)
- fs.setIf(ebx&(1<<31) != 0, AVX512VL)
- // ecx
- fs.setIf(ecx&(1<<1) != 0, AVX512VBMI)
- fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2)
- fs.setIf(ecx&(1<<11) != 0, AVX512VNNI)
- fs.setIf(ecx&(1<<12) != 0, AVX512BITALG)
- fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ)
- // edx
- fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT)
- fs.setIf(edx&(1<<22) != 0, AMXBF16)
- fs.setIf(edx&(1<<23) != 0, AVX512FP16)
- fs.setIf(edx&(1<<24) != 0, AMXTILE)
- fs.setIf(edx&(1<<25) != 0, AMXINT8)
- // eax1 = CPUID.(EAX=7, ECX=1).EAX
- fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
- }
- }
- }
- // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
- // EAX
- // Bit 00: XSAVEOPT is available.
- // Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set.
- // Bit 02: Supports XGETBV with ECX = 1 if set.
- // Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set.
- // Bits 31 - 04: Reserved.
- // EBX
- // Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS.
- // ECX
- // Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1.
- // EDX?
- // Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved.
- if mfi >= 0xd {
- if fs.inSet(XSAVE) {
- eax, _, _, _ := cpuidex(0xd, 1)
- fs.setIf(eax&(1<<0) != 0, XSAVEOPT)
- fs.setIf(eax&(1<<1) != 0, XSAVEC)
- fs.setIf(eax&(1<<2) != 0, XGETBV1)
- fs.setIf(eax&(1<<3) != 0, XSAVES)
- }
- }
- if maxExtendedFunction() >= 0x80000001 {
- _, _, c, d := cpuid(0x80000001)
- if (c & (1 << 5)) != 0 {
- fs.set(LZCNT)
- fs.set(POPCNT)
- }
- // ECX
- fs.setIf((c&(1<<0)) != 0, LAHF)
- fs.setIf((c&(1<<2)) != 0, SVM)
- fs.setIf((c&(1<<6)) != 0, SSE4A)
- fs.setIf((c&(1<<10)) != 0, IBS)
- fs.setIf((c&(1<<22)) != 0, TOPEXT)
-
- // EDX
- fs.setIf(d&(1<<11) != 0, SYSCALL)
- fs.setIf(d&(1<<20) != 0, NX)
- fs.setIf(d&(1<<22) != 0, MMXEXT)
- fs.setIf(d&(1<<23) != 0, MMX)
- fs.setIf(d&(1<<24) != 0, FXSR)
- fs.setIf(d&(1<<25) != 0, FXSROPT)
- fs.setIf(d&(1<<27) != 0, RDTSCP)
- fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT)
- fs.setIf(d&(1<<31) != 0, AMD3DNOW)
-
- /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
- * used unless the OS has AVX support. */
- if fs.inSet(AVX) {
- fs.setIf((c&(1<<11)) != 0, XOP)
- fs.setIf((c&(1<<16)) != 0, FMA4)
- }
-
- }
- if maxExtendedFunction() >= 0x80000007 {
- _, b, _, d := cpuid(0x80000007)
- fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW)
- fs.setIf((b&(1<<1)) != 0, SUCCOR)
- fs.setIf((b&(1<<2)) != 0, HWA)
- fs.setIf((d&(1<<9)) != 0, CPBOOST)
- }
-
- if maxExtendedFunction() >= 0x80000008 {
- _, b, _, _ := cpuid(0x80000008)
- fs.setIf((b&(1<<9)) != 0, WBNOINVD)
- fs.setIf((b&(1<<8)) != 0, MCOMMIT)
- fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
- fs.setIf((b&(1<<4)) != 0, RDPRU)
- fs.setIf((b&(1<<3)) != 0, INVLPGB)
- fs.setIf((b&(1<<1)) != 0, MSRIRC)
- fs.setIf((b&(1<<0)) != 0, CLZERO)
- }
-
- if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A {
- _, _, _, edx := cpuid(0x8000000A)
- fs.setIf((edx>>0)&1 == 1, SVMNP)
- fs.setIf((edx>>1)&1 == 1, LBRVIRT)
- fs.setIf((edx>>2)&1 == 1, SVML)
- fs.setIf((edx>>3)&1 == 1, NRIPS)
- fs.setIf((edx>>4)&1 == 1, TSCRATEMSR)
- fs.setIf((edx>>5)&1 == 1, VMCBCLEAN)
- fs.setIf((edx>>6)&1 == 1, SVMFBASID)
- fs.setIf((edx>>7)&1 == 1, SVMDA)
- fs.setIf((edx>>10)&1 == 1, SVMPF)
- fs.setIf((edx>>12)&1 == 1, SVMPFT)
- }
-
- if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
- eax, _, _, _ := cpuid(0x8000001b)
- fs.setIf((eax>>0)&1 == 1, IBSFFV)
- fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM)
- fs.setIf((eax>>2)&1 == 1, IBSOPSAM)
- fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT)
- fs.setIf((eax>>4)&1 == 1, IBSOPCNT)
- fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
- fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
- fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
- }
-
- if maxExtendedFunction() >= 0x8000001f && vend == AMD {
- a, _, _, _ := cpuid(0x8000001f)
- fs.setIf((a>>0)&1 == 1, SME)
- fs.setIf((a>>1)&1 == 1, SEV)
- fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH)
- fs.setIf((a>>3)&1 == 1, SEV_ES)
- fs.setIf((a>>4)&1 == 1, SEV_SNP)
- fs.setIf((a>>5)&1 == 1, VMPL)
- fs.setIf((a>>10)&1 == 1, SME_COHERENT)
- fs.setIf((a>>11)&1 == 1, SEV_64BIT)
- fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED)
- fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE)
- fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP)
- fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST)
- fs.setIf((a>>16)&1 == 1, VTE)
- fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
- }
-
- return fs
-}
-
-func valAsString(values ...uint32) []byte {
- r := make([]byte, 4*len(values))
- for i, v := range values {
- dst := r[i*4:]
- dst[0] = byte(v & 0xff)
- dst[1] = byte((v >> 8) & 0xff)
- dst[2] = byte((v >> 16) & 0xff)
- dst[3] = byte((v >> 24) & 0xff)
- switch {
- case dst[0] == 0:
- return r[:i*4]
- case dst[1] == 0:
- return r[:i*4+1]
- case dst[2] == 0:
- return r[:i*4+2]
- case dst[3] == 0:
- return r[:i*4+3]
- }
- }
- return r
-}
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s b/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s
deleted file mode 100644
index 8587c3a..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//+build 386,!gccgo,!noasm,!appengine
-
-// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuid(SB), 7, $0
- XORL CX, CX
- MOVL op+0(FP), AX
- CPUID
- MOVL AX, eax+4(FP)
- MOVL BX, ebx+8(FP)
- MOVL CX, ecx+12(FP)
- MOVL DX, edx+16(FP)
- RET
-
-// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuidex(SB), 7, $0
- MOVL op+0(FP), AX
- MOVL op2+4(FP), CX
- CPUID
- MOVL AX, eax+8(FP)
- MOVL BX, ebx+12(FP)
- MOVL CX, ecx+16(FP)
- MOVL DX, edx+20(FP)
- RET
-
-// func xgetbv(index uint32) (eax, edx uint32)
-TEXT ·asmXgetbv(SB), 7, $0
- MOVL index+0(FP), CX
- BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
- MOVL AX, eax+4(FP)
- MOVL DX, edx+8(FP)
- RET
-
-// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
-TEXT ·asmRdtscpAsm(SB), 7, $0
- BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
- MOVL AX, eax+0(FP)
- MOVL BX, ebx+4(FP)
- MOVL CX, ecx+8(FP)
- MOVL DX, edx+12(FP)
- RET
-
-// func asmDarwinHasAVX512() bool
-TEXT ·asmDarwinHasAVX512(SB), 7, $0
- MOVL $0, eax+0(FP)
- RET
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s b/vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s
deleted file mode 100644
index bc11f89..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//+build amd64,!gccgo,!noasm,!appengine
-
-// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuid(SB), 7, $0
- XORQ CX, CX
- MOVL op+0(FP), AX
- CPUID
- MOVL AX, eax+8(FP)
- MOVL BX, ebx+12(FP)
- MOVL CX, ecx+16(FP)
- MOVL DX, edx+20(FP)
- RET
-
-// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuidex(SB), 7, $0
- MOVL op+0(FP), AX
- MOVL op2+4(FP), CX
- CPUID
- MOVL AX, eax+8(FP)
- MOVL BX, ebx+12(FP)
- MOVL CX, ecx+16(FP)
- MOVL DX, edx+20(FP)
- RET
-
-// func asmXgetbv(index uint32) (eax, edx uint32)
-TEXT ·asmXgetbv(SB), 7, $0
- MOVL index+0(FP), CX
- BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
- MOVL AX, eax+8(FP)
- MOVL DX, edx+12(FP)
- RET
-
-// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
-TEXT ·asmRdtscpAsm(SB), 7, $0
- BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
- MOVL AX, eax+0(FP)
- MOVL BX, ebx+4(FP)
- MOVL CX, ecx+8(FP)
- MOVL DX, edx+12(FP)
- RET
-
-// From https://go-review.googlesource.com/c/sys/+/285572/
-// func asmDarwinHasAVX512() bool
-TEXT ·asmDarwinHasAVX512(SB), 7, $0-1
- MOVB $0, ret+0(FP) // default to false
-
-#ifdef GOOS_darwin // return if not darwin
-#ifdef GOARCH_amd64 // return if not amd64
-// These values from:
-// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
-#define commpage64_base_address 0x00007fffffe00000
-#define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
-#define commpage64_version (commpage64_base_address+0x01E)
-#define hasAVX512F 0x0000004000000000
- MOVQ $commpage64_version, BX
- MOVW (BX), AX
- CMPW AX, $13 // versions < 13 do not support AVX512
- JL no_avx512
- MOVQ $commpage64_cpu_capabilities64, BX
- MOVQ (BX), AX
- MOVQ $hasAVX512F, CX
- ANDQ CX, AX
- JZ no_avx512
- MOVB $1, ret+0(FP)
-
-no_avx512:
-#endif
-#endif
- RET
-
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s b/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s
deleted file mode 100644
index b31d6ae..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//+build arm64,!gccgo,!noasm,!appengine
-
-// See https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt
-
-// func getMidr
-TEXT ·getMidr(SB), 7, $0
- WORD $0xd5380000 // mrs x0, midr_el1 /* Main ID Register */
- MOVD R0, midr+0(FP)
- RET
-
-// func getProcFeatures
-TEXT ·getProcFeatures(SB), 7, $0
- WORD $0xd5380400 // mrs x0, id_aa64pfr0_el1 /* Processor Feature Register 0 */
- MOVD R0, procFeatures+0(FP)
- RET
-
-// func getInstAttributes
-TEXT ·getInstAttributes(SB), 7, $0
- WORD $0xd5380600 // mrs x0, id_aa64isar0_el1 /* Instruction Set Attribute Register 0 */
- WORD $0xd5380621 // mrs x1, id_aa64isar1_el1 /* Instruction Set Attribute Register 1 */
- MOVD R0, instAttrReg0+0(FP)
- MOVD R1, instAttrReg1+8(FP)
- RET
-
diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go b/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go
deleted file mode 100644
index 9a53504..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go
+++ /dev/null
@@ -1,247 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//go:build arm64 && !gccgo && !noasm && !appengine
-// +build arm64,!gccgo,!noasm,!appengine
-
-package cpuid
-
-import "runtime"
-
-func getMidr() (midr uint64)
-func getProcFeatures() (procFeatures uint64)
-func getInstAttributes() (instAttrReg0, instAttrReg1 uint64)
-
-func initCPU() {
- cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
- cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
- xgetbv = func(uint32) (a, b uint32) { return 0, 0 }
- rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
-}
-
-func addInfo(c *CPUInfo, safe bool) {
- // Seems to be safe to assume on ARM64
- c.CacheLine = 64
- detectOS(c)
-
- // ARM64 disabled since it may crash if interrupt is not intercepted by OS.
- if safe && !c.Supports(ARMCPUID) && runtime.GOOS != "freebsd" {
- return
- }
- midr := getMidr()
-
- // MIDR_EL1 - Main ID Register
- // https://developer.arm.com/docs/ddi0595/h/aarch64-system-registers/midr_el1
- // x--------------------------------------------------x
- // | Name | bits | visible |
- // |--------------------------------------------------|
- // | Implementer | [31-24] | y |
- // |--------------------------------------------------|
- // | Variant | [23-20] | y |
- // |--------------------------------------------------|
- // | Architecture | [19-16] | y |
- // |--------------------------------------------------|
- // | PartNum | [15-4] | y |
- // |--------------------------------------------------|
- // | Revision | [3-0] | y |
- // x--------------------------------------------------x
-
- switch (midr >> 24) & 0xff {
- case 0xC0:
- c.VendorString = "Ampere Computing"
- c.VendorID = Ampere
- case 0x41:
- c.VendorString = "Arm Limited"
- c.VendorID = ARM
- case 0x42:
- c.VendorString = "Broadcom Corporation"
- c.VendorID = Broadcom
- case 0x43:
- c.VendorString = "Cavium Inc"
- c.VendorID = Cavium
- case 0x44:
- c.VendorString = "Digital Equipment Corporation"
- c.VendorID = DEC
- case 0x46:
- c.VendorString = "Fujitsu Ltd"
- c.VendorID = Fujitsu
- case 0x49:
- c.VendorString = "Infineon Technologies AG"
- c.VendorID = Infineon
- case 0x4D:
- c.VendorString = "Motorola or Freescale Semiconductor Inc"
- c.VendorID = Motorola
- case 0x4E:
- c.VendorString = "NVIDIA Corporation"
- c.VendorID = NVIDIA
- case 0x50:
- c.VendorString = "Applied Micro Circuits Corporation"
- c.VendorID = AMCC
- case 0x51:
- c.VendorString = "Qualcomm Inc"
- c.VendorID = Qualcomm
- case 0x56:
- c.VendorString = "Marvell International Ltd"
- c.VendorID = Marvell
- case 0x69:
- c.VendorString = "Intel Corporation"
- c.VendorID = Intel
- }
-
- // Lower 4 bits: Architecture
- // Architecture Meaning
- // 0b0001 Armv4.
- // 0b0010 Armv4T.
- // 0b0011 Armv5 (obsolete).
- // 0b0100 Armv5T.
- // 0b0101 Armv5TE.
- // 0b0110 Armv5TEJ.
- // 0b0111 Armv6.
- // 0b1111 Architectural features are individually identified in the ID_* registers, see 'ID registers'.
- // Upper 4 bit: Variant
- // An IMPLEMENTATION DEFINED variant number.
- // Typically, this field is used to distinguish between different product variants, or major revisions of a product.
- c.Family = int(midr>>16) & 0xff
-
- // PartNum, bits [15:4]
- // An IMPLEMENTATION DEFINED primary part number for the device.
- // On processors implemented by Arm, if the top four bits of the primary
- // part number are 0x0 or 0x7, the variant and architecture are encoded differently.
- // Revision, bits [3:0]
- // An IMPLEMENTATION DEFINED revision number for the device.
- c.Model = int(midr) & 0xffff
-
- procFeatures := getProcFeatures()
-
- // ID_AA64PFR0_EL1 - Processor Feature Register 0
- // x--------------------------------------------------x
- // | Name | bits | visible |
- // |--------------------------------------------------|
- // | DIT | [51-48] | y |
- // |--------------------------------------------------|
- // | SVE | [35-32] | y |
- // |--------------------------------------------------|
- // | GIC | [27-24] | n |
- // |--------------------------------------------------|
- // | AdvSIMD | [23-20] | y |
- // |--------------------------------------------------|
- // | FP | [19-16] | y |
- // |--------------------------------------------------|
- // | EL3 | [15-12] | n |
- // |--------------------------------------------------|
- // | EL2 | [11-8] | n |
- // |--------------------------------------------------|
- // | EL1 | [7-4] | n |
- // |--------------------------------------------------|
- // | EL0 | [3-0] | n |
- // x--------------------------------------------------x
-
- var f flagSet
- // if procFeatures&(0xf<<48) != 0 {
- // fmt.Println("DIT")
- // }
- f.setIf(procFeatures&(0xf<<32) != 0, SVE)
- if procFeatures&(0xf<<20) != 15<<20 {
- f.set(ASIMD)
- // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1
- // 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic.
- f.setIf(procFeatures&(0xf<<20) == 1<<20, FPHP, ASIMDHP)
- }
- f.setIf(procFeatures&(0xf<<16) != 0, FP)
-
- instAttrReg0, instAttrReg1 := getInstAttributes()
-
- // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
- //
- // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0
- // x--------------------------------------------------x
- // | Name | bits | visible |
- // |--------------------------------------------------|
- // | TS | [55-52] | y |
- // |--------------------------------------------------|
- // | FHM | [51-48] | y |
- // |--------------------------------------------------|
- // | DP | [47-44] | y |
- // |--------------------------------------------------|
- // | SM4 | [43-40] | y |
- // |--------------------------------------------------|
- // | SM3 | [39-36] | y |
- // |--------------------------------------------------|
- // | SHA3 | [35-32] | y |
- // |--------------------------------------------------|
- // | RDM | [31-28] | y |
- // |--------------------------------------------------|
- // | ATOMICS | [23-20] | y |
- // |--------------------------------------------------|
- // | CRC32 | [19-16] | y |
- // |--------------------------------------------------|
- // | SHA2 | [15-12] | y |
- // |--------------------------------------------------|
- // | SHA1 | [11-8] | y |
- // |--------------------------------------------------|
- // | AES | [7-4] | y |
- // x--------------------------------------------------x
-
- // if instAttrReg0&(0xf<<52) != 0 {
- // fmt.Println("TS")
- // }
- // if instAttrReg0&(0xf<<48) != 0 {
- // fmt.Println("FHM")
- // }
- f.setIf(instAttrReg0&(0xf<<44) != 0, ASIMDDP)
- f.setIf(instAttrReg0&(0xf<<40) != 0, SM4)
- f.setIf(instAttrReg0&(0xf<<36) != 0, SM3)
- f.setIf(instAttrReg0&(0xf<<32) != 0, SHA3)
- f.setIf(instAttrReg0&(0xf<<28) != 0, ASIMDRDM)
- f.setIf(instAttrReg0&(0xf<<20) != 0, ATOMICS)
- f.setIf(instAttrReg0&(0xf<<16) != 0, CRC32)
- f.setIf(instAttrReg0&(0xf<<12) != 0, SHA2)
- // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
- // 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented.
- f.setIf(instAttrReg0&(0xf<<12) == 2<<12, SHA512)
- f.setIf(instAttrReg0&(0xf<<8) != 0, SHA1)
- f.setIf(instAttrReg0&(0xf<<4) != 0, AESARM)
- // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
- // 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities.
- f.setIf(instAttrReg0&(0xf<<4) == 2<<4, PMULL)
-
- // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar1_el1
- //
- // ID_AA64ISAR1_EL1 - Instruction set attribute register 1
- // x--------------------------------------------------x
- // | Name | bits | visible |
- // |--------------------------------------------------|
- // | GPI | [31-28] | y |
- // |--------------------------------------------------|
- // | GPA | [27-24] | y |
- // |--------------------------------------------------|
- // | LRCPC | [23-20] | y |
- // |--------------------------------------------------|
- // | FCMA | [19-16] | y |
- // |--------------------------------------------------|
- // | JSCVT | [15-12] | y |
- // |--------------------------------------------------|
- // | API | [11-8] | y |
- // |--------------------------------------------------|
- // | APA | [7-4] | y |
- // |--------------------------------------------------|
- // | DPB | [3-0] | y |
- // x--------------------------------------------------x
-
- // if instAttrReg1&(0xf<<28) != 0 {
- // fmt.Println("GPI")
- // }
- f.setIf(instAttrReg1&(0xf<<28) != 24, GPA)
- f.setIf(instAttrReg1&(0xf<<20) != 0, LRCPC)
- f.setIf(instAttrReg1&(0xf<<16) != 0, FCMA)
- f.setIf(instAttrReg1&(0xf<<12) != 0, JSCVT)
- // if instAttrReg1&(0xf<<8) != 0 {
- // fmt.Println("API")
- // }
- // if instAttrReg1&(0xf<<4) != 0 {
- // fmt.Println("APA")
- // }
- f.setIf(instAttrReg1&(0xf<<0) != 0, DCPOP)
-
- // Store
- c.featureSet.or(f)
-}
diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_ref.go b/vendor/github.com/klauspost/cpuid/v2/detect_ref.go
deleted file mode 100644
index 9636c2b..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/detect_ref.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//go:build (!amd64 && !386 && !arm64) || gccgo || noasm || appengine
-// +build !amd64,!386,!arm64 gccgo noasm appengine
-
-package cpuid
-
-func initCPU() {
- cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
- cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
- xgetbv = func(uint32) (a, b uint32) { return 0, 0 }
- rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
-}
-
-func addInfo(info *CPUInfo, safe bool) {}
diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_x86.go b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go
deleted file mode 100644
index c946824..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/detect_x86.go
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//go:build (386 && !gccgo && !noasm && !appengine) || (amd64 && !gccgo && !noasm && !appengine)
-// +build 386,!gccgo,!noasm,!appengine amd64,!gccgo,!noasm,!appengine
-
-package cpuid
-
-func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
-func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-func asmXgetbv(index uint32) (eax, edx uint32)
-func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
-func asmDarwinHasAVX512() bool
-
-func initCPU() {
- cpuid = asmCpuid
- cpuidex = asmCpuidex
- xgetbv = asmXgetbv
- rdtscpAsm = asmRdtscpAsm
- darwinHasAVX512 = asmDarwinHasAVX512
-}
-
-func addInfo(c *CPUInfo, safe bool) {
- c.maxFunc = maxFunctionID()
- c.maxExFunc = maxExtendedFunction()
- c.BrandName = brandName()
- c.CacheLine = cacheLine()
- c.Family, c.Model, c.Stepping = familyModel()
- c.featureSet = support()
- c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
- c.ThreadsPerCore = threadsPerCore()
- c.LogicalCores = logicalCores()
- c.PhysicalCores = physicalCores()
- c.VendorID, c.VendorString = vendorID()
- c.cacheSize()
- c.frequencies()
-}
diff --git a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
deleted file mode 100644
index d12e547..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
+++ /dev/null
@@ -1,235 +0,0 @@
-// Code generated by "stringer -type=FeatureID,Vendor"; DO NOT EDIT.
-
-package cpuid
-
-import "strconv"
-
-func _() {
- // An "invalid array index" compiler error signifies that the constant values have changed.
- // Re-run the stringer command to generate them again.
- var x [1]struct{}
- _ = x[ADX-1]
- _ = x[AESNI-2]
- _ = x[AMD3DNOW-3]
- _ = x[AMD3DNOWEXT-4]
- _ = x[AMXBF16-5]
- _ = x[AMXINT8-6]
- _ = x[AMXTILE-7]
- _ = x[AVX-8]
- _ = x[AVX2-9]
- _ = x[AVX512BF16-10]
- _ = x[AVX512BITALG-11]
- _ = x[AVX512BW-12]
- _ = x[AVX512CD-13]
- _ = x[AVX512DQ-14]
- _ = x[AVX512ER-15]
- _ = x[AVX512F-16]
- _ = x[AVX512FP16-17]
- _ = x[AVX512IFMA-18]
- _ = x[AVX512PF-19]
- _ = x[AVX512VBMI-20]
- _ = x[AVX512VBMI2-21]
- _ = x[AVX512VL-22]
- _ = x[AVX512VNNI-23]
- _ = x[AVX512VP2INTERSECT-24]
- _ = x[AVX512VPOPCNTDQ-25]
- _ = x[AVXSLOW-26]
- _ = x[AVXVNNI-27]
- _ = x[BMI1-28]
- _ = x[BMI2-29]
- _ = x[CETIBT-30]
- _ = x[CETSS-31]
- _ = x[CLDEMOTE-32]
- _ = x[CLMUL-33]
- _ = x[CLZERO-34]
- _ = x[CMOV-35]
- _ = x[CMPSB_SCADBS_SHORT-36]
- _ = x[CMPXCHG8-37]
- _ = x[CPBOOST-38]
- _ = x[CX16-39]
- _ = x[ENQCMD-40]
- _ = x[ERMS-41]
- _ = x[F16C-42]
- _ = x[FMA3-43]
- _ = x[FMA4-44]
- _ = x[FXSR-45]
- _ = x[FXSROPT-46]
- _ = x[GFNI-47]
- _ = x[HLE-48]
- _ = x[HRESET-49]
- _ = x[HTT-50]
- _ = x[HWA-51]
- _ = x[HYPERVISOR-52]
- _ = x[IBPB-53]
- _ = x[IBS-54]
- _ = x[IBSBRNTRGT-55]
- _ = x[IBSFETCHSAM-56]
- _ = x[IBSFFV-57]
- _ = x[IBSOPCNT-58]
- _ = x[IBSOPCNTEXT-59]
- _ = x[IBSOPSAM-60]
- _ = x[IBSRDWROPCNT-61]
- _ = x[IBSRIPINVALIDCHK-62]
- _ = x[IBS_PREVENTHOST-63]
- _ = x[INT_WBINVD-64]
- _ = x[INVLPGB-65]
- _ = x[LAHF-66]
- _ = x[LAM-67]
- _ = x[LBRVIRT-68]
- _ = x[LZCNT-69]
- _ = x[MCAOVERFLOW-70]
- _ = x[MCOMMIT-71]
- _ = x[MMX-72]
- _ = x[MMXEXT-73]
- _ = x[MOVBE-74]
- _ = x[MOVDIR64B-75]
- _ = x[MOVDIRI-76]
- _ = x[MOVSB_ZL-77]
- _ = x[MPX-78]
- _ = x[MSRIRC-79]
- _ = x[MSR_PAGEFLUSH-80]
- _ = x[NRIPS-81]
- _ = x[NX-82]
- _ = x[OSXSAVE-83]
- _ = x[PCONFIG-84]
- _ = x[POPCNT-85]
- _ = x[RDPRU-86]
- _ = x[RDRAND-87]
- _ = x[RDSEED-88]
- _ = x[RDTSCP-89]
- _ = x[RTM-90]
- _ = x[RTM_ALWAYS_ABORT-91]
- _ = x[SERIALIZE-92]
- _ = x[SEV-93]
- _ = x[SEV_64BIT-94]
- _ = x[SEV_ALTERNATIVE-95]
- _ = x[SEV_DEBUGSWAP-96]
- _ = x[SEV_ES-97]
- _ = x[SEV_RESTRICTED-98]
- _ = x[SEV_SNP-99]
- _ = x[SGX-100]
- _ = x[SGXLC-101]
- _ = x[SHA-102]
- _ = x[SME-103]
- _ = x[SME_COHERENT-104]
- _ = x[SSE-105]
- _ = x[SSE2-106]
- _ = x[SSE3-107]
- _ = x[SSE4-108]
- _ = x[SSE42-109]
- _ = x[SSE4A-110]
- _ = x[SSSE3-111]
- _ = x[STIBP-112]
- _ = x[STOSB_SHORT-113]
- _ = x[SUCCOR-114]
- _ = x[SVM-115]
- _ = x[SVMDA-116]
- _ = x[SVMFBASID-117]
- _ = x[SVML-118]
- _ = x[SVMNP-119]
- _ = x[SVMPF-120]
- _ = x[SVMPFT-121]
- _ = x[SYSCALL-122]
- _ = x[SYSEE-123]
- _ = x[TBM-124]
- _ = x[TOPEXT-125]
- _ = x[TME-126]
- _ = x[TSCRATEMSR-127]
- _ = x[TSXLDTRK-128]
- _ = x[VAES-129]
- _ = x[VMCBCLEAN-130]
- _ = x[VMPL-131]
- _ = x[VMSA_REGPROT-132]
- _ = x[VMX-133]
- _ = x[VPCLMULQDQ-134]
- _ = x[VTE-135]
- _ = x[WAITPKG-136]
- _ = x[WBNOINVD-137]
- _ = x[X87-138]
- _ = x[XGETBV1-139]
- _ = x[XOP-140]
- _ = x[XSAVE-141]
- _ = x[XSAVEC-142]
- _ = x[XSAVEOPT-143]
- _ = x[XSAVES-144]
- _ = x[AESARM-145]
- _ = x[ARMCPUID-146]
- _ = x[ASIMD-147]
- _ = x[ASIMDDP-148]
- _ = x[ASIMDHP-149]
- _ = x[ASIMDRDM-150]
- _ = x[ATOMICS-151]
- _ = x[CRC32-152]
- _ = x[DCPOP-153]
- _ = x[EVTSTRM-154]
- _ = x[FCMA-155]
- _ = x[FP-156]
- _ = x[FPHP-157]
- _ = x[GPA-158]
- _ = x[JSCVT-159]
- _ = x[LRCPC-160]
- _ = x[PMULL-161]
- _ = x[SHA1-162]
- _ = x[SHA2-163]
- _ = x[SHA3-164]
- _ = x[SHA512-165]
- _ = x[SM3-166]
- _ = x[SM4-167]
- _ = x[SVE-168]
- _ = x[lastID-169]
- _ = x[firstID-0]
-}
-
-const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXSLOWAVXVNNIBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCX16ENQCMDERMSF16CFMA3FMA4FXSRFXSROPTGFNIHLEHRESETHTTHWAHYPERVISORIBPBIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_PREVENTHOSTINT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCOMMITMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMPXMSRIRCMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTRDPRURDRANDRDSEEDRDTSCPRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTOPEXTTMETSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
-
-var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 58, 62, 72, 84, 92, 100, 108, 116, 123, 133, 143, 151, 161, 172, 180, 190, 208, 223, 230, 237, 241, 245, 251, 256, 264, 269, 275, 279, 297, 305, 312, 316, 322, 326, 330, 334, 338, 342, 349, 353, 356, 362, 365, 368, 378, 382, 385, 395, 406, 412, 420, 431, 439, 451, 467, 482, 492, 499, 503, 506, 513, 518, 529, 536, 539, 545, 550, 559, 566, 574, 577, 583, 596, 601, 603, 610, 617, 623, 628, 634, 640, 646, 649, 665, 674, 677, 686, 701, 714, 720, 734, 741, 744, 749, 752, 755, 767, 770, 774, 778, 782, 787, 792, 797, 802, 813, 819, 822, 827, 836, 840, 845, 850, 856, 863, 868, 871, 877, 880, 890, 898, 902, 911, 915, 927, 930, 940, 943, 950, 958, 961, 968, 971, 976, 982, 990, 996, 1002, 1010, 1015, 1022, 1029, 1037, 1044, 1049, 1054, 1061, 1065, 1067, 1071, 1074, 1079, 1084, 1089, 1093, 1097, 1101, 1107, 1110, 1113, 1116, 1122}
-
-func (i FeatureID) String() string {
- if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {
- return "FeatureID(" + strconv.FormatInt(int64(i), 10) + ")"
- }
- return _FeatureID_name[_FeatureID_index[i]:_FeatureID_index[i+1]]
-}
-func _() {
- // An "invalid array index" compiler error signifies that the constant values have changed.
- // Re-run the stringer command to generate them again.
- var x [1]struct{}
- _ = x[VendorUnknown-0]
- _ = x[Intel-1]
- _ = x[AMD-2]
- _ = x[VIA-3]
- _ = x[Transmeta-4]
- _ = x[NSC-5]
- _ = x[KVM-6]
- _ = x[MSVM-7]
- _ = x[VMware-8]
- _ = x[XenHVM-9]
- _ = x[Bhyve-10]
- _ = x[Hygon-11]
- _ = x[SiS-12]
- _ = x[RDC-13]
- _ = x[Ampere-14]
- _ = x[ARM-15]
- _ = x[Broadcom-16]
- _ = x[Cavium-17]
- _ = x[DEC-18]
- _ = x[Fujitsu-19]
- _ = x[Infineon-20]
- _ = x[Motorola-21]
- _ = x[NVIDIA-22]
- _ = x[AMCC-23]
- _ = x[Qualcomm-24]
- _ = x[Marvell-25]
- _ = x[lastVendor-26]
-}
-
-const _Vendor_name = "VendorUnknownIntelAMDVIATransmetaNSCKVMMSVMVMwareXenHVMBhyveHygonSiSRDCAmpereARMBroadcomCaviumDECFujitsuInfineonMotorolaNVIDIAAMCCQualcommMarvelllastVendor"
-
-var _Vendor_index = [...]uint8{0, 13, 18, 21, 24, 33, 36, 39, 43, 49, 55, 60, 65, 68, 71, 77, 80, 88, 94, 97, 104, 112, 120, 126, 130, 138, 145, 155}
-
-func (i Vendor) String() string {
- if i < 0 || i >= Vendor(len(_Vendor_index)-1) {
- return "Vendor(" + strconv.FormatInt(int64(i), 10) + ")"
- }
- return _Vendor_name[_Vendor_index[i]:_Vendor_index[i+1]]
-}
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go
deleted file mode 100644
index d91d021..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go
+++ /dev/null
@@ -1,121 +0,0 @@
-// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
-
-package cpuid
-
-import (
- "runtime"
- "strings"
-
- "golang.org/x/sys/unix"
-)
-
-func detectOS(c *CPUInfo) bool {
- if runtime.GOOS != "ios" {
- tryToFillCPUInfoFomSysctl(c)
- }
- // There are no hw.optional sysctl values for the below features on Mac OS 11.0
- // to detect their supported state dynamically. Assume the CPU features that
- // Apple Silicon M1 supports to be available as a minimal set of features
- // to all Go programs running on darwin/arm64.
- // TODO: Add more if we know them.
- c.featureSet.setIf(runtime.GOOS != "ios", AESARM, PMULL, SHA1, SHA2)
-
- return true
-}
-
-func sysctlGetBool(name string) bool {
- value, err := unix.SysctlUint32(name)
- if err != nil {
- return false
- }
- return value != 0
-}
-
-func sysctlGetString(name string) string {
- value, err := unix.Sysctl(name)
- if err != nil {
- return ""
- }
- return value
-}
-
-func sysctlGetInt(unknown int, names ...string) int {
- for _, name := range names {
- value, err := unix.SysctlUint32(name)
- if err != nil {
- continue
- }
- if value != 0 {
- return int(value)
- }
- }
- return unknown
-}
-
-func sysctlGetInt64(unknown int, names ...string) int {
- for _, name := range names {
- value64, err := unix.SysctlUint64(name)
- if err != nil {
- continue
- }
- if int(value64) != unknown {
- return int(value64)
- }
- }
- return unknown
-}
-
-func setFeature(c *CPUInfo, name string, feature FeatureID) {
- c.featureSet.setIf(sysctlGetBool(name), feature)
-}
-func tryToFillCPUInfoFomSysctl(c *CPUInfo) {
- c.BrandName = sysctlGetString("machdep.cpu.brand_string")
-
- if len(c.BrandName) != 0 {
- c.VendorString = strings.Fields(c.BrandName)[0]
- }
-
- c.PhysicalCores = sysctlGetInt(runtime.NumCPU(), "hw.physicalcpu")
- c.ThreadsPerCore = sysctlGetInt(1, "machdep.cpu.thread_count", "kern.num_threads") /
- sysctlGetInt(1, "hw.physicalcpu")
- c.LogicalCores = sysctlGetInt(runtime.NumCPU(), "machdep.cpu.core_count")
- c.Family = sysctlGetInt(0, "machdep.cpu.family", "hw.cpufamily")
- c.Model = sysctlGetInt(0, "machdep.cpu.model")
- c.CacheLine = sysctlGetInt64(0, "hw.cachelinesize")
- c.Cache.L1I = sysctlGetInt64(-1, "hw.l1icachesize")
- c.Cache.L1D = sysctlGetInt64(-1, "hw.l1icachesize")
- c.Cache.L2 = sysctlGetInt64(-1, "hw.l2cachesize")
- c.Cache.L3 = sysctlGetInt64(-1, "hw.l3cachesize")
-
- // from https://developer.arm.com/downloads/-/exploration-tools/feature-names-for-a-profile
- setFeature(c, "hw.optional.arm.FEAT_AES", AESARM)
- setFeature(c, "hw.optional.AdvSIMD", ASIMD)
- setFeature(c, "hw.optional.arm.FEAT_DotProd", ASIMDDP)
- setFeature(c, "hw.optional.arm.FEAT_RDM", ASIMDRDM)
- setFeature(c, "hw.optional.FEAT_CRC32", CRC32)
- setFeature(c, "hw.optional.arm.FEAT_DPB", DCPOP)
- // setFeature(c, "", EVTSTRM)
- setFeature(c, "hw.optional.arm.FEAT_FCMA", FCMA)
- setFeature(c, "hw.optional.arm.FEAT_FP", FP)
- setFeature(c, "hw.optional.arm.FEAT_FP16", FPHP)
- setFeature(c, "hw.optional.arm.FEAT_PAuth", GPA)
- setFeature(c, "hw.optional.arm.FEAT_JSCVT", JSCVT)
- setFeature(c, "hw.optional.arm.FEAT_LRCPC", LRCPC)
- setFeature(c, "hw.optional.arm.FEAT_PMULL", PMULL)
- setFeature(c, "hw.optional.arm.FEAT_SHA1", SHA1)
- setFeature(c, "hw.optional.arm.FEAT_SHA256", SHA2)
- setFeature(c, "hw.optional.arm.FEAT_SHA3", SHA3)
- setFeature(c, "hw.optional.arm.FEAT_SHA512", SHA512)
- // setFeature(c, "", SM3)
- // setFeature(c, "", SM4)
- setFeature(c, "hw.optional.arm.FEAT_SVE", SVE)
-
- // from empirical observation
- setFeature(c, "hw.optional.AdvSIMD_HPFPCvt", ASIMDHP)
- setFeature(c, "hw.optional.armv8_1_atomics", ATOMICS)
- setFeature(c, "hw.optional.floatingpoint", FP)
- setFeature(c, "hw.optional.armv8_2_sha3", SHA3)
- setFeature(c, "hw.optional.armv8_2_sha512", SHA512)
- setFeature(c, "hw.optional.armv8_3_compnum", FCMA)
- setFeature(c, "hw.optional.armv8_crc32", CRC32)
-}
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go
deleted file mode 100644
index ee278b9..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go
+++ /dev/null
@@ -1,130 +0,0 @@
-// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
-
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file located
-// here https://github.com/golang/sys/blob/master/LICENSE
-
-package cpuid
-
-import (
- "encoding/binary"
- "io/ioutil"
- "runtime"
-)
-
-// HWCAP bits.
-const (
- hwcap_FP = 1 << 0
- hwcap_ASIMD = 1 << 1
- hwcap_EVTSTRM = 1 << 2
- hwcap_AES = 1 << 3
- hwcap_PMULL = 1 << 4
- hwcap_SHA1 = 1 << 5
- hwcap_SHA2 = 1 << 6
- hwcap_CRC32 = 1 << 7
- hwcap_ATOMICS = 1 << 8
- hwcap_FPHP = 1 << 9
- hwcap_ASIMDHP = 1 << 10
- hwcap_CPUID = 1 << 11
- hwcap_ASIMDRDM = 1 << 12
- hwcap_JSCVT = 1 << 13
- hwcap_FCMA = 1 << 14
- hwcap_LRCPC = 1 << 15
- hwcap_DCPOP = 1 << 16
- hwcap_SHA3 = 1 << 17
- hwcap_SM3 = 1 << 18
- hwcap_SM4 = 1 << 19
- hwcap_ASIMDDP = 1 << 20
- hwcap_SHA512 = 1 << 21
- hwcap_SVE = 1 << 22
- hwcap_ASIMDFHM = 1 << 23
-)
-
-func detectOS(c *CPUInfo) bool {
- // For now assuming no hyperthreading is reasonable.
- c.LogicalCores = runtime.NumCPU()
- c.PhysicalCores = c.LogicalCores
- c.ThreadsPerCore = 1
- if hwcap == 0 {
- // We did not get values from the runtime.
- // Try reading /proc/self/auxv
-
- // From https://github.com/golang/sys
- const (
- _AT_HWCAP = 16
- _AT_HWCAP2 = 26
-
- uintSize = int(32 << (^uint(0) >> 63))
- )
-
- buf, err := ioutil.ReadFile("/proc/self/auxv")
- if err != nil {
- // e.g. on android /proc/self/auxv is not accessible, so silently
- // ignore the error and leave Initialized = false. On some
- // architectures (e.g. arm64) doinit() implements a fallback
- // readout and will set Initialized = true again.
- return false
- }
- bo := binary.LittleEndian
- for len(buf) >= 2*(uintSize/8) {
- var tag, val uint
- switch uintSize {
- case 32:
- tag = uint(bo.Uint32(buf[0:]))
- val = uint(bo.Uint32(buf[4:]))
- buf = buf[8:]
- case 64:
- tag = uint(bo.Uint64(buf[0:]))
- val = uint(bo.Uint64(buf[8:]))
- buf = buf[16:]
- }
- switch tag {
- case _AT_HWCAP:
- hwcap = val
- case _AT_HWCAP2:
- // Not used
- }
- }
- if hwcap == 0 {
- return false
- }
- }
-
- // HWCap was populated by the runtime from the auxiliary vector.
- // Use HWCap information since reading aarch64 system registers
- // is not supported in user space on older linux kernels.
- c.featureSet.setIf(isSet(hwcap, hwcap_AES), AESARM)
- c.featureSet.setIf(isSet(hwcap, hwcap_ASIMD), ASIMD)
- c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDDP), ASIMDDP)
- c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDHP), ASIMDHP)
- c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDRDM), ASIMDRDM)
- c.featureSet.setIf(isSet(hwcap, hwcap_CPUID), ARMCPUID)
- c.featureSet.setIf(isSet(hwcap, hwcap_CRC32), CRC32)
- c.featureSet.setIf(isSet(hwcap, hwcap_DCPOP), DCPOP)
- c.featureSet.setIf(isSet(hwcap, hwcap_EVTSTRM), EVTSTRM)
- c.featureSet.setIf(isSet(hwcap, hwcap_FCMA), FCMA)
- c.featureSet.setIf(isSet(hwcap, hwcap_FP), FP)
- c.featureSet.setIf(isSet(hwcap, hwcap_FPHP), FPHP)
- c.featureSet.setIf(isSet(hwcap, hwcap_JSCVT), JSCVT)
- c.featureSet.setIf(isSet(hwcap, hwcap_LRCPC), LRCPC)
- c.featureSet.setIf(isSet(hwcap, hwcap_PMULL), PMULL)
- c.featureSet.setIf(isSet(hwcap, hwcap_SHA1), SHA1)
- c.featureSet.setIf(isSet(hwcap, hwcap_SHA2), SHA2)
- c.featureSet.setIf(isSet(hwcap, hwcap_SHA3), SHA3)
- c.featureSet.setIf(isSet(hwcap, hwcap_SHA512), SHA512)
- c.featureSet.setIf(isSet(hwcap, hwcap_SM3), SM3)
- c.featureSet.setIf(isSet(hwcap, hwcap_SM4), SM4)
- c.featureSet.setIf(isSet(hwcap, hwcap_SVE), SVE)
-
- // The Samsung S9+ kernel reports support for atomics, but not all cores
- // actually support them, resulting in SIGILL. See issue #28431.
- // TODO(elias.naur): Only disable the optimization on bad chipsets on android.
- c.featureSet.setIf(isSet(hwcap, hwcap_ATOMICS) && runtime.GOOS != "android", ATOMICS)
-
- return true
-}
-
-func isSet(hwc uint, value uint) bool {
- return hwc&value != 0
-}
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go
deleted file mode 100644
index 8733ba3..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
-
-//go:build arm64 && !linux && !darwin
-// +build arm64,!linux,!darwin
-
-package cpuid
-
-import "runtime"
-
-func detectOS(c *CPUInfo) bool {
- c.PhysicalCores = runtime.NumCPU()
- // For now assuming 1 thread per core...
- c.ThreadsPerCore = 1
- c.LogicalCores = c.PhysicalCores
- return false
-}
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go
deleted file mode 100644
index f8f201b..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go
+++ /dev/null
@@ -1,8 +0,0 @@
-// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
-
-//go:build nounsafe
-// +build nounsafe
-
-package cpuid
-
-var hwcap uint
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go
deleted file mode 100644
index 92af622..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
-
-//go:build !nounsafe
-// +build !nounsafe
-
-package cpuid
-
-import _ "unsafe" // needed for go:linkname
-
-//go:linkname hwcap internal/cpu.HWCap
-var hwcap uint
diff --git a/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh b/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh
deleted file mode 100644
index 471d986..0000000
--- a/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/sh
-
-set -e
-
-go tool dist list | while IFS=/ read os arch; do
- echo "Checking $os/$arch..."
- echo " normal"
- GOARCH=$arch GOOS=$os go build -o /dev/null .
- echo " noasm"
- GOARCH=$arch GOOS=$os go build -tags noasm -o /dev/null .
- echo " appengine"
- GOARCH=$arch GOOS=$os go build -tags appengine -o /dev/null .
- echo " noasm,appengine"
- GOARCH=$arch GOOS=$os go build -tags 'appengine noasm' -o /dev/null .
-done
diff --git a/vendor/github.com/klauspost/reedsolomon/.gitignore b/vendor/github.com/klauspost/reedsolomon/.gitignore
deleted file mode 100644
index 59610b5..0000000
--- a/vendor/github.com/klauspost/reedsolomon/.gitignore
+++ /dev/null
@@ -1,26 +0,0 @@
-# Compiled Object files, Static and Dynamic libs (Shared Objects)
-*.o
-*.a
-*.so
-
-# Folders
-_obj
-_test
-
-# Architecture specific extensions/prefixes
-*.[568vq]
-[568vq].out
-
-*.cgo1.go
-*.cgo2.c
-_cgo_defun.c
-_cgo_gotypes.go
-_cgo_export.*
-
-_testmain.go
-
-*.exe
-*.test
-*.prof
-
-.idea
\ No newline at end of file
diff --git a/vendor/github.com/klauspost/reedsolomon/LICENSE b/vendor/github.com/klauspost/reedsolomon/LICENSE
deleted file mode 100644
index a947e16..0000000
--- a/vendor/github.com/klauspost/reedsolomon/LICENSE
+++ /dev/null
@@ -1,23 +0,0 @@
-The MIT License (MIT)
-
-Copyright (c) 2015 Klaus Post
-Copyright (c) 2015 Backblaze
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
diff --git a/vendor/github.com/klauspost/reedsolomon/README.md b/vendor/github.com/klauspost/reedsolomon/README.md
deleted file mode 100644
index c8f1886..0000000
--- a/vendor/github.com/klauspost/reedsolomon/README.md
+++ /dev/null
@@ -1,518 +0,0 @@
-# Reed-Solomon
-[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/reedsolomon.svg)](https://pkg.go.dev/github.com/klauspost/reedsolomon) [![Go](https://github.com/klauspost/reedsolomon/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/reedsolomon/actions/workflows/go.yml)
-
-Reed-Solomon Erasure Coding in Go, with speeds exceeding 1GB/s/cpu core implemented in pure Go.
-
-This is a Go port of the [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) library released by
-[Backblaze](http://backblaze.com), with some additional optimizations.
-
-For an introduction on erasure coding, see the post on the [Backblaze blog](https://www.backblaze.com/blog/reed-solomon/).
-
-For encoding high shard counts (>256) a Leopard implementation is used.
-For most platforms this performs close to the original Leopard implementation in terms of speed.
-
-Package home: https://github.com/klauspost/reedsolomon
-
-Godoc: https://pkg.go.dev/github.com/klauspost/reedsolomon
-
-# Installation
-To get the package use the standard:
-```bash
-go get -u github.com/klauspost/reedsolomon
-```
-
-Using Go modules is recommended.
-
-# Changes
-
-## 2022
-
-* Leopard GF16 mode added, for up to 63336 shards.
-* [WithJerasureMatrix](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithJerasureMatrix) allows constructing a [Jerasure](https://github.com/tsuraan/Jerasure) compatible matrix.
-
-## 2021
-
-* Use `GOAMD64=v4` to enable faster AVX2.
-* Add progressive shard encoding.
-* Wider AVX2 loops
-* Limit concurrency on AVX2, since we are likely memory bound.
-* Allow 0 parity shards.
-* Allow disabling inversion cache.
-* Faster AVX2 encoding.
-
-
- See older changes
-
-## May 2020
-
-* ARM64 optimizations, up to 2.5x faster.
-* Added [WithFastOneParityMatrix](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithFastOneParityMatrix) for faster operation with 1 parity shard.
-* Much better performance when using a limited number of goroutines.
-* AVX512 is now using multiple cores.
-* Stream processing overhaul, big speedups in most cases.
-* AVX512 optimizations
-
-## March 6, 2019
-
-The pure Go implementation is about 30% faster. Minor tweaks to assembler implementations.
-
-## February 8, 2019
-
-AVX512 accelerated version added for Intel Skylake CPUs. This can give up to a 4x speed improvement as compared to AVX2.
-See [here](https://github.com/klauspost/reedsolomon#performance-on-avx512) for more details.
-
-## December 18, 2018
-
-Assembly code for ppc64le has been contributed, this boosts performance by about 10x on this platform.
-
-## November 18, 2017
-
-Added [WithAutoGoroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithAutoGoroutines) which will attempt
-to calculate the optimal number of goroutines to use based on your expected shard size and detected CPU.
-
-## October 1, 2017
-
-* [Cauchy Matrix](https://godoc.org/github.com/klauspost/reedsolomon#WithCauchyMatrix) is now an option.
-Thanks to [templexxx](https://github.com/templexxx) for the basis of this.
-
-* Default maximum number of [goroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithMaxGoroutines)
-has been increased for better multi-core scaling.
-
-* After several requests the Reconstruct and ReconstructData now slices of zero length but sufficient capacity to
-be used instead of allocating new memory.
-
-## August 26, 2017
-
-* The [`Encoder()`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) now contains an `Update`
-function contributed by [chenzhongtao](https://github.com/chenzhongtao).
-
-* [Frank Wessels](https://github.com/fwessels) kindly contributed ARM 64 bit assembly,
-which gives a huge performance boost on this platform.
-
-## July 20, 2017
-
-`ReconstructData` added to [`Encoder`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) interface.
-This can cause compatibility issues if you implement your own Encoder. A simple workaround can be added:
-
-```Go
-func (e *YourEnc) ReconstructData(shards [][]byte) error {
- return ReconstructData(shards)
-}
-```
-
-You can of course also do your own implementation.
-The [`StreamEncoder`](https://godoc.org/github.com/klauspost/reedsolomon#StreamEncoder)
-handles this without modifying the interface.
-This is a good lesson on why returning interfaces is not a good design.
-
-
-
-# Usage
-
-This section assumes you know the basics of Reed-Solomon encoding.
-A good start is this [Backblaze blog post](https://www.backblaze.com/blog/reed-solomon/).
-
-This package performs the calculation of the parity sets. The usage is therefore relatively simple.
-
-First of all, you need to choose your distribution of data and parity shards.
-A 'good' distribution is very subjective, and will depend a lot on your usage scenario.
-
-To create an encoder with 10 data shards (where your data goes) and 3 parity shards (calculated):
-```Go
- enc, err := reedsolomon.New(10, 3)
-```
-This encoder will work for all parity sets with this distribution of data and parity shards.
-
-If you will primarily be using it with one shard size it is recommended to use
-[`WithAutoGoroutines(shardSize)`](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithAutoGoroutines)
-as an additional parameter. This will attempt to calculate the optimal number of goroutines to use for the best speed.
-It is not required that all shards are this size.
-
-Then you send and receive data that is a simple slice of byte slices; `[][]byte`.
-In the example above, the top slice must have a length of 13.
-
-```Go
- data := make([][]byte, 13)
-```
-You should then fill the 10 first slices with *equally sized* data,
-and create parity shards that will be populated with parity data. In this case we create the data in memory,
-but you could for instance also use [mmap](https://github.com/edsrzf/mmap-go) to map files.
-
-```Go
- // Create all shards, size them at 50000 each
- for i := range input {
- data[i] := make([]byte, 50000)
- }
-
-
- // Fill some data into the data shards
- for i, in := range data[:10] {
- for j:= range in {
- in[j] = byte((i+j)&0xff)
- }
- }
-```
-
-To populate the parity shards, you simply call `Encode()` with your data.
-```Go
- err = enc.Encode(data)
-```
-The only cases where you should get an error is, if the data shards aren't of equal size.
-The last 3 shards now contain parity data. You can verify this by calling `Verify()`:
-
-```Go
- ok, err = enc.Verify(data)
-```
-
-The final (and important) part is to be able to reconstruct missing shards.
-For this to work, you need to know which parts of your data is missing.
-The encoder *does not know which parts are invalid*, so if data corruption is a likely scenario,
-you need to implement a hash check for each shard.
-
-If a byte has changed in your set, and you don't know which it is, there is no way to reconstruct the data set.
-
-To indicate missing data, you set the shard to nil before calling `Reconstruct()`:
-
-```Go
- // Delete two data shards
- data[3] = nil
- data[7] = nil
-
- // Reconstruct the missing shards
- err := enc.Reconstruct(data)
-```
-The missing data and parity shards will be recreated. If more than 3 shards are missing, the reconstruction will fail.
-
-If you are only interested in the data shards (for reading purposes) you can call `ReconstructData()`:
-
-```Go
- // Delete two data shards
- data[3] = nil
- data[7] = nil
-
- // Reconstruct just the missing data shards
- err := enc.ReconstructData(data)
-```
-
-If you don't need all data shards you can use `ReconstructSome()`:
-
-```Go
- // Delete two data shards
- data[3] = nil
- data[7] = nil
-
- // Reconstruct just the shard 3
- err := enc.ReconstructSome(data, []bool{false, false, false, true, false, false, false, false})
-```
-
-So to sum up reconstruction:
-* The number of data/parity shards must match the numbers used for encoding.
-* The order of shards must be the same as used when encoding.
-* You may only supply data you know is valid.
-* Invalid shards should be set to nil.
-
-For complete examples of an encoder and decoder see the
-[examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
-
-# Splitting/Joining Data
-
-You might have a large slice of data.
-To help you split this, there are some helper functions that can split and join a single byte slice.
-
-```Go
- bigfile, _ := ioutil.Readfile("myfile.data")
-
- // Split the file
- split, err := enc.Split(bigfile)
-```
-This will split the file into the number of data shards set when creating the encoder and create empty parity shards.
-
-An important thing to note is that you have to *keep track of the exact input size*.
-If the size of the input isn't divisible by the number of data shards, extra zeros will be inserted in the last shard.
-
-To join a data set, use the `Join()` function, which will join the shards and write it to the `io.Writer` you supply:
-```Go
- // Join a data set and write it to io.Discard.
- err = enc.Join(io.Discard, data, len(bigfile))
-```
-
-# Progressive encoding
-
-It is possible to encode individual shards using EncodeIdx:
-
-```Go
- // EncodeIdx will add parity for a single data shard.
- // Parity shards should start out as 0. The caller must zero them.
- // Data shards must be delivered exactly once. There is no check for this.
- // The parity shards will always be updated and the data shards will remain the same.
- EncodeIdx(dataShard []byte, idx int, parity [][]byte) error
-```
-
-This allows progressively encoding the parity by sending individual data shards.
-There is no requirement on shards being delivered in order,
-but when sent in order it allows encoding shards one at the time,
-effectively allowing the operation to be streaming.
-
-The result will be the same as encoding all shards at once.
-There is a minor speed penalty using this method, so send
-shards at once if they are available.
-
-## Example
-
-```Go
-func test() {
- // Create an encoder with 7 data and 3 parity slices.
- enc, _ := reedsolomon.New(7, 3)
-
- // This will be our output parity.
- parity := make([][]byte, 3)
- for i := range parity {
- parity[i] = make([]byte, 10000)
- }
-
- for i := 0; i < 7; i++ {
- // Send data shards one at the time.
- _ = enc.EncodeIdx(make([]byte, 10000), i, parity)
- }
-
- // parity now contains parity, as if all data was sent in one call.
-}
-```
-
-# Streaming/Merging
-
-It might seem like a limitation that all data should be in memory,
-but an important property is that *as long as the number of data/parity shards are the same,
-you can merge/split data sets*, and they will remain valid as a separate set.
-
-```Go
- // Split the data set of 50000 elements into two of 25000
- splitA := make([][]byte, 13)
- splitB := make([][]byte, 13)
-
- // Merge into a 100000 element set
- merged := make([][]byte, 13)
-
- for i := range data {
- splitA[i] = data[i][:25000]
- splitB[i] = data[i][25000:]
-
- // Concatenate it to itself
- merged[i] = append(make([]byte, 0, len(data[i])*2), data[i]...)
- merged[i] = append(merged[i], data[i]...)
- }
-
- // Each part should still verify as ok.
- ok, err := enc.Verify(splitA)
- if ok && err == nil {
- log.Println("splitA ok")
- }
-
- ok, err = enc.Verify(splitB)
- if ok && err == nil {
- log.Println("splitB ok")
- }
-
- ok, err = enc.Verify(merge)
- if ok && err == nil {
- log.Println("merge ok")
- }
-```
-
-This means that if you have a data set that may not fit into memory, you can split processing into smaller blocks.
-For the best throughput, don't use too small blocks.
-
-This also means that you can divide big input up into smaller blocks, and do reconstruction on parts of your data.
-This doesn't give the same flexibility of a higher number of data shards, but it will be much more performant.
-
-# Streaming API
-
-There has been added support for a streaming API, to help perform fully streaming operations,
-which enables you to do the same operations, but on streams.
-To use the stream API, use [`NewStream`](https://godoc.org/github.com/klauspost/reedsolomon#NewStream) function
-to create the encoding/decoding interfaces.
-
-You can use [`WithConcurrentStreams`](https://godoc.org/github.com/klauspost/reedsolomon#WithConcurrentStreams)
-to ready an interface that reads/writes concurrently from the streams.
-
-You can specify the size of each operation using
-[`WithStreamBlockSize`](https://godoc.org/github.com/klauspost/reedsolomon#WithStreamBlockSize).
-This will set the size of each read/write operation.
-
-Input is delivered as `[]io.Reader`, output as `[]io.Writer`, and functionality corresponds to the in-memory API.
-Each stream must supply the same amount of data, similar to how each slice must be similar size with the in-memory API.
-If an error occurs in relation to a stream,
-a [`StreamReadError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamReadError)
-or [`StreamWriteError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamWriteError)
-will help you determine which stream was the offender.
-
-There is no buffering or timeouts/retry specified. If you want to add that, you need to add it to the Reader/Writer.
-
-For complete examples of a streaming encoder and decoder see the
-[examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
-
-GF16 (more than 256 shards) is not supported by the streaming interface.
-
-# Advanced Options
-
-You can modify internal options which affects how jobs are split between and processed by goroutines.
-
-To create options, use the WithXXX functions. You can supply options to `New`, `NewStream`.
-If no Options are supplied, default options are used.
-
-Example of how to supply options:
-
- ```Go
- enc, err := reedsolomon.New(10, 3, WithMaxGoroutines(25))
- ```
-
-# Leopard Compatible GF16
-
-When you encode more than 256 shards the library will switch to a [Leopard-RS](https://github.com/catid/leopard) implementation.
-
-This allows encoding up to 65536 shards (data+parity) with the following limitations, similar to leopard:
-
-* The original and recovery data must not exceed 65536 pieces.
-* The shard size *must* each be a multiple of 64 bytes.
-* Each buffer should have the same number of bytes.
-* Even the last shard must be rounded up to the block size.
-
-| | Regular | Leopard |
-|-----------------|---------|---------|
-| Encode | ✓ | ✓ |
-| EncodeIdx | ✓ | - |
-| Verify | ✓ | ✓ |
-| Reconstruct | ✓ | ✓ |
-| ReconstructData | ✓ | ✓ |
-| ReconstructSome | ✓ | ✓ (+) |
-| Update | ✓ | - |
-| Split | ✓ | ✓ |
-| Join | ✓ | ✓ |
-
-* (+) Same as calling `ReconstructData`.
-
-The Split/Join functions will help to split an input to the proper sizes.
-
-Speed can be expected to be `O(N*log(N))`, compared to the `O(N*N)`.
-Reconstruction matrix calculation is more time-consuming,
-so be sure to include that as part of any benchmark you run.
-
-For now SSSE3, AVX2 and AVX512 assembly are available on AMD64 platforms.
-
-Leopard mode currently always runs as a single goroutine, since multiple gorouties doesn't provide any worthwhile speedup.
-
-## Forcing Leopard GF16
-
-The `WithLeopardGF16(true)` can be used to use Leopard GF16 for all operations.
-This is *not* compatible with the Leopard library that has a separate GF8 implementation.
-
-Benchmark Encoding and Reconstructing *1KB* shards with variable number of shards.
-For Cauchy matrix the inversion cache is disabled for a more "fair" test.
-Speed is total shard size for each operation. Data shard throughput is speed/2.
-AVX2 is used.
-
-| Encoder | Shards | Encode | Recover All | Recover One |
-|--------------|-------------|---------------|--------------|--------------|
-| Cauchy | 4+4 | 23076.83 MB/s | 3048.86 MB/s | 5620.84 MB/s |
-| Cauchy | 8+8 | 15206.87 MB/s | 3041.99 MB/s | 7173.71 MB/s |
-| Cauchy | 16+16 | 7427.47 MB/s | 1384.58 MB/s | 6343.85 MB/s |
-| Cauchy | 32+32 | 3785.64 MB/s | 557.60 MB/s | 4660.27 MB/s |
-| Cauchy | 64+64 | 1911.93 MB/s | 160.54 MB/s | 2864.63 MB/s |
-| Cauchy | 128+128 | 963.83 MB/s | 42.81 MB/s | 1597.93 MB/s |
-| Leopard GF16 | 4+4 | 18468.32 MB/s | 10.45 MB/s | 10.30 MB/s |
-| Leopard GF16 | 8+8 | 10293.79 MB/s | 20.83 MB/s | 20.51 MB/s |
-| Leopard GF16 | 16+16 | 12386.04 MB/s | 40.80 MB/s | 40.47 MB/s |
-| Leopard GF16 | 32+32 | 7347.35 MB/s | 81.15 MB/s | 79.80 MB/s |
-| Leopard GF16 | 64+64 | 8299.63 MB/s | 150.47 MB/s | 154.15 MB/s |
-| Leopard GF16 | 128+128 | 5629.04 MB/s | 278.84 MB/s | 289.15 MB/s |
-| Leopard GF16 | 256+256 | 6158.66 MB/s | 454.14 MB/s | 506.70 MB/s |
-| Leopard GF16 | 512+512 | 4418.58 MB/s | 685.75 MB/s | 801.63 MB/s |
-| Leopard GF16 | 1024+1024 | 4778.05 MB/s | 814.51 MB/s | 1080.19 MB/s |
-| Leopard GF16 | 2048+2048 | 3417.05 MB/s | 911.64 MB/s | 1179.48 MB/s |
-| Leopard GF16 | 4096+4096 | 3209.41 MB/s | 729.13 MB/s | 1135.06 MB/s |
-| Leopard GF16 | 8192+8192 | 2034.11 MB/s | 604.52 MB/s | 842.13 MB/s |
-| Leopard GF16 | 16384+16384 | 1525.88 MB/s | 486.74 MB/s | 750.01 MB/s |
-| Leopard GF16 | 32768+32768 | 1138.67 MB/s | 482.81 MB/s | 712.73 MB/s |
-
-"Traditional" encoding is faster until somewhere between 16 and 32 shards.
-Leopard provides fast encoding in all cases, but shows a significant overhead for reconstruction.
-
-Calculating the reconstruction matrix takes a significant amount of computation.
-With bigger shards that will be smaller. Arguably, fewer shards typically also means bigger shards.
-Due to the high shard count caching reconstruction matrices generally isn't feasible for Leopard.
-
-# Performance
-
-Performance depends mainly on the number of parity shards.
-In rough terms, doubling the number of parity shards will double the encoding time.
-
-Here are the throughput numbers with some different selections of data and parity shards.
-For reference each shard is 1MB random data, and 16 CPU cores are used for encoding.
-
-| Data | Parity | Go MB/s | SSSE3 MB/s | AVX2 MB/s |
-|------|--------|---------|------------|-----------|
-| 5 | 2 | 20,772 | 66,355 | 108,755 |
-| 8 | 8 | 6,815 | 38,338 | 70,516 |
-| 10 | 4 | 9,245 | 48,237 | 93,875 |
-| 50 | 20 | 2,063 | 12,130 | 22,828 |
-
-The throughput numbers here is the size of the encoded data and parity shards.
-
-If `runtime.GOMAXPROCS()` is set to a value higher than 1,
-the encoder will use multiple goroutines to perform the calculations in `Verify`, `Encode` and `Reconstruct`.
-
-
-Benchmarking `Reconstruct()` followed by a `Verify()` (=`all`) versus just calling `ReconstructData()` (=`data`) gives the following result:
-```
-benchmark all MB/s data MB/s speedup
-BenchmarkReconstruct10x2x10000-8 2011.67 10530.10 5.23x
-BenchmarkReconstruct50x5x50000-8 4585.41 14301.60 3.12x
-BenchmarkReconstruct10x2x1M-8 8081.15 28216.41 3.49x
-BenchmarkReconstruct5x2x1M-8 5780.07 28015.37 4.85x
-BenchmarkReconstruct10x4x1M-8 4352.56 14367.61 3.30x
-BenchmarkReconstruct50x20x1M-8 1364.35 4189.79 3.07x
-BenchmarkReconstruct10x4x16M-8 1484.35 5779.53 3.89x
-```
-
-The performance on AVX512 has been accelerated for CPUs when available.
-
-## ARM64 NEON
-
-By exploiting NEON instructions the performance for ARM has been accelerated.
-Below are the performance numbers for a single core on an EC2 m6g.16xlarge (Graviton2) instance (Amazon Linux 2):
-
-```
-BenchmarkGalois128K-64 119562 10028 ns/op 13070.78 MB/s
-BenchmarkGalois1M-64 14380 83424 ns/op 12569.22 MB/s
-BenchmarkGaloisXor128K-64 96508 12432 ns/op 10543.29 MB/s
-BenchmarkGaloisXor1M-64 10000 100322 ns/op 10452.13 MB/s
-```
-
-# Performance on ppc64le
-
-The performance for ppc64le has been accelerated.
-This gives roughly a 10x performance improvement on this architecture as can be seen below:
-
-```
-benchmark old MB/s new MB/s speedup
-BenchmarkGalois128K-160 948.87 8878.85 9.36x
-BenchmarkGalois1M-160 968.85 9041.92 9.33x
-BenchmarkGaloisXor128K-160 862.02 7905.00 9.17x
-BenchmarkGaloisXor1M-160 784.60 6296.65 8.03x
-```
-
-
-# Links
-* [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/).
-* [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon). Compatible java library by Backblaze.
-* [ocaml-reed-solomon-erasure](https://gitlab.com/darrenldl/ocaml-reed-solomon-erasure). Compatible OCaml implementation.
-* [reedsolomon-c](https://github.com/jannson/reedsolomon-c). C version, compatible with output from this package.
-* [Reed-Solomon Erasure Coding in Haskell](https://github.com/NicolasT/reedsolomon). Haskell port of the package with similar performance.
-* [reed-solomon-erasure](https://github.com/darrenldl/reed-solomon-erasure). Compatible Rust implementation.
-* [go-erasure](https://github.com/somethingnew2-0/go-erasure). A similar library using cgo, slower in my tests.
-* [Screaming Fast Galois Field Arithmetic](http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSE3 optimizations.
-* [Leopard-RS](https://github.com/catid/leopard) C library used as basis for GF16 implementation.
-
-# License
-
-This code, as the original [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) is published under an MIT license. See LICENSE file for more information.
diff --git a/vendor/github.com/klauspost/reedsolomon/galois.go b/vendor/github.com/klauspost/reedsolomon/galois.go
deleted file mode 100644
index 703f209..0000000
--- a/vendor/github.com/klauspost/reedsolomon/galois.go
+++ /dev/null
@@ -1,954 +0,0 @@
-/**
- * 8-bit Galois Field
- * Copyright 2015, Klaus Post
- * Copyright 2015, Backblaze, Inc. All rights reserved.
- */
-
-package reedsolomon
-
-import "encoding/binary"
-
-const (
- // The number of elements in the field.
- fieldSize = 256
-
- // The polynomial used to generate the logarithm table.
- //
- // There are a number of polynomials that work to generate
- // a Galois field of 256 elements. The choice is arbitrary,
- // and we just use the first one.
- //
- // The possibilities are: 29, 43, 45, 77, 95, 99, 101, 105,
- //* 113, 135, 141, 169, 195, 207, 231, and 245.
- generatingPolynomial = 29
-)
-
-var logTable = [fieldSize]byte{
- 0, 0, 1, 25, 2, 50, 26, 198,
- 3, 223, 51, 238, 27, 104, 199, 75,
- 4, 100, 224, 14, 52, 141, 239, 129,
- 28, 193, 105, 248, 200, 8, 76, 113,
- 5, 138, 101, 47, 225, 36, 15, 33,
- 53, 147, 142, 218, 240, 18, 130, 69,
- 29, 181, 194, 125, 106, 39, 249, 185,
- 201, 154, 9, 120, 77, 228, 114, 166,
- 6, 191, 139, 98, 102, 221, 48, 253,
- 226, 152, 37, 179, 16, 145, 34, 136,
- 54, 208, 148, 206, 143, 150, 219, 189,
- 241, 210, 19, 92, 131, 56, 70, 64,
- 30, 66, 182, 163, 195, 72, 126, 110,
- 107, 58, 40, 84, 250, 133, 186, 61,
- 202, 94, 155, 159, 10, 21, 121, 43,
- 78, 212, 229, 172, 115, 243, 167, 87,
- 7, 112, 192, 247, 140, 128, 99, 13,
- 103, 74, 222, 237, 49, 197, 254, 24,
- 227, 165, 153, 119, 38, 184, 180, 124,
- 17, 68, 146, 217, 35, 32, 137, 46,
- 55, 63, 209, 91, 149, 188, 207, 205,
- 144, 135, 151, 178, 220, 252, 190, 97,
- 242, 86, 211, 171, 20, 42, 93, 158,
- 132, 60, 57, 83, 71, 109, 65, 162,
- 31, 45, 67, 216, 183, 123, 164, 118,
- 196, 23, 73, 236, 127, 12, 111, 246,
- 108, 161, 59, 82, 41, 157, 85, 170,
- 251, 96, 134, 177, 187, 204, 62, 90,
- 203, 89, 95, 176, 156, 169, 160, 81,
- 11, 245, 22, 235, 122, 117, 44, 215,
- 79, 174, 213, 233, 230, 231, 173, 232,
- 116, 214, 244, 234, 168, 80, 88, 175,
-}
-
-/**
- * Inverse of the logarithm table. Maps integer logarithms
- * to members of the field. There is no entry for 255
- * because the highest log is 254.
- *
- * This table was generated by `go run gentables.go`
- */
-var expTable = []byte{0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x3, 0x6, 0xc, 0x18, 0x30, 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, 0x46, 0x8c, 0x5, 0xa, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89, 0xf, 0x1e, 0x3c, 0x78, 0xf0, 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0xd, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x7, 0xe, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x9, 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0xb, 0x16, 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x3, 0x6, 0xc, 0x18, 0x30, 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, 0x46, 0x8c, 0x5, 0xa, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89, 0xf, 0x1e, 0x3c, 
0x78, 0xf0, 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0xd, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x7, 0xe, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x9, 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0xb, 0x16, 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e}
-
-func galAdd(a, b byte) byte {
- return a ^ b
-}
-
-func galSub(a, b byte) byte {
- return a ^ b
-}
-
-// Table from https://github.com/templexxx/reedsolomon
-var invTable = [256]byte{0x0, 0x1, 0x8e, 0xf4, 0x47, 0xa7, 0x7a, 0xba, 0xad, 0x9d, 0xdd, 0x98, 0x3d, 0xaa, 0x5d, 0x96, 0xd8, 0x72, 0xc0, 0x58, 0xe0, 0x3e, 0x4c, 0x66, 0x90, 0xde, 0x55, 0x80, 0xa0, 0x83, 0x4b, 0x2a, 0x6c, 0xed, 0x39, 0x51, 0x60, 0x56, 0x2c, 0x8a, 0x70, 0xd0, 0x1f, 0x4a, 0x26, 0x8b, 0x33, 0x6e, 0x48, 0x89, 0x6f, 0x2e, 0xa4, 0xc3, 0x40, 0x5e, 0x50, 0x22, 0xcf, 0xa9, 0xab, 0xc, 0x15, 0xe1, 0x36, 0x5f, 0xf8, 0xd5, 0x92, 0x4e, 0xa6, 0x4, 0x30, 0x88, 0x2b, 0x1e, 0x16, 0x67, 0x45, 0x93, 0x38, 0x23, 0x68, 0x8c, 0x81, 0x1a, 0x25, 0x61, 0x13, 0xc1, 0xcb, 0x63, 0x97, 0xe, 0x37, 0x41, 0x24, 0x57, 0xca, 0x5b, 0xb9, 0xc4, 0x17, 0x4d, 0x52, 0x8d, 0xef, 0xb3, 0x20, 0xec, 0x2f, 0x32, 0x28, 0xd1, 0x11, 0xd9, 0xe9, 0xfb, 0xda, 0x79, 0xdb, 0x77, 0x6, 0xbb, 0x84, 0xcd, 0xfe, 0xfc, 0x1b, 0x54, 0xa1, 0x1d, 0x7c, 0xcc, 0xe4, 0xb0, 0x49, 0x31, 0x27, 0x2d, 0x53, 0x69, 0x2, 0xf5, 0x18, 0xdf, 0x44, 0x4f, 0x9b, 0xbc, 0xf, 0x5c, 0xb, 0xdc, 0xbd, 0x94, 0xac, 0x9, 0xc7, 0xa2, 0x1c, 0x82, 0x9f, 0xc6, 0x34, 0xc2, 0x46, 0x5, 0xce, 0x3b, 0xd, 0x3c, 0x9c, 0x8, 0xbe, 0xb7, 0x87, 0xe5, 0xee, 0x6b, 0xeb, 0xf2, 0xbf, 0xaf, 0xc5, 0x64, 0x7, 0x7b, 0x95, 0x9a, 0xae, 0xb6, 0x12, 0x59, 0xa5, 0x35, 0x65, 0xb8, 0xa3, 0x9e, 0xd2, 0xf7, 0x62, 0x5a, 0x85, 0x7d, 0xa8, 0x3a, 0x29, 0x71, 0xc8, 0xf6, 0xf9, 0x43, 0xd7, 0xd6, 0x10, 0x73, 0x76, 0x78, 0x99, 0xa, 0x19, 0x91, 0x14, 0x3f, 0xe6, 0xf0, 0x86, 0xb1, 0xe2, 0xf1, 0xfa, 0x74, 0xf3, 0xb4, 0x6d, 0x21, 0xb2, 0x6a, 0xe3, 0xe7, 0xb5, 0xea, 0x3, 0x8f, 0xd3, 0xc9, 0x42, 0xd4, 0xe8, 0x75, 0x7f, 0xff, 0x7e, 0xfd}
-
-var mulTable = [256][256]uint8{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
- {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff},
- {0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, 0x1d, 0x1f, 0x19, 0x1b, 0x15, 0x17, 0x11, 0x13, 0xd, 0xf, 0x9, 0xb, 0x5, 0x7, 0x1, 0x3, 0x3d, 0x3f, 0x39, 0x3b, 0x35, 0x37, 0x31, 0x33, 0x2d, 0x2f, 0x29, 0x2b, 0x25, 0x27, 0x21, 0x23, 0x5d, 0x5f, 0x59, 0x5b, 0x55, 0x57, 0x51, 0x53, 0x4d, 0x4f, 0x49, 0x4b, 0x45, 0x47, 0x41, 0x43, 0x7d, 0x7f, 0x79, 0x7b, 0x75, 0x77, 0x71, 0x73, 0x6d, 0x6f, 0x69, 0x6b, 0x65, 0x67, 0x61, 0x63, 0x9d, 0x9f, 0x99, 0x9b, 0x95, 0x97, 0x91, 0x93, 0x8d, 0x8f, 0x89, 0x8b, 0x85, 0x87, 0x81, 0x83, 0xbd, 0xbf, 0xb9, 0xbb, 0xb5, 0xb7, 0xb1, 0xb3, 0xad, 0xaf, 0xa9, 0xab, 0xa5, 0xa7, 0xa1, 0xa3, 0xdd, 0xdf, 0xd9, 0xdb, 0xd5, 0xd7, 0xd1, 0xd3, 0xcd, 0xcf, 0xc9, 0xcb, 0xc5, 0xc7, 0xc1, 0xc3, 0xfd, 0xff, 0xf9, 0xfb, 0xf5, 0xf7, 0xf1, 0xf3, 0xed, 0xef, 0xe9, 0xeb, 0xe5, 0xe7, 0xe1, 0xe3},
- {0x0, 0x3, 0x6, 0x5, 0xc, 0xf, 0xa, 0x9, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11, 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, 0x9d, 0x9e, 0x9b, 0x98, 0x91, 0x92, 0x97, 0x94, 0x85, 0x86, 0x83, 0x80, 0x89, 0x8a, 0x8f, 0x8c, 0xad, 0xae, 0xab, 0xa8, 0xa1, 0xa2, 0xa7, 0xa4, 0xb5, 0xb6, 0xb3, 0xb0, 0xb9, 0xba, 0xbf, 0xbc, 0xfd, 0xfe, 0xfb, 0xf8, 0xf1, 0xf2, 0xf7, 0xf4, 0xe5, 0xe6, 0xe3, 0xe0, 0xe9, 0xea, 0xef, 0xec, 0xcd, 0xce, 0xcb, 0xc8, 0xc1, 0xc2, 0xc7, 0xc4, 0xd5, 0xd6, 0xd3, 0xd0, 0xd9, 0xda, 0xdf, 0xdc, 0x5d, 0x5e, 0x5b, 0x58, 0x51, 0x52, 0x57, 0x54, 0x45, 0x46, 0x43, 0x40, 0x49, 0x4a, 0x4f, 0x4c, 0x6d, 0x6e, 0x6b, 0x68, 0x61, 0x62, 0x67, 0x64, 0x75, 0x76, 0x73, 0x70, 0x79, 0x7a, 0x7f, 0x7c, 0x3d, 0x3e, 0x3b, 0x38, 0x31, 0x32, 0x37, 0x34, 0x25, 0x26, 0x23, 0x20, 0x29, 0x2a, 0x2f, 0x2c, 0xd, 0xe, 0xb, 0x8, 0x1, 0x2, 0x7, 0x4, 0x15, 0x16, 0x13, 0x10, 0x19, 0x1a, 0x1f, 0x1c},
- {0x0, 0x4, 0x8, 0xc, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c, 0x40, 0x44, 0x48, 0x4c, 0x50, 0x54, 0x58, 0x5c, 0x60, 0x64, 0x68, 0x6c, 0x70, 0x74, 0x78, 0x7c, 0x80, 0x84, 0x88, 0x8c, 0x90, 0x94, 0x98, 0x9c, 0xa0, 0xa4, 0xa8, 0xac, 0xb0, 0xb4, 0xb8, 0xbc, 0xc0, 0xc4, 0xc8, 0xcc, 0xd0, 0xd4, 0xd8, 0xdc, 0xe0, 0xe4, 0xe8, 0xec, 0xf0, 0xf4, 0xf8, 0xfc, 0x1d, 0x19, 0x15, 0x11, 0xd, 0x9, 0x5, 0x1, 0x3d, 0x39, 0x35, 0x31, 0x2d, 0x29, 0x25, 0x21, 0x5d, 0x59, 0x55, 0x51, 0x4d, 0x49, 0x45, 0x41, 0x7d, 0x79, 0x75, 0x71, 0x6d, 0x69, 0x65, 0x61, 0x9d, 0x99, 0x95, 0x91, 0x8d, 0x89, 0x85, 0x81, 0xbd, 0xb9, 0xb5, 0xb1, 0xad, 0xa9, 0xa5, 0xa1, 0xdd, 0xd9, 0xd5, 0xd1, 0xcd, 0xc9, 0xc5, 0xc1, 0xfd, 0xf9, 0xf5, 0xf1, 0xed, 0xe9, 0xe5, 0xe1, 0x3a, 0x3e, 0x32, 0x36, 0x2a, 0x2e, 0x22, 0x26, 0x1a, 0x1e, 0x12, 0x16, 0xa, 0xe, 0x2, 0x6, 0x7a, 0x7e, 0x72, 0x76, 0x6a, 0x6e, 0x62, 0x66, 0x5a, 0x5e, 0x52, 0x56, 0x4a, 0x4e, 0x42, 0x46, 0xba, 0xbe, 0xb2, 0xb6, 0xaa, 0xae, 0xa2, 0xa6, 0x9a, 0x9e, 0x92, 0x96, 0x8a, 0x8e, 0x82, 0x86, 0xfa, 0xfe, 0xf2, 0xf6, 0xea, 0xee, 0xe2, 0xe6, 0xda, 0xde, 0xd2, 0xd6, 0xca, 0xce, 0xc2, 0xc6, 0x27, 0x23, 0x2f, 0x2b, 0x37, 0x33, 0x3f, 0x3b, 0x7, 0x3, 0xf, 0xb, 0x17, 0x13, 0x1f, 0x1b, 0x67, 0x63, 0x6f, 0x6b, 0x77, 0x73, 0x7f, 0x7b, 0x47, 0x43, 0x4f, 0x4b, 0x57, 0x53, 0x5f, 0x5b, 0xa7, 0xa3, 0xaf, 0xab, 0xb7, 0xb3, 0xbf, 0xbb, 0x87, 0x83, 0x8f, 0x8b, 0x97, 0x93, 0x9f, 0x9b, 0xe7, 0xe3, 0xef, 0xeb, 0xf7, 0xf3, 0xff, 0xfb, 0xc7, 0xc3, 0xcf, 0xcb, 0xd7, 0xd3, 0xdf, 0xdb},
- {0x0, 0x5, 0xa, 0xf, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33, 0x50, 0x55, 0x5a, 0x5f, 0x44, 0x41, 0x4e, 0x4b, 0x78, 0x7d, 0x72, 0x77, 0x6c, 0x69, 0x66, 0x63, 0xa0, 0xa5, 0xaa, 0xaf, 0xb4, 0xb1, 0xbe, 0xbb, 0x88, 0x8d, 0x82, 0x87, 0x9c, 0x99, 0x96, 0x93, 0xf0, 0xf5, 0xfa, 0xff, 0xe4, 0xe1, 0xee, 0xeb, 0xd8, 0xdd, 0xd2, 0xd7, 0xcc, 0xc9, 0xc6, 0xc3, 0x5d, 0x58, 0x57, 0x52, 0x49, 0x4c, 0x43, 0x46, 0x75, 0x70, 0x7f, 0x7a, 0x61, 0x64, 0x6b, 0x6e, 0xd, 0x8, 0x7, 0x2, 0x19, 0x1c, 0x13, 0x16, 0x25, 0x20, 0x2f, 0x2a, 0x31, 0x34, 0x3b, 0x3e, 0xfd, 0xf8, 0xf7, 0xf2, 0xe9, 0xec, 0xe3, 0xe6, 0xd5, 0xd0, 0xdf, 0xda, 0xc1, 0xc4, 0xcb, 0xce, 0xad, 0xa8, 0xa7, 0xa2, 0xb9, 0xbc, 0xb3, 0xb6, 0x85, 0x80, 0x8f, 0x8a, 0x91, 0x94, 0x9b, 0x9e, 0xba, 0xbf, 0xb0, 0xb5, 0xae, 0xab, 0xa4, 0xa1, 0x92, 0x97, 0x98, 0x9d, 0x86, 0x83, 0x8c, 0x89, 0xea, 0xef, 0xe0, 0xe5, 0xfe, 0xfb, 0xf4, 0xf1, 0xc2, 0xc7, 0xc8, 0xcd, 0xd6, 0xd3, 0xdc, 0xd9, 0x1a, 0x1f, 0x10, 0x15, 0xe, 0xb, 0x4, 0x1, 0x32, 0x37, 0x38, 0x3d, 0x26, 0x23, 0x2c, 0x29, 0x4a, 0x4f, 0x40, 0x45, 0x5e, 0x5b, 0x54, 0x51, 0x62, 0x67, 0x68, 0x6d, 0x76, 0x73, 0x7c, 0x79, 0xe7, 0xe2, 0xed, 0xe8, 0xf3, 0xf6, 0xf9, 0xfc, 0xcf, 0xca, 0xc5, 0xc0, 0xdb, 0xde, 0xd1, 0xd4, 0xb7, 0xb2, 0xbd, 0xb8, 0xa3, 0xa6, 0xa9, 0xac, 0x9f, 0x9a, 0x95, 0x90, 0x8b, 0x8e, 0x81, 0x84, 0x47, 0x42, 0x4d, 0x48, 0x53, 0x56, 0x59, 0x5c, 0x6f, 0x6a, 0x65, 0x60, 0x7b, 0x7e, 0x71, 0x74, 0x17, 0x12, 0x1d, 0x18, 0x3, 0x6, 0x9, 0xc, 0x3f, 0x3a, 0x35, 0x30, 0x2b, 0x2e, 0x21, 0x24},
- {0x0, 0x6, 0xc, 0xa, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22, 0x60, 0x66, 0x6c, 0x6a, 0x78, 0x7e, 0x74, 0x72, 0x50, 0x56, 0x5c, 0x5a, 0x48, 0x4e, 0x44, 0x42, 0xc0, 0xc6, 0xcc, 0xca, 0xd8, 0xde, 0xd4, 0xd2, 0xf0, 0xf6, 0xfc, 0xfa, 0xe8, 0xee, 0xe4, 0xe2, 0xa0, 0xa6, 0xac, 0xaa, 0xb8, 0xbe, 0xb4, 0xb2, 0x90, 0x96, 0x9c, 0x9a, 0x88, 0x8e, 0x84, 0x82, 0x9d, 0x9b, 0x91, 0x97, 0x85, 0x83, 0x89, 0x8f, 0xad, 0xab, 0xa1, 0xa7, 0xb5, 0xb3, 0xb9, 0xbf, 0xfd, 0xfb, 0xf1, 0xf7, 0xe5, 0xe3, 0xe9, 0xef, 0xcd, 0xcb, 0xc1, 0xc7, 0xd5, 0xd3, 0xd9, 0xdf, 0x5d, 0x5b, 0x51, 0x57, 0x45, 0x43, 0x49, 0x4f, 0x6d, 0x6b, 0x61, 0x67, 0x75, 0x73, 0x79, 0x7f, 0x3d, 0x3b, 0x31, 0x37, 0x25, 0x23, 0x29, 0x2f, 0xd, 0xb, 0x1, 0x7, 0x15, 0x13, 0x19, 0x1f, 0x27, 0x21, 0x2b, 0x2d, 0x3f, 0x39, 0x33, 0x35, 0x17, 0x11, 0x1b, 0x1d, 0xf, 0x9, 0x3, 0x5, 0x47, 0x41, 0x4b, 0x4d, 0x5f, 0x59, 0x53, 0x55, 0x77, 0x71, 0x7b, 0x7d, 0x6f, 0x69, 0x63, 0x65, 0xe7, 0xe1, 0xeb, 0xed, 0xff, 0xf9, 0xf3, 0xf5, 0xd7, 0xd1, 0xdb, 0xdd, 0xcf, 0xc9, 0xc3, 0xc5, 0x87, 0x81, 0x8b, 0x8d, 0x9f, 0x99, 0x93, 0x95, 0xb7, 0xb1, 0xbb, 0xbd, 0xaf, 0xa9, 0xa3, 0xa5, 0xba, 0xbc, 0xb6, 0xb0, 0xa2, 0xa4, 0xae, 0xa8, 0x8a, 0x8c, 0x86, 0x80, 0x92, 0x94, 0x9e, 0x98, 0xda, 0xdc, 0xd6, 0xd0, 0xc2, 0xc4, 0xce, 0xc8, 0xea, 0xec, 0xe6, 0xe0, 0xf2, 0xf4, 0xfe, 0xf8, 0x7a, 0x7c, 0x76, 0x70, 0x62, 0x64, 0x6e, 0x68, 0x4a, 0x4c, 0x46, 0x40, 0x52, 0x54, 0x5e, 0x58, 0x1a, 0x1c, 0x16, 0x10, 0x2, 0x4, 0xe, 0x8, 0x2a, 0x2c, 0x26, 0x20, 0x32, 0x34, 0x3e, 0x38},
- {0x0, 0x7, 0xe, 0x9, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d, 0x70, 0x77, 0x7e, 0x79, 0x6c, 0x6b, 0x62, 0x65, 0x48, 0x4f, 0x46, 0x41, 0x54, 0x53, 0x5a, 0x5d, 0xe0, 0xe7, 0xee, 0xe9, 0xfc, 0xfb, 0xf2, 0xf5, 0xd8, 0xdf, 0xd6, 0xd1, 0xc4, 0xc3, 0xca, 0xcd, 0x90, 0x97, 0x9e, 0x99, 0x8c, 0x8b, 0x82, 0x85, 0xa8, 0xaf, 0xa6, 0xa1, 0xb4, 0xb3, 0xba, 0xbd, 0xdd, 0xda, 0xd3, 0xd4, 0xc1, 0xc6, 0xcf, 0xc8, 0xe5, 0xe2, 0xeb, 0xec, 0xf9, 0xfe, 0xf7, 0xf0, 0xad, 0xaa, 0xa3, 0xa4, 0xb1, 0xb6, 0xbf, 0xb8, 0x95, 0x92, 0x9b, 0x9c, 0x89, 0x8e, 0x87, 0x80, 0x3d, 0x3a, 0x33, 0x34, 0x21, 0x26, 0x2f, 0x28, 0x5, 0x2, 0xb, 0xc, 0x19, 0x1e, 0x17, 0x10, 0x4d, 0x4a, 0x43, 0x44, 0x51, 0x56, 0x5f, 0x58, 0x75, 0x72, 0x7b, 0x7c, 0x69, 0x6e, 0x67, 0x60, 0xa7, 0xa0, 0xa9, 0xae, 0xbb, 0xbc, 0xb5, 0xb2, 0x9f, 0x98, 0x91, 0x96, 0x83, 0x84, 0x8d, 0x8a, 0xd7, 0xd0, 0xd9, 0xde, 0xcb, 0xcc, 0xc5, 0xc2, 0xef, 0xe8, 0xe1, 0xe6, 0xf3, 0xf4, 0xfd, 0xfa, 0x47, 0x40, 0x49, 0x4e, 0x5b, 0x5c, 0x55, 0x52, 0x7f, 0x78, 0x71, 0x76, 0x63, 0x64, 0x6d, 0x6a, 0x37, 0x30, 0x39, 0x3e, 0x2b, 0x2c, 0x25, 0x22, 0xf, 0x8, 0x1, 0x6, 0x13, 0x14, 0x1d, 0x1a, 0x7a, 0x7d, 0x74, 0x73, 0x66, 0x61, 0x68, 0x6f, 0x42, 0x45, 0x4c, 0x4b, 0x5e, 0x59, 0x50, 0x57, 0xa, 0xd, 0x4, 0x3, 0x16, 0x11, 0x18, 0x1f, 0x32, 0x35, 0x3c, 0x3b, 0x2e, 0x29, 0x20, 0x27, 0x9a, 0x9d, 0x94, 0x93, 0x86, 0x81, 0x88, 0x8f, 0xa2, 0xa5, 0xac, 0xab, 0xbe, 0xb9, 0xb0, 0xb7, 0xea, 0xed, 0xe4, 0xe3, 0xf6, 0xf1, 0xf8, 0xff, 0xd2, 0xd5, 0xdc, 0xdb, 0xce, 0xc9, 0xc0, 0xc7},
- {0x0, 0x8, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78, 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, 0xd8, 0xe0, 0xe8, 0xf0, 0xf8, 0x1d, 0x15, 0xd, 0x5, 0x3d, 0x35, 0x2d, 0x25, 0x5d, 0x55, 0x4d, 0x45, 0x7d, 0x75, 0x6d, 0x65, 0x9d, 0x95, 0x8d, 0x85, 0xbd, 0xb5, 0xad, 0xa5, 0xdd, 0xd5, 0xcd, 0xc5, 0xfd, 0xf5, 0xed, 0xe5, 0x3a, 0x32, 0x2a, 0x22, 0x1a, 0x12, 0xa, 0x2, 0x7a, 0x72, 0x6a, 0x62, 0x5a, 0x52, 0x4a, 0x42, 0xba, 0xb2, 0xaa, 0xa2, 0x9a, 0x92, 0x8a, 0x82, 0xfa, 0xf2, 0xea, 0xe2, 0xda, 0xd2, 0xca, 0xc2, 0x27, 0x2f, 0x37, 0x3f, 0x7, 0xf, 0x17, 0x1f, 0x67, 0x6f, 0x77, 0x7f, 0x47, 0x4f, 0x57, 0x5f, 0xa7, 0xaf, 0xb7, 0xbf, 0x87, 0x8f, 0x97, 0x9f, 0xe7, 0xef, 0xf7, 0xff, 0xc7, 0xcf, 0xd7, 0xdf, 0x74, 0x7c, 0x64, 0x6c, 0x54, 0x5c, 0x44, 0x4c, 0x34, 0x3c, 0x24, 0x2c, 0x14, 0x1c, 0x4, 0xc, 0xf4, 0xfc, 0xe4, 0xec, 0xd4, 0xdc, 0xc4, 0xcc, 0xb4, 0xbc, 0xa4, 0xac, 0x94, 0x9c, 0x84, 0x8c, 0x69, 0x61, 0x79, 0x71, 0x49, 0x41, 0x59, 0x51, 0x29, 0x21, 0x39, 0x31, 0x9, 0x1, 0x19, 0x11, 0xe9, 0xe1, 0xf9, 0xf1, 0xc9, 0xc1, 0xd9, 0xd1, 0xa9, 0xa1, 0xb9, 0xb1, 0x89, 0x81, 0x99, 0x91, 0x4e, 0x46, 0x5e, 0x56, 0x6e, 0x66, 0x7e, 0x76, 0xe, 0x6, 0x1e, 0x16, 0x2e, 0x26, 0x3e, 0x36, 0xce, 0xc6, 0xde, 0xd6, 0xee, 0xe6, 0xfe, 0xf6, 0x8e, 0x86, 0x9e, 0x96, 0xae, 0xa6, 0xbe, 0xb6, 0x53, 0x5b, 0x43, 0x4b, 0x73, 0x7b, 0x63, 0x6b, 0x13, 0x1b, 0x3, 0xb, 0x33, 0x3b, 0x23, 0x2b, 0xd3, 0xdb, 0xc3, 0xcb, 0xf3, 0xfb, 0xe3, 0xeb, 0x93, 0x9b, 0x83, 0x8b, 0xb3, 0xbb, 0xa3, 0xab},
- {0x0, 0x9, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, 0x3d, 0x34, 0x2f, 0x26, 0x19, 0x10, 0xb, 0x2, 0x75, 0x7c, 0x67, 0x6e, 0x51, 0x58, 0x43, 0x4a, 0xad, 0xa4, 0xbf, 0xb6, 0x89, 0x80, 0x9b, 0x92, 0xe5, 0xec, 0xf7, 0xfe, 0xc1, 0xc8, 0xd3, 0xda, 0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45, 0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x4, 0xd, 0xea, 0xe3, 0xf8, 0xf1, 0xce, 0xc7, 0xdc, 0xd5, 0xa2, 0xab, 0xb0, 0xb9, 0x86, 0x8f, 0x94, 0x9d, 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0xf, 0x6, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, 0xf4, 0xfd, 0xe6, 0xef, 0xd0, 0xd9, 0xc2, 0xcb, 0xbc, 0xb5, 0xae, 0xa7, 0x98, 0x91, 0x8a, 0x83, 0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b, 0x2c, 0x25, 0x3e, 0x37, 0x8, 0x1, 0x1a, 0x13, 0xc9, 0xc0, 0xdb, 0xd2, 0xed, 0xe4, 0xff, 0xf6, 0x81, 0x88, 0x93, 0x9a, 0xa5, 0xac, 0xb7, 0xbe, 0x59, 0x50, 0x4b, 0x42, 0x7d, 0x74, 0x6f, 0x66, 0x11, 0x18, 0x3, 0xa, 0x35, 0x3c, 0x27, 0x2e, 0x8e, 0x87, 0x9c, 0x95, 0xaa, 0xa3, 0xb8, 0xb1, 0xc6, 0xcf, 0xd4, 0xdd, 0xe2, 0xeb, 0xf0, 0xf9, 0x1e, 0x17, 0xc, 0x5, 0x3a, 0x33, 0x28, 0x21, 0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69, 0xb3, 0xba, 0xa1, 0xa8, 0x97, 0x9e, 0x85, 0x8c, 0xfb, 0xf2, 0xe9, 0xe0, 0xdf, 0xd6, 0xcd, 0xc4, 0x23, 0x2a, 0x31, 0x38, 0x7, 0xe, 0x15, 0x1c, 0x6b, 0x62, 0x79, 0x70, 0x4f, 0x46, 0x5d, 0x54},
- {0x0, 0xa, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66, 0xa0, 0xaa, 0xb4, 0xbe, 0x88, 0x82, 0x9c, 0x96, 0xf0, 0xfa, 0xe4, 0xee, 0xd8, 0xd2, 0xcc, 0xc6, 0x5d, 0x57, 0x49, 0x43, 0x75, 0x7f, 0x61, 0x6b, 0xd, 0x7, 0x19, 0x13, 0x25, 0x2f, 0x31, 0x3b, 0xfd, 0xf7, 0xe9, 0xe3, 0xd5, 0xdf, 0xc1, 0xcb, 0xad, 0xa7, 0xb9, 0xb3, 0x85, 0x8f, 0x91, 0x9b, 0xba, 0xb0, 0xae, 0xa4, 0x92, 0x98, 0x86, 0x8c, 0xea, 0xe0, 0xfe, 0xf4, 0xc2, 0xc8, 0xd6, 0xdc, 0x1a, 0x10, 0xe, 0x4, 0x32, 0x38, 0x26, 0x2c, 0x4a, 0x40, 0x5e, 0x54, 0x62, 0x68, 0x76, 0x7c, 0xe7, 0xed, 0xf3, 0xf9, 0xcf, 0xc5, 0xdb, 0xd1, 0xb7, 0xbd, 0xa3, 0xa9, 0x9f, 0x95, 0x8b, 0x81, 0x47, 0x4d, 0x53, 0x59, 0x6f, 0x65, 0x7b, 0x71, 0x17, 0x1d, 0x3, 0x9, 0x3f, 0x35, 0x2b, 0x21, 0x69, 0x63, 0x7d, 0x77, 0x41, 0x4b, 0x55, 0x5f, 0x39, 0x33, 0x2d, 0x27, 0x11, 0x1b, 0x5, 0xf, 0xc9, 0xc3, 0xdd, 0xd7, 0xe1, 0xeb, 0xf5, 0xff, 0x99, 0x93, 0x8d, 0x87, 0xb1, 0xbb, 0xa5, 0xaf, 0x34, 0x3e, 0x20, 0x2a, 0x1c, 0x16, 0x8, 0x2, 0x64, 0x6e, 0x70, 0x7a, 0x4c, 0x46, 0x58, 0x52, 0x94, 0x9e, 0x80, 0x8a, 0xbc, 0xb6, 0xa8, 0xa2, 0xc4, 0xce, 0xd0, 0xda, 0xec, 0xe6, 0xf8, 0xf2, 0xd3, 0xd9, 0xc7, 0xcd, 0xfb, 0xf1, 0xef, 0xe5, 0x83, 0x89, 0x97, 0x9d, 0xab, 0xa1, 0xbf, 0xb5, 0x73, 0x79, 0x67, 0x6d, 0x5b, 0x51, 0x4f, 0x45, 0x23, 0x29, 0x37, 0x3d, 0xb, 0x1, 0x1f, 0x15, 0x8e, 0x84, 0x9a, 0x90, 0xa6, 0xac, 0xb2, 0xb8, 0xde, 0xd4, 0xca, 0xc0, 0xf6, 0xfc, 0xe2, 0xe8, 0x2e, 0x24, 0x3a, 0x30, 0x6, 0xc, 0x12, 0x18, 0x7e, 0x74, 0x6a, 0x60, 0x56, 0x5c, 0x42, 0x48},
- {0x0, 0xb, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69, 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, 0x7d, 0x76, 0x6b, 0x60, 0x51, 0x5a, 0x47, 0x4c, 0x25, 0x2e, 0x33, 0x38, 0x9, 0x2, 0x1f, 0x14, 0xcd, 0xc6, 0xdb, 0xd0, 0xe1, 0xea, 0xf7, 0xfc, 0x95, 0x9e, 0x83, 0x88, 0xb9, 0xb2, 0xaf, 0xa4, 0xfa, 0xf1, 0xec, 0xe7, 0xd6, 0xdd, 0xc0, 0xcb, 0xa2, 0xa9, 0xb4, 0xbf, 0x8e, 0x85, 0x98, 0x93, 0x4a, 0x41, 0x5c, 0x57, 0x66, 0x6d, 0x70, 0x7b, 0x12, 0x19, 0x4, 0xf, 0x3e, 0x35, 0x28, 0x23, 0x87, 0x8c, 0x91, 0x9a, 0xab, 0xa0, 0xbd, 0xb6, 0xdf, 0xd4, 0xc9, 0xc2, 0xf3, 0xf8, 0xe5, 0xee, 0x37, 0x3c, 0x21, 0x2a, 0x1b, 0x10, 0xd, 0x6, 0x6f, 0x64, 0x79, 0x72, 0x43, 0x48, 0x55, 0x5e, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, 0x1, 0xa, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x94, 0x9f, 0x82, 0x89, 0xb8, 0xb3, 0xae, 0xa5, 0xcc, 0xc7, 0xda, 0xd1, 0xe0, 0xeb, 0xf6, 0xfd, 0x24, 0x2f, 0x32, 0x39, 0x8, 0x3, 0x1e, 0x15, 0x7c, 0x77, 0x6a, 0x61, 0x50, 0x5b, 0x46, 0x4d, 0x13, 0x18, 0x5, 0xe, 0x3f, 0x34, 0x29, 0x22, 0x4b, 0x40, 0x5d, 0x56, 0x67, 0x6c, 0x71, 0x7a, 0xa3, 0xa8, 0xb5, 0xbe, 0x8f, 0x84, 0x99, 0x92, 0xfb, 0xf0, 0xed, 0xe6, 0xd7, 0xdc, 0xc1, 0xca, 0x6e, 0x65, 0x78, 0x73, 0x42, 0x49, 0x54, 0x5f, 0x36, 0x3d, 0x20, 0x2b, 0x1a, 0x11, 0xc, 0x7, 0xde, 0xd5, 0xc8, 0xc3, 0xf2, 0xf9, 0xe4, 0xef, 0x86, 0x8d, 0x90, 0x9b, 0xaa, 0xa1, 0xbc, 0xb7},
- {0x0, 0xc, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44, 0xc0, 0xcc, 0xd8, 0xd4, 0xf0, 0xfc, 0xe8, 0xe4, 0xa0, 0xac, 0xb8, 0xb4, 0x90, 0x9c, 0x88, 0x84, 0x9d, 0x91, 0x85, 0x89, 0xad, 0xa1, 0xb5, 0xb9, 0xfd, 0xf1, 0xe5, 0xe9, 0xcd, 0xc1, 0xd5, 0xd9, 0x5d, 0x51, 0x45, 0x49, 0x6d, 0x61, 0x75, 0x79, 0x3d, 0x31, 0x25, 0x29, 0xd, 0x1, 0x15, 0x19, 0x27, 0x2b, 0x3f, 0x33, 0x17, 0x1b, 0xf, 0x3, 0x47, 0x4b, 0x5f, 0x53, 0x77, 0x7b, 0x6f, 0x63, 0xe7, 0xeb, 0xff, 0xf3, 0xd7, 0xdb, 0xcf, 0xc3, 0x87, 0x8b, 0x9f, 0x93, 0xb7, 0xbb, 0xaf, 0xa3, 0xba, 0xb6, 0xa2, 0xae, 0x8a, 0x86, 0x92, 0x9e, 0xda, 0xd6, 0xc2, 0xce, 0xea, 0xe6, 0xf2, 0xfe, 0x7a, 0x76, 0x62, 0x6e, 0x4a, 0x46, 0x52, 0x5e, 0x1a, 0x16, 0x2, 0xe, 0x2a, 0x26, 0x32, 0x3e, 0x4e, 0x42, 0x56, 0x5a, 0x7e, 0x72, 0x66, 0x6a, 0x2e, 0x22, 0x36, 0x3a, 0x1e, 0x12, 0x6, 0xa, 0x8e, 0x82, 0x96, 0x9a, 0xbe, 0xb2, 0xa6, 0xaa, 0xee, 0xe2, 0xf6, 0xfa, 0xde, 0xd2, 0xc6, 0xca, 0xd3, 0xdf, 0xcb, 0xc7, 0xe3, 0xef, 0xfb, 0xf7, 0xb3, 0xbf, 0xab, 0xa7, 0x83, 0x8f, 0x9b, 0x97, 0x13, 0x1f, 0xb, 0x7, 0x23, 0x2f, 0x3b, 0x37, 0x73, 0x7f, 0x6b, 0x67, 0x43, 0x4f, 0x5b, 0x57, 0x69, 0x65, 0x71, 0x7d, 0x59, 0x55, 0x41, 0x4d, 0x9, 0x5, 0x11, 0x1d, 0x39, 0x35, 0x21, 0x2d, 0xa9, 0xa5, 0xb1, 0xbd, 0x99, 0x95, 0x81, 0x8d, 0xc9, 0xc5, 0xd1, 0xdd, 0xf9, 0xf5, 0xe1, 0xed, 0xf4, 0xf8, 0xec, 0xe0, 0xc4, 0xc8, 0xdc, 0xd0, 0x94, 0x98, 0x8c, 0x80, 0xa4, 0xa8, 0xbc, 0xb0, 0x34, 0x38, 0x2c, 0x20, 0x4, 0x8, 0x1c, 0x10, 0x54, 0x58, 0x4c, 0x40, 0x64, 0x68, 0x7c, 0x70},
- {0x0, 0xd, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b, 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x5, 0x8, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0xf, 0x2, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, 0xa, 0x7, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, 0xce, 0xc3, 0xd4, 0xd9, 0xfa, 0xf7, 0xe0, 0xed, 0xa6, 0xab, 0xbc, 0xb1, 0x92, 0x9f, 0x88, 0x85, 0x1e, 0x13, 0x4, 0x9, 0x2a, 0x27, 0x30, 0x3d, 0x76, 0x7b, 0x6c, 0x61, 0x42, 0x4f, 0x58, 0x55, 0x73, 0x7e, 0x69, 0x64, 0x47, 0x4a, 0x5d, 0x50, 0x1b, 0x16, 0x1, 0xc, 0x2f, 0x22, 0x35, 0x38, 0xa3, 0xae, 0xb9, 0xb4, 0x97, 0x9a, 0x8d, 0x80, 0xcb, 0xc6, 0xd1, 0xdc, 0xff, 0xf2, 0xe5, 0xe8, 0xa9, 0xa4, 0xb3, 0xbe, 0x9d, 0x90, 0x87, 0x8a, 0xc1, 0xcc, 0xdb, 0xd6, 0xf5, 0xf8, 0xef, 0xe2, 0x79, 0x74, 0x63, 0x6e, 0x4d, 0x40, 0x57, 0x5a, 0x11, 0x1c, 0xb, 0x6, 0x25, 0x28, 0x3f, 0x32, 0x14, 0x19, 0xe, 0x3, 0x20, 0x2d, 0x3a, 0x37, 0x7c, 0x71, 0x66, 0x6b, 0x48, 0x45, 0x52, 0x5f, 0xc4, 0xc9, 0xde, 0xd3, 0xf0, 0xfd, 0xea, 0xe7, 0xac, 0xa1, 0xb6, 0xbb, 0x98, 0x95, 0x82, 0x8f},
- {0x0, 0xe, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0x3d, 0x33, 0x21, 0x2f, 0x5, 0xb, 0x19, 0x17, 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d, 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, 0x37, 0x39, 0x2b, 0x25, 0xf, 0x1, 0x13, 0x1d, 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0xa, 0x4, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, 0x53, 0x5d, 0x4f, 0x41, 0x6b, 0x65, 0x77, 0x79, 0x23, 0x2d, 0x3f, 0x31, 0x1b, 0x15, 0x7, 0x9, 0xb3, 0xbd, 0xaf, 0xa1, 0x8b, 0x85, 0x97, 0x99, 0xc3, 0xcd, 0xdf, 0xd1, 0xfb, 0xf5, 0xe7, 0xe9, 0x8e, 0x80, 0x92, 0x9c, 0xb6, 0xb8, 0xaa, 0xa4, 0xfe, 0xf0, 0xe2, 0xec, 0xc6, 0xc8, 0xda, 0xd4, 0x6e, 0x60, 0x72, 0x7c, 0x56, 0x58, 0x4a, 0x44, 0x1e, 0x10, 0x2, 0xc, 0x26, 0x28, 0x3a, 0x34, 0xf4, 0xfa, 0xe8, 0xe6, 0xcc, 0xc2, 0xd0, 0xde, 0x84, 0x8a, 0x98, 0x96, 0xbc, 0xb2, 0xa0, 0xae, 0x14, 0x1a, 0x8, 0x6, 0x2c, 0x22, 0x30, 0x3e, 0x64, 0x6a, 0x78, 0x76, 0x5c, 0x52, 0x40, 0x4e, 0x29, 0x27, 0x35, 0x3b, 0x11, 0x1f, 0xd, 0x3, 0x59, 0x57, 0x45, 0x4b, 0x61, 0x6f, 0x7d, 0x73, 0xc9, 0xc7, 0xd5, 0xdb, 0xf1, 0xff, 0xed, 0xe3, 0xb9, 0xb7, 0xa5, 0xab, 0x81, 0x8f, 0x9d, 0x93},
- {0x0, 0xf, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55, 0xf0, 0xff, 0xee, 0xe1, 0xcc, 0xc3, 0xd2, 0xdd, 0x88, 0x87, 0x96, 0x99, 0xb4, 0xbb, 0xaa, 0xa5, 0xfd, 0xf2, 0xe3, 0xec, 0xc1, 0xce, 0xdf, 0xd0, 0x85, 0x8a, 0x9b, 0x94, 0xb9, 0xb6, 0xa7, 0xa8, 0xd, 0x2, 0x13, 0x1c, 0x31, 0x3e, 0x2f, 0x20, 0x75, 0x7a, 0x6b, 0x64, 0x49, 0x46, 0x57, 0x58, 0xe7, 0xe8, 0xf9, 0xf6, 0xdb, 0xd4, 0xc5, 0xca, 0x9f, 0x90, 0x81, 0x8e, 0xa3, 0xac, 0xbd, 0xb2, 0x17, 0x18, 0x9, 0x6, 0x2b, 0x24, 0x35, 0x3a, 0x6f, 0x60, 0x71, 0x7e, 0x53, 0x5c, 0x4d, 0x42, 0x1a, 0x15, 0x4, 0xb, 0x26, 0x29, 0x38, 0x37, 0x62, 0x6d, 0x7c, 0x73, 0x5e, 0x51, 0x40, 0x4f, 0xea, 0xe5, 0xf4, 0xfb, 0xd6, 0xd9, 0xc8, 0xc7, 0x92, 0x9d, 0x8c, 0x83, 0xae, 0xa1, 0xb0, 0xbf, 0xd3, 0xdc, 0xcd, 0xc2, 0xef, 0xe0, 0xf1, 0xfe, 0xab, 0xa4, 0xb5, 0xba, 0x97, 0x98, 0x89, 0x86, 0x23, 0x2c, 0x3d, 0x32, 0x1f, 0x10, 0x1, 0xe, 0x5b, 0x54, 0x45, 0x4a, 0x67, 0x68, 0x79, 0x76, 0x2e, 0x21, 0x30, 0x3f, 0x12, 0x1d, 0xc, 0x3, 0x56, 0x59, 0x48, 0x47, 0x6a, 0x65, 0x74, 0x7b, 0xde, 0xd1, 0xc0, 0xcf, 0xe2, 0xed, 0xfc, 0xf3, 0xa6, 0xa9, 0xb8, 0xb7, 0x9a, 0x95, 0x84, 0x8b, 0x34, 0x3b, 0x2a, 0x25, 0x8, 0x7, 0x16, 0x19, 0x4c, 0x43, 0x52, 0x5d, 0x70, 0x7f, 0x6e, 0x61, 0xc4, 0xcb, 0xda, 0xd5, 0xf8, 0xf7, 0xe6, 0xe9, 0xbc, 0xb3, 0xa2, 0xad, 0x80, 0x8f, 0x9e, 0x91, 0xc9, 0xc6, 0xd7, 0xd8, 0xf5, 0xfa, 0xeb, 0xe4, 0xb1, 0xbe, 0xaf, 0xa0, 0x8d, 0x82, 0x93, 0x9c, 0x39, 0x36, 0x27, 0x28, 0x5, 0xa, 0x1b, 0x14, 0x41, 0x4e, 0x5f, 0x50, 0x7d, 0x72, 0x63, 0x6c},
- {0x0, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0, 0x1d, 0xd, 0x3d, 0x2d, 0x5d, 0x4d, 0x7d, 0x6d, 0x9d, 0x8d, 0xbd, 0xad, 0xdd, 0xcd, 0xfd, 0xed, 0x3a, 0x2a, 0x1a, 0xa, 0x7a, 0x6a, 0x5a, 0x4a, 0xba, 0xaa, 0x9a, 0x8a, 0xfa, 0xea, 0xda, 0xca, 0x27, 0x37, 0x7, 0x17, 0x67, 0x77, 0x47, 0x57, 0xa7, 0xb7, 0x87, 0x97, 0xe7, 0xf7, 0xc7, 0xd7, 0x74, 0x64, 0x54, 0x44, 0x34, 0x24, 0x14, 0x4, 0xf4, 0xe4, 0xd4, 0xc4, 0xb4, 0xa4, 0x94, 0x84, 0x69, 0x79, 0x49, 0x59, 0x29, 0x39, 0x9, 0x19, 0xe9, 0xf9, 0xc9, 0xd9, 0xa9, 0xb9, 0x89, 0x99, 0x4e, 0x5e, 0x6e, 0x7e, 0xe, 0x1e, 0x2e, 0x3e, 0xce, 0xde, 0xee, 0xfe, 0x8e, 0x9e, 0xae, 0xbe, 0x53, 0x43, 0x73, 0x63, 0x13, 0x3, 0x33, 0x23, 0xd3, 0xc3, 0xf3, 0xe3, 0x93, 0x83, 0xb3, 0xa3, 0xe8, 0xf8, 0xc8, 0xd8, 0xa8, 0xb8, 0x88, 0x98, 0x68, 0x78, 0x48, 0x58, 0x28, 0x38, 0x8, 0x18, 0xf5, 0xe5, 0xd5, 0xc5, 0xb5, 0xa5, 0x95, 0x85, 0x75, 0x65, 0x55, 0x45, 0x35, 0x25, 0x15, 0x5, 0xd2, 0xc2, 0xf2, 0xe2, 0x92, 0x82, 0xb2, 0xa2, 0x52, 0x42, 0x72, 0x62, 0x12, 0x2, 0x32, 0x22, 0xcf, 0xdf, 0xef, 0xff, 0x8f, 0x9f, 0xaf, 0xbf, 0x4f, 0x5f, 0x6f, 0x7f, 0xf, 0x1f, 0x2f, 0x3f, 0x9c, 0x8c, 0xbc, 0xac, 0xdc, 0xcc, 0xfc, 0xec, 0x1c, 0xc, 0x3c, 0x2c, 0x5c, 0x4c, 0x7c, 0x6c, 0x81, 0x91, 0xa1, 0xb1, 0xc1, 0xd1, 0xe1, 0xf1, 0x1, 0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71, 0xa6, 0xb6, 0x86, 0x96, 0xe6, 0xf6, 0xc6, 0xd6, 0x26, 0x36, 0x6, 0x16, 0x66, 0x76, 0x46, 0x56, 0xbb, 0xab, 0x9b, 0x8b, 0xfb, 0xeb, 0xdb, 0xcb, 0x3b, 0x2b, 0x1b, 0xb, 0x7b, 0x6b, 0x5b, 0x4b},
- {0x0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0xd, 0x1c, 0x2f, 0x3e, 0x49, 0x58, 0x6b, 0x7a, 0x85, 0x94, 0xa7, 0xb6, 0xc1, 0xd0, 0xe3, 0xf2, 0x1a, 0xb, 0x38, 0x29, 0x5e, 0x4f, 0x7c, 0x6d, 0x92, 0x83, 0xb0, 0xa1, 0xd6, 0xc7, 0xf4, 0xe5, 0x17, 0x6, 0x35, 0x24, 0x53, 0x42, 0x71, 0x60, 0x9f, 0x8e, 0xbd, 0xac, 0xdb, 0xca, 0xf9, 0xe8, 0x34, 0x25, 0x16, 0x7, 0x70, 0x61, 0x52, 0x43, 0xbc, 0xad, 0x9e, 0x8f, 0xf8, 0xe9, 0xda, 0xcb, 0x39, 0x28, 0x1b, 0xa, 0x7d, 0x6c, 0x5f, 0x4e, 0xb1, 0xa0, 0x93, 0x82, 0xf5, 0xe4, 0xd7, 0xc6, 0x2e, 0x3f, 0xc, 0x1d, 0x6a, 0x7b, 0x48, 0x59, 0xa6, 0xb7, 0x84, 0x95, 0xe2, 0xf3, 0xc0, 0xd1, 0x23, 0x32, 0x1, 0x10, 0x67, 0x76, 0x45, 0x54, 0xab, 0xba, 0x89, 0x98, 0xef, 0xfe, 0xcd, 0xdc, 0x68, 0x79, 0x4a, 0x5b, 0x2c, 0x3d, 0xe, 0x1f, 0xe0, 0xf1, 0xc2, 0xd3, 0xa4, 0xb5, 0x86, 0x97, 0x65, 0x74, 0x47, 0x56, 0x21, 0x30, 0x3, 0x12, 0xed, 0xfc, 0xcf, 0xde, 0xa9, 0xb8, 0x8b, 0x9a, 0x72, 0x63, 0x50, 0x41, 0x36, 0x27, 0x14, 0x5, 0xfa, 0xeb, 0xd8, 0xc9, 0xbe, 0xaf, 0x9c, 0x8d, 0x7f, 0x6e, 0x5d, 0x4c, 0x3b, 0x2a, 0x19, 0x8, 0xf7, 0xe6, 0xd5, 0xc4, 0xb3, 0xa2, 0x91, 0x80, 0x5c, 0x4d, 0x7e, 0x6f, 0x18, 0x9, 0x3a, 0x2b, 0xd4, 0xc5, 0xf6, 0xe7, 0x90, 0x81, 0xb2, 0xa3, 0x51, 0x40, 0x73, 0x62, 0x15, 0x4, 0x37, 0x26, 0xd9, 0xc8, 0xfb, 0xea, 0x9d, 0x8c, 0xbf, 0xae, 0x46, 0x57, 0x64, 0x75, 0x2, 0x13, 0x20, 0x31, 0xce, 0xdf, 0xec, 0xfd, 0x8a, 0x9b, 0xa8, 0xb9, 0x4b, 0x5a, 0x69, 0x78, 0xf, 0x1e, 0x2d, 0x3c, 0xc3, 0xd2, 0xe1, 0xf0, 0x87, 0x96, 0xa5, 0xb4},
- {0x0, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee, 0x3d, 0x2f, 0x19, 0xb, 0x75, 0x67, 0x51, 0x43, 0xad, 0xbf, 0x89, 0x9b, 0xe5, 0xf7, 0xc1, 0xd3, 0x7a, 0x68, 0x5e, 0x4c, 0x32, 0x20, 0x16, 0x4, 0xea, 0xf8, 0xce, 0xdc, 0xa2, 0xb0, 0x86, 0x94, 0x47, 0x55, 0x63, 0x71, 0xf, 0x1d, 0x2b, 0x39, 0xd7, 0xc5, 0xf3, 0xe1, 0x9f, 0x8d, 0xbb, 0xa9, 0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a, 0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x8, 0x1a, 0xc9, 0xdb, 0xed, 0xff, 0x81, 0x93, 0xa5, 0xb7, 0x59, 0x4b, 0x7d, 0x6f, 0x11, 0x3, 0x35, 0x27, 0x8e, 0x9c, 0xaa, 0xb8, 0xc6, 0xd4, 0xe2, 0xf0, 0x1e, 0xc, 0x3a, 0x28, 0x56, 0x44, 0x72, 0x60, 0xb3, 0xa1, 0x97, 0x85, 0xfb, 0xe9, 0xdf, 0xcd, 0x23, 0x31, 0x7, 0x15, 0x6b, 0x79, 0x4f, 0x5d, 0xf5, 0xe7, 0xd1, 0xc3, 0xbd, 0xaf, 0x99, 0x8b, 0x65, 0x77, 0x41, 0x53, 0x2d, 0x3f, 0x9, 0x1b, 0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6, 0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x2, 0x34, 0x26, 0x8f, 0x9d, 0xab, 0xb9, 0xc7, 0xd5, 0xe3, 0xf1, 0x1f, 0xd, 0x3b, 0x29, 0x57, 0x45, 0x73, 0x61, 0xb2, 0xa0, 0x96, 0x84, 0xfa, 0xe8, 0xde, 0xcc, 0x22, 0x30, 0x6, 0x14, 0x6a, 0x78, 0x4e, 0x5c, 0x1, 0x13, 0x25, 0x37, 0x49, 0x5b, 0x6d, 0x7f, 0x91, 0x83, 0xb5, 0xa7, 0xd9, 0xcb, 0xfd, 0xef, 0x3c, 0x2e, 0x18, 0xa, 0x74, 0x66, 0x50, 0x42, 0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2, 0x7b, 0x69, 0x5f, 0x4d, 0x33, 0x21, 0x17, 0x5, 0xeb, 0xf9, 0xcf, 0xdd, 0xa3, 0xb1, 0x87, 0x95, 0x46, 0x54, 0x62, 0x70, 0xe, 0x1c, 0x2a, 0x38, 0xd6, 0xc4, 0xf2, 0xe0, 0x9e, 0x8c, 0xba, 0xa8},
- {0x0, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1, 0x2d, 0x3e, 0xb, 0x18, 0x61, 0x72, 0x47, 0x54, 0xb5, 0xa6, 0x93, 0x80, 0xf9, 0xea, 0xdf, 0xcc, 0x5a, 0x49, 0x7c, 0x6f, 0x16, 0x5, 0x30, 0x23, 0xc2, 0xd1, 0xe4, 0xf7, 0x8e, 0x9d, 0xa8, 0xbb, 0x77, 0x64, 0x51, 0x42, 0x3b, 0x28, 0x1d, 0xe, 0xef, 0xfc, 0xc9, 0xda, 0xa3, 0xb0, 0x85, 0x96, 0xb4, 0xa7, 0x92, 0x81, 0xf8, 0xeb, 0xde, 0xcd, 0x2c, 0x3f, 0xa, 0x19, 0x60, 0x73, 0x46, 0x55, 0x99, 0x8a, 0xbf, 0xac, 0xd5, 0xc6, 0xf3, 0xe0, 0x1, 0x12, 0x27, 0x34, 0x4d, 0x5e, 0x6b, 0x78, 0xee, 0xfd, 0xc8, 0xdb, 0xa2, 0xb1, 0x84, 0x97, 0x76, 0x65, 0x50, 0x43, 0x3a, 0x29, 0x1c, 0xf, 0xc3, 0xd0, 0xe5, 0xf6, 0x8f, 0x9c, 0xa9, 0xba, 0x5b, 0x48, 0x7d, 0x6e, 0x17, 0x4, 0x31, 0x22, 0x75, 0x66, 0x53, 0x40, 0x39, 0x2a, 0x1f, 0xc, 0xed, 0xfe, 0xcb, 0xd8, 0xa1, 0xb2, 0x87, 0x94, 0x58, 0x4b, 0x7e, 0x6d, 0x14, 0x7, 0x32, 0x21, 0xc0, 0xd3, 0xe6, 0xf5, 0x8c, 0x9f, 0xaa, 0xb9, 0x2f, 0x3c, 0x9, 0x1a, 0x63, 0x70, 0x45, 0x56, 0xb7, 0xa4, 0x91, 0x82, 0xfb, 0xe8, 0xdd, 0xce, 0x2, 0x11, 0x24, 0x37, 0x4e, 0x5d, 0x68, 0x7b, 0x9a, 0x89, 0xbc, 0xaf, 0xd6, 0xc5, 0xf0, 0xe3, 0xc1, 0xd2, 0xe7, 0xf4, 0x8d, 0x9e, 0xab, 0xb8, 0x59, 0x4a, 0x7f, 0x6c, 0x15, 0x6, 0x33, 0x20, 0xec, 0xff, 0xca, 0xd9, 0xa0, 0xb3, 0x86, 0x95, 0x74, 0x67, 0x52, 0x41, 0x38, 0x2b, 0x1e, 0xd, 0x9b, 0x88, 0xbd, 0xae, 0xd7, 0xc4, 0xf1, 0xe2, 0x3, 0x10, 0x25, 0x36, 0x4f, 0x5c, 0x69, 0x7a, 0xb6, 0xa5, 0x90, 0x83, 0xfa, 0xe9, 0xdc, 0xcf, 0x2e, 0x3d, 0x8, 0x1b, 0x62, 0x71, 0x44, 0x57},
- {0x0, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc, 0x5d, 0x49, 0x75, 0x61, 0xd, 0x19, 0x25, 0x31, 0xfd, 0xe9, 0xd5, 0xc1, 0xad, 0xb9, 0x85, 0x91, 0xba, 0xae, 0x92, 0x86, 0xea, 0xfe, 0xc2, 0xd6, 0x1a, 0xe, 0x32, 0x26, 0x4a, 0x5e, 0x62, 0x76, 0xe7, 0xf3, 0xcf, 0xdb, 0xb7, 0xa3, 0x9f, 0x8b, 0x47, 0x53, 0x6f, 0x7b, 0x17, 0x3, 0x3f, 0x2b, 0x69, 0x7d, 0x41, 0x55, 0x39, 0x2d, 0x11, 0x5, 0xc9, 0xdd, 0xe1, 0xf5, 0x99, 0x8d, 0xb1, 0xa5, 0x34, 0x20, 0x1c, 0x8, 0x64, 0x70, 0x4c, 0x58, 0x94, 0x80, 0xbc, 0xa8, 0xc4, 0xd0, 0xec, 0xf8, 0xd3, 0xc7, 0xfb, 0xef, 0x83, 0x97, 0xab, 0xbf, 0x73, 0x67, 0x5b, 0x4f, 0x23, 0x37, 0xb, 0x1f, 0x8e, 0x9a, 0xa6, 0xb2, 0xde, 0xca, 0xf6, 0xe2, 0x2e, 0x3a, 0x6, 0x12, 0x7e, 0x6a, 0x56, 0x42, 0xd2, 0xc6, 0xfa, 0xee, 0x82, 0x96, 0xaa, 0xbe, 0x72, 0x66, 0x5a, 0x4e, 0x22, 0x36, 0xa, 0x1e, 0x8f, 0x9b, 0xa7, 0xb3, 0xdf, 0xcb, 0xf7, 0xe3, 0x2f, 0x3b, 0x7, 0x13, 0x7f, 0x6b, 0x57, 0x43, 0x68, 0x7c, 0x40, 0x54, 0x38, 0x2c, 0x10, 0x4, 0xc8, 0xdc, 0xe0, 0xf4, 0x98, 0x8c, 0xb0, 0xa4, 0x35, 0x21, 0x1d, 0x9, 0x65, 0x71, 0x4d, 0x59, 0x95, 0x81, 0xbd, 0xa9, 0xc5, 0xd1, 0xed, 0xf9, 0xbb, 0xaf, 0x93, 0x87, 0xeb, 0xff, 0xc3, 0xd7, 0x1b, 0xf, 0x33, 0x27, 0x4b, 0x5f, 0x63, 0x77, 0xe6, 0xf2, 0xce, 0xda, 0xb6, 0xa2, 0x9e, 0x8a, 0x46, 0x52, 0x6e, 0x7a, 0x16, 0x2, 0x3e, 0x2a, 0x1, 0x15, 0x29, 0x3d, 0x51, 0x45, 0x79, 0x6d, 0xa1, 0xb5, 0x89, 0x9d, 0xf1, 0xe5, 0xd9, 0xcd, 0x5c, 0x48, 0x74, 0x60, 0xc, 0x18, 0x24, 0x30, 0xfc, 0xe8, 0xd4, 0xc0, 0xac, 0xb8, 0x84, 0x90},
- {0x0, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3, 0x4d, 0x58, 0x67, 0x72, 0x19, 0xc, 0x33, 0x26, 0xe5, 0xf0, 0xcf, 0xda, 0xb1, 0xa4, 0x9b, 0x8e, 0x9a, 0x8f, 0xb0, 0xa5, 0xce, 0xdb, 0xe4, 0xf1, 0x32, 0x27, 0x18, 0xd, 0x66, 0x73, 0x4c, 0x59, 0xd7, 0xc2, 0xfd, 0xe8, 0x83, 0x96, 0xa9, 0xbc, 0x7f, 0x6a, 0x55, 0x40, 0x2b, 0x3e, 0x1, 0x14, 0x29, 0x3c, 0x3, 0x16, 0x7d, 0x68, 0x57, 0x42, 0x81, 0x94, 0xab, 0xbe, 0xd5, 0xc0, 0xff, 0xea, 0x64, 0x71, 0x4e, 0x5b, 0x30, 0x25, 0x1a, 0xf, 0xcc, 0xd9, 0xe6, 0xf3, 0x98, 0x8d, 0xb2, 0xa7, 0xb3, 0xa6, 0x99, 0x8c, 0xe7, 0xf2, 0xcd, 0xd8, 0x1b, 0xe, 0x31, 0x24, 0x4f, 0x5a, 0x65, 0x70, 0xfe, 0xeb, 0xd4, 0xc1, 0xaa, 0xbf, 0x80, 0x95, 0x56, 0x43, 0x7c, 0x69, 0x2, 0x17, 0x28, 0x3d, 0x52, 0x47, 0x78, 0x6d, 0x6, 0x13, 0x2c, 0x39, 0xfa, 0xef, 0xd0, 0xc5, 0xae, 0xbb, 0x84, 0x91, 0x1f, 0xa, 0x35, 0x20, 0x4b, 0x5e, 0x61, 0x74, 0xb7, 0xa2, 0x9d, 0x88, 0xe3, 0xf6, 0xc9, 0xdc, 0xc8, 0xdd, 0xe2, 0xf7, 0x9c, 0x89, 0xb6, 0xa3, 0x60, 0x75, 0x4a, 0x5f, 0x34, 0x21, 0x1e, 0xb, 0x85, 0x90, 0xaf, 0xba, 0xd1, 0xc4, 0xfb, 0xee, 0x2d, 0x38, 0x7, 0x12, 0x79, 0x6c, 0x53, 0x46, 0x7b, 0x6e, 0x51, 0x44, 0x2f, 0x3a, 0x5, 0x10, 0xd3, 0xc6, 0xf9, 0xec, 0x87, 0x92, 0xad, 0xb8, 0x36, 0x23, 0x1c, 0x9, 0x62, 0x77, 0x48, 0x5d, 0x9e, 0x8b, 0xb4, 0xa1, 0xca, 0xdf, 0xe0, 0xf5, 0xe1, 0xf4, 0xcb, 0xde, 0xb5, 0xa0, 0x9f, 0x8a, 0x49, 0x5c, 0x63, 0x76, 0x1d, 0x8, 0x37, 0x22, 0xac, 0xb9, 0x86, 0x93, 0xf8, 0xed, 0xd2, 0xc7, 0x4, 0x11, 0x2e, 0x3b, 0x50, 0x45, 0x7a, 0x6f},
- {0x0, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2, 0x7d, 0x6b, 0x51, 0x47, 0x25, 0x33, 0x9, 0x1f, 0xcd, 0xdb, 0xe1, 0xf7, 0x95, 0x83, 0xb9, 0xaf, 0xfa, 0xec, 0xd6, 0xc0, 0xa2, 0xb4, 0x8e, 0x98, 0x4a, 0x5c, 0x66, 0x70, 0x12, 0x4, 0x3e, 0x28, 0x87, 0x91, 0xab, 0xbd, 0xdf, 0xc9, 0xf3, 0xe5, 0x37, 0x21, 0x1b, 0xd, 0x6f, 0x79, 0x43, 0x55, 0xe9, 0xff, 0xc5, 0xd3, 0xb1, 0xa7, 0x9d, 0x8b, 0x59, 0x4f, 0x75, 0x63, 0x1, 0x17, 0x2d, 0x3b, 0x94, 0x82, 0xb8, 0xae, 0xcc, 0xda, 0xe0, 0xf6, 0x24, 0x32, 0x8, 0x1e, 0x7c, 0x6a, 0x50, 0x46, 0x13, 0x5, 0x3f, 0x29, 0x4b, 0x5d, 0x67, 0x71, 0xa3, 0xb5, 0x8f, 0x99, 0xfb, 0xed, 0xd7, 0xc1, 0x6e, 0x78, 0x42, 0x54, 0x36, 0x20, 0x1a, 0xc, 0xde, 0xc8, 0xf2, 0xe4, 0x86, 0x90, 0xaa, 0xbc, 0xcf, 0xd9, 0xe3, 0xf5, 0x97, 0x81, 0xbb, 0xad, 0x7f, 0x69, 0x53, 0x45, 0x27, 0x31, 0xb, 0x1d, 0xb2, 0xa4, 0x9e, 0x88, 0xea, 0xfc, 0xc6, 0xd0, 0x2, 0x14, 0x2e, 0x38, 0x5a, 0x4c, 0x76, 0x60, 0x35, 0x23, 0x19, 0xf, 0x6d, 0x7b, 0x41, 0x57, 0x85, 0x93, 0xa9, 0xbf, 0xdd, 0xcb, 0xf1, 0xe7, 0x48, 0x5e, 0x64, 0x72, 0x10, 0x6, 0x3c, 0x2a, 0xf8, 0xee, 0xd4, 0xc2, 0xa0, 0xb6, 0x8c, 0x9a, 0x26, 0x30, 0xa, 0x1c, 0x7e, 0x68, 0x52, 0x44, 0x96, 0x80, 0xba, 0xac, 0xce, 0xd8, 0xe2, 0xf4, 0x5b, 0x4d, 0x77, 0x61, 0x3, 0x15, 0x2f, 0x39, 0xeb, 0xfd, 0xc7, 0xd1, 0xb3, 0xa5, 0x9f, 0x89, 0xdc, 0xca, 0xf0, 0xe6, 0x84, 0x92, 0xa8, 0xbe, 0x6c, 0x7a, 0x40, 0x56, 0x34, 0x22, 0x18, 0xe, 0xa1, 0xb7, 0x8d, 0x9b, 0xf9, 0xef, 0xd5, 0xc3, 0x11, 0x7, 0x3d, 0x2b, 0x49, 0x5f, 0x65, 0x73},
- {0x0, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd, 0x6d, 0x7a, 0x43, 0x54, 0x31, 0x26, 0x1f, 0x8, 0xd5, 0xc2, 0xfb, 0xec, 0x89, 0x9e, 0xa7, 0xb0, 0xda, 0xcd, 0xf4, 0xe3, 0x86, 0x91, 0xa8, 0xbf, 0x62, 0x75, 0x4c, 0x5b, 0x3e, 0x29, 0x10, 0x7, 0xb7, 0xa0, 0x99, 0x8e, 0xeb, 0xfc, 0xc5, 0xd2, 0xf, 0x18, 0x21, 0x36, 0x53, 0x44, 0x7d, 0x6a, 0xa9, 0xbe, 0x87, 0x90, 0xf5, 0xe2, 0xdb, 0xcc, 0x11, 0x6, 0x3f, 0x28, 0x4d, 0x5a, 0x63, 0x74, 0xc4, 0xd3, 0xea, 0xfd, 0x98, 0x8f, 0xb6, 0xa1, 0x7c, 0x6b, 0x52, 0x45, 0x20, 0x37, 0xe, 0x19, 0x73, 0x64, 0x5d, 0x4a, 0x2f, 0x38, 0x1, 0x16, 0xcb, 0xdc, 0xe5, 0xf2, 0x97, 0x80, 0xb9, 0xae, 0x1e, 0x9, 0x30, 0x27, 0x42, 0x55, 0x6c, 0x7b, 0xa6, 0xb1, 0x88, 0x9f, 0xfa, 0xed, 0xd4, 0xc3, 0x4f, 0x58, 0x61, 0x76, 0x13, 0x4, 0x3d, 0x2a, 0xf7, 0xe0, 0xd9, 0xce, 0xab, 0xbc, 0x85, 0x92, 0x22, 0x35, 0xc, 0x1b, 0x7e, 0x69, 0x50, 0x47, 0x9a, 0x8d, 0xb4, 0xa3, 0xc6, 0xd1, 0xe8, 0xff, 0x95, 0x82, 0xbb, 0xac, 0xc9, 0xde, 0xe7, 0xf0, 0x2d, 0x3a, 0x3, 0x14, 0x71, 0x66, 0x5f, 0x48, 0xf8, 0xef, 0xd6, 0xc1, 0xa4, 0xb3, 0x8a, 0x9d, 0x40, 0x57, 0x6e, 0x79, 0x1c, 0xb, 0x32, 0x25, 0xe6, 0xf1, 0xc8, 0xdf, 0xba, 0xad, 0x94, 0x83, 0x5e, 0x49, 0x70, 0x67, 0x2, 0x15, 0x2c, 0x3b, 0x8b, 0x9c, 0xa5, 0xb2, 0xd7, 0xc0, 0xf9, 0xee, 0x33, 0x24, 0x1d, 0xa, 0x6f, 0x78, 0x41, 0x56, 0x3c, 0x2b, 0x12, 0x5, 0x60, 0x77, 0x4e, 0x59, 0x84, 0x93, 0xaa, 0xbd, 0xd8, 0xcf, 0xf6, 0xe1, 0x51, 0x46, 0x7f, 0x68, 0xd, 0x1a, 0x23, 0x34, 0xe9, 0xfe, 0xc7, 0xd0, 0xb5, 0xa2, 0x9b, 0x8c},
- {0x0, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88, 0x9d, 0x85, 0xad, 0xb5, 0xfd, 0xe5, 0xcd, 0xd5, 0x5d, 0x45, 0x6d, 0x75, 0x3d, 0x25, 0xd, 0x15, 0x27, 0x3f, 0x17, 0xf, 0x47, 0x5f, 0x77, 0x6f, 0xe7, 0xff, 0xd7, 0xcf, 0x87, 0x9f, 0xb7, 0xaf, 0xba, 0xa2, 0x8a, 0x92, 0xda, 0xc2, 0xea, 0xf2, 0x7a, 0x62, 0x4a, 0x52, 0x1a, 0x2, 0x2a, 0x32, 0x4e, 0x56, 0x7e, 0x66, 0x2e, 0x36, 0x1e, 0x6, 0x8e, 0x96, 0xbe, 0xa6, 0xee, 0xf6, 0xde, 0xc6, 0xd3, 0xcb, 0xe3, 0xfb, 0xb3, 0xab, 0x83, 0x9b, 0x13, 0xb, 0x23, 0x3b, 0x73, 0x6b, 0x43, 0x5b, 0x69, 0x71, 0x59, 0x41, 0x9, 0x11, 0x39, 0x21, 0xa9, 0xb1, 0x99, 0x81, 0xc9, 0xd1, 0xf9, 0xe1, 0xf4, 0xec, 0xc4, 0xdc, 0x94, 0x8c, 0xa4, 0xbc, 0x34, 0x2c, 0x4, 0x1c, 0x54, 0x4c, 0x64, 0x7c, 0x9c, 0x84, 0xac, 0xb4, 0xfc, 0xe4, 0xcc, 0xd4, 0x5c, 0x44, 0x6c, 0x74, 0x3c, 0x24, 0xc, 0x14, 0x1, 0x19, 0x31, 0x29, 0x61, 0x79, 0x51, 0x49, 0xc1, 0xd9, 0xf1, 0xe9, 0xa1, 0xb9, 0x91, 0x89, 0xbb, 0xa3, 0x8b, 0x93, 0xdb, 0xc3, 0xeb, 0xf3, 0x7b, 0x63, 0x4b, 0x53, 0x1b, 0x3, 0x2b, 0x33, 0x26, 0x3e, 0x16, 0xe, 0x46, 0x5e, 0x76, 0x6e, 0xe6, 0xfe, 0xd6, 0xce, 0x86, 0x9e, 0xb6, 0xae, 0xd2, 0xca, 0xe2, 0xfa, 0xb2, 0xaa, 0x82, 0x9a, 0x12, 0xa, 0x22, 0x3a, 0x72, 0x6a, 0x42, 0x5a, 0x4f, 0x57, 0x7f, 0x67, 0x2f, 0x37, 0x1f, 0x7, 0x8f, 0x97, 0xbf, 0xa7, 0xef, 0xf7, 0xdf, 0xc7, 0xf5, 0xed, 0xc5, 0xdd, 0x95, 0x8d, 0xa5, 0xbd, 0x35, 0x2d, 0x5, 0x1d, 0x55, 0x4d, 0x65, 0x7d, 0x68, 0x70, 0x58, 0x40, 0x8, 0x10, 0x38, 0x20, 0xa8, 0xb0, 0x98, 0x80, 0xc8, 0xd0, 0xf8, 0xe0},
- {0x0, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87, 0x8d, 0x94, 0xbf, 0xa6, 0xe9, 0xf0, 0xdb, 0xc2, 0x45, 0x5c, 0x77, 0x6e, 0x21, 0x38, 0x13, 0xa, 0x7, 0x1e, 0x35, 0x2c, 0x63, 0x7a, 0x51, 0x48, 0xcf, 0xd6, 0xfd, 0xe4, 0xab, 0xb2, 0x99, 0x80, 0x8a, 0x93, 0xb8, 0xa1, 0xee, 0xf7, 0xdc, 0xc5, 0x42, 0x5b, 0x70, 0x69, 0x26, 0x3f, 0x14, 0xd, 0xe, 0x17, 0x3c, 0x25, 0x6a, 0x73, 0x58, 0x41, 0xc6, 0xdf, 0xf4, 0xed, 0xa2, 0xbb, 0x90, 0x89, 0x83, 0x9a, 0xb1, 0xa8, 0xe7, 0xfe, 0xd5, 0xcc, 0x4b, 0x52, 0x79, 0x60, 0x2f, 0x36, 0x1d, 0x4, 0x9, 0x10, 0x3b, 0x22, 0x6d, 0x74, 0x5f, 0x46, 0xc1, 0xd8, 0xf3, 0xea, 0xa5, 0xbc, 0x97, 0x8e, 0x84, 0x9d, 0xb6, 0xaf, 0xe0, 0xf9, 0xd2, 0xcb, 0x4c, 0x55, 0x7e, 0x67, 0x28, 0x31, 0x1a, 0x3, 0x1c, 0x5, 0x2e, 0x37, 0x78, 0x61, 0x4a, 0x53, 0xd4, 0xcd, 0xe6, 0xff, 0xb0, 0xa9, 0x82, 0x9b, 0x91, 0x88, 0xa3, 0xba, 0xf5, 0xec, 0xc7, 0xde, 0x59, 0x40, 0x6b, 0x72, 0x3d, 0x24, 0xf, 0x16, 0x1b, 0x2, 0x29, 0x30, 0x7f, 0x66, 0x4d, 0x54, 0xd3, 0xca, 0xe1, 0xf8, 0xb7, 0xae, 0x85, 0x9c, 0x96, 0x8f, 0xa4, 0xbd, 0xf2, 0xeb, 0xc0, 0xd9, 0x5e, 0x47, 0x6c, 0x75, 0x3a, 0x23, 0x8, 0x11, 0x12, 0xb, 0x20, 0x39, 0x76, 0x6f, 0x44, 0x5d, 0xda, 0xc3, 0xe8, 0xf1, 0xbe, 0xa7, 0x8c, 0x95, 0x9f, 0x86, 0xad, 0xb4, 0xfb, 0xe2, 0xc9, 0xd0, 0x57, 0x4e, 0x65, 0x7c, 0x33, 0x2a, 0x1, 0x18, 0x15, 0xc, 0x27, 0x3e, 0x71, 0x68, 0x43, 0x5a, 0xdd, 0xc4, 0xef, 0xf6, 0xb9, 0xa0, 0x8b, 0x92, 0x98, 0x81, 0xaa, 0xb3, 0xfc, 0xe5, 0xce, 0xd7, 0x50, 0x49, 0x62, 0x7b, 0x34, 0x2d, 0x6, 0x1f},
- {0x0, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96, 0xbd, 0xa7, 0x89, 0x93, 0xd5, 0xcf, 0xe1, 0xfb, 0x6d, 0x77, 0x59, 0x43, 0x5, 0x1f, 0x31, 0x2b, 0x67, 0x7d, 0x53, 0x49, 0xf, 0x15, 0x3b, 0x21, 0xb7, 0xad, 0x83, 0x99, 0xdf, 0xc5, 0xeb, 0xf1, 0xda, 0xc0, 0xee, 0xf4, 0xb2, 0xa8, 0x86, 0x9c, 0xa, 0x10, 0x3e, 0x24, 0x62, 0x78, 0x56, 0x4c, 0xce, 0xd4, 0xfa, 0xe0, 0xa6, 0xbc, 0x92, 0x88, 0x1e, 0x4, 0x2a, 0x30, 0x76, 0x6c, 0x42, 0x58, 0x73, 0x69, 0x47, 0x5d, 0x1b, 0x1, 0x2f, 0x35, 0xa3, 0xb9, 0x97, 0x8d, 0xcb, 0xd1, 0xff, 0xe5, 0xa9, 0xb3, 0x9d, 0x87, 0xc1, 0xdb, 0xf5, 0xef, 0x79, 0x63, 0x4d, 0x57, 0x11, 0xb, 0x25, 0x3f, 0x14, 0xe, 0x20, 0x3a, 0x7c, 0x66, 0x48, 0x52, 0xc4, 0xde, 0xf0, 0xea, 0xac, 0xb6, 0x98, 0x82, 0x81, 0x9b, 0xb5, 0xaf, 0xe9, 0xf3, 0xdd, 0xc7, 0x51, 0x4b, 0x65, 0x7f, 0x39, 0x23, 0xd, 0x17, 0x3c, 0x26, 0x8, 0x12, 0x54, 0x4e, 0x60, 0x7a, 0xec, 0xf6, 0xd8, 0xc2, 0x84, 0x9e, 0xb0, 0xaa, 0xe6, 0xfc, 0xd2, 0xc8, 0x8e, 0x94, 0xba, 0xa0, 0x36, 0x2c, 0x2, 0x18, 0x5e, 0x44, 0x6a, 0x70, 0x5b, 0x41, 0x6f, 0x75, 0x33, 0x29, 0x7, 0x1d, 0x8b, 0x91, 0xbf, 0xa5, 0xe3, 0xf9, 0xd7, 0xcd, 0x4f, 0x55, 0x7b, 0x61, 0x27, 0x3d, 0x13, 0x9, 0x9f, 0x85, 0xab, 0xb1, 0xf7, 0xed, 0xc3, 0xd9, 0xf2, 0xe8, 0xc6, 0xdc, 0x9a, 0x80, 0xae, 0xb4, 0x22, 0x38, 0x16, 0xc, 0x4a, 0x50, 0x7e, 0x64, 0x28, 0x32, 0x1c, 0x6, 0x40, 0x5a, 0x74, 0x6e, 0xf8, 0xe2, 0xcc, 0xd6, 0x90, 0x8a, 0xa4, 0xbe, 0x95, 0x8f, 0xa1, 0xbb, 0xfd, 0xe7, 0xc9, 0xd3, 0x45, 0x5f, 0x71, 0x6b, 0x2d, 0x37, 0x19, 0x3},
- {0x0, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99, 0xad, 0xb6, 0x9b, 0x80, 0xc1, 0xda, 0xf7, 0xec, 0x75, 0x6e, 0x43, 0x58, 0x19, 0x2, 0x2f, 0x34, 0x47, 0x5c, 0x71, 0x6a, 0x2b, 0x30, 0x1d, 0x6, 0x9f, 0x84, 0xa9, 0xb2, 0xf3, 0xe8, 0xc5, 0xde, 0xea, 0xf1, 0xdc, 0xc7, 0x86, 0x9d, 0xb0, 0xab, 0x32, 0x29, 0x4, 0x1f, 0x5e, 0x45, 0x68, 0x73, 0x8e, 0x95, 0xb8, 0xa3, 0xe2, 0xf9, 0xd4, 0xcf, 0x56, 0x4d, 0x60, 0x7b, 0x3a, 0x21, 0xc, 0x17, 0x23, 0x38, 0x15, 0xe, 0x4f, 0x54, 0x79, 0x62, 0xfb, 0xe0, 0xcd, 0xd6, 0x97, 0x8c, 0xa1, 0xba, 0xc9, 0xd2, 0xff, 0xe4, 0xa5, 0xbe, 0x93, 0x88, 0x11, 0xa, 0x27, 0x3c, 0x7d, 0x66, 0x4b, 0x50, 0x64, 0x7f, 0x52, 0x49, 0x8, 0x13, 0x3e, 0x25, 0xbc, 0xa7, 0x8a, 0x91, 0xd0, 0xcb, 0xe6, 0xfd, 0x1, 0x1a, 0x37, 0x2c, 0x6d, 0x76, 0x5b, 0x40, 0xd9, 0xc2, 0xef, 0xf4, 0xb5, 0xae, 0x83, 0x98, 0xac, 0xb7, 0x9a, 0x81, 0xc0, 0xdb, 0xf6, 0xed, 0x74, 0x6f, 0x42, 0x59, 0x18, 0x3, 0x2e, 0x35, 0x46, 0x5d, 0x70, 0x6b, 0x2a, 0x31, 0x1c, 0x7, 0x9e, 0x85, 0xa8, 0xb3, 0xf2, 0xe9, 0xc4, 0xdf, 0xeb, 0xf0, 0xdd, 0xc6, 0x87, 0x9c, 0xb1, 0xaa, 0x33, 0x28, 0x5, 0x1e, 0x5f, 0x44, 0x69, 0x72, 0x8f, 0x94, 0xb9, 0xa2, 0xe3, 0xf8, 0xd5, 0xce, 0x57, 0x4c, 0x61, 0x7a, 0x3b, 0x20, 0xd, 0x16, 0x22, 0x39, 0x14, 0xf, 0x4e, 0x55, 0x78, 0x63, 0xfa, 0xe1, 0xcc, 0xd7, 0x96, 0x8d, 0xa0, 0xbb, 0xc8, 0xd3, 0xfe, 0xe5, 0xa4, 0xbf, 0x92, 0x89, 0x10, 0xb, 0x26, 0x3d, 0x7c, 0x67, 0x4a, 0x51, 0x65, 0x7e, 0x53, 0x48, 0x9, 0x12, 0x3f, 0x24, 0xbd, 0xa6, 0x8b, 0x90, 0xd1, 0xca, 0xe7, 0xfc},
- {0x0, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4, 0xdd, 0xc1, 0xe5, 0xf9, 0xad, 0xb1, 0x95, 0x89, 0x3d, 0x21, 0x5, 0x19, 0x4d, 0x51, 0x75, 0x69, 0xa7, 0xbb, 0x9f, 0x83, 0xd7, 0xcb, 0xef, 0xf3, 0x47, 0x5b, 0x7f, 0x63, 0x37, 0x2b, 0xf, 0x13, 0x7a, 0x66, 0x42, 0x5e, 0xa, 0x16, 0x32, 0x2e, 0x9a, 0x86, 0xa2, 0xbe, 0xea, 0xf6, 0xd2, 0xce, 0x53, 0x4f, 0x6b, 0x77, 0x23, 0x3f, 0x1b, 0x7, 0xb3, 0xaf, 0x8b, 0x97, 0xc3, 0xdf, 0xfb, 0xe7, 0x8e, 0x92, 0xb6, 0xaa, 0xfe, 0xe2, 0xc6, 0xda, 0x6e, 0x72, 0x56, 0x4a, 0x1e, 0x2, 0x26, 0x3a, 0xf4, 0xe8, 0xcc, 0xd0, 0x84, 0x98, 0xbc, 0xa0, 0x14, 0x8, 0x2c, 0x30, 0x64, 0x78, 0x5c, 0x40, 0x29, 0x35, 0x11, 0xd, 0x59, 0x45, 0x61, 0x7d, 0xc9, 0xd5, 0xf1, 0xed, 0xb9, 0xa5, 0x81, 0x9d, 0xa6, 0xba, 0x9e, 0x82, 0xd6, 0xca, 0xee, 0xf2, 0x46, 0x5a, 0x7e, 0x62, 0x36, 0x2a, 0xe, 0x12, 0x7b, 0x67, 0x43, 0x5f, 0xb, 0x17, 0x33, 0x2f, 0x9b, 0x87, 0xa3, 0xbf, 0xeb, 0xf7, 0xd3, 0xcf, 0x1, 0x1d, 0x39, 0x25, 0x71, 0x6d, 0x49, 0x55, 0xe1, 0xfd, 0xd9, 0xc5, 0x91, 0x8d, 0xa9, 0xb5, 0xdc, 0xc0, 0xe4, 0xf8, 0xac, 0xb0, 0x94, 0x88, 0x3c, 0x20, 0x4, 0x18, 0x4c, 0x50, 0x74, 0x68, 0xf5, 0xe9, 0xcd, 0xd1, 0x85, 0x99, 0xbd, 0xa1, 0x15, 0x9, 0x2d, 0x31, 0x65, 0x79, 0x5d, 0x41, 0x28, 0x34, 0x10, 0xc, 0x58, 0x44, 0x60, 0x7c, 0xc8, 0xd4, 0xf0, 0xec, 0xb8, 0xa4, 0x80, 0x9c, 0x52, 0x4e, 0x6a, 0x76, 0x22, 0x3e, 0x1a, 0x6, 0xb2, 0xae, 0x8a, 0x96, 0xc2, 0xde, 0xfa, 0xe6, 0x8f, 0x93, 0xb7, 0xab, 0xff, 0xe3, 0xc7, 0xdb, 0x6f, 0x73, 0x57, 0x4b, 0x1f, 0x3, 0x27, 0x3b},
- {0x0, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb, 0xcd, 0xd0, 0xf7, 0xea, 0xb9, 0xa4, 0x83, 0x9e, 0x25, 0x38, 0x1f, 0x2, 0x51, 0x4c, 0x6b, 0x76, 0x87, 0x9a, 0xbd, 0xa0, 0xf3, 0xee, 0xc9, 0xd4, 0x6f, 0x72, 0x55, 0x48, 0x1b, 0x6, 0x21, 0x3c, 0x4a, 0x57, 0x70, 0x6d, 0x3e, 0x23, 0x4, 0x19, 0xa2, 0xbf, 0x98, 0x85, 0xd6, 0xcb, 0xec, 0xf1, 0x13, 0xe, 0x29, 0x34, 0x67, 0x7a, 0x5d, 0x40, 0xfb, 0xe6, 0xc1, 0xdc, 0x8f, 0x92, 0xb5, 0xa8, 0xde, 0xc3, 0xe4, 0xf9, 0xaa, 0xb7, 0x90, 0x8d, 0x36, 0x2b, 0xc, 0x11, 0x42, 0x5f, 0x78, 0x65, 0x94, 0x89, 0xae, 0xb3, 0xe0, 0xfd, 0xda, 0xc7, 0x7c, 0x61, 0x46, 0x5b, 0x8, 0x15, 0x32, 0x2f, 0x59, 0x44, 0x63, 0x7e, 0x2d, 0x30, 0x17, 0xa, 0xb1, 0xac, 0x8b, 0x96, 0xc5, 0xd8, 0xff, 0xe2, 0x26, 0x3b, 0x1c, 0x1, 0x52, 0x4f, 0x68, 0x75, 0xce, 0xd3, 0xf4, 0xe9, 0xba, 0xa7, 0x80, 0x9d, 0xeb, 0xf6, 0xd1, 0xcc, 0x9f, 0x82, 0xa5, 0xb8, 0x3, 0x1e, 0x39, 0x24, 0x77, 0x6a, 0x4d, 0x50, 0xa1, 0xbc, 0x9b, 0x86, 0xd5, 0xc8, 0xef, 0xf2, 0x49, 0x54, 0x73, 0x6e, 0x3d, 0x20, 0x7, 0x1a, 0x6c, 0x71, 0x56, 0x4b, 0x18, 0x5, 0x22, 0x3f, 0x84, 0x99, 0xbe, 0xa3, 0xf0, 0xed, 0xca, 0xd7, 0x35, 0x28, 0xf, 0x12, 0x41, 0x5c, 0x7b, 0x66, 0xdd, 0xc0, 0xe7, 0xfa, 0xa9, 0xb4, 0x93, 0x8e, 0xf8, 0xe5, 0xc2, 0xdf, 0x8c, 0x91, 0xb6, 0xab, 0x10, 0xd, 0x2a, 0x37, 0x64, 0x79, 0x5e, 0x43, 0xb2, 0xaf, 0x88, 0x95, 0xc6, 0xdb, 0xfc, 0xe1, 0x5a, 0x47, 0x60, 0x7d, 0x2e, 0x33, 0x14, 0x9, 0x7f, 0x62, 0x45, 0x58, 0xb, 0x16, 0x31, 0x2c, 0x97, 0x8a, 0xad, 0xb0, 0xe3, 0xfe, 0xd9, 0xc4},
- {0x0, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa, 0xfd, 0xe3, 0xc1, 0xdf, 0x85, 0x9b, 0xb9, 0xa7, 0xd, 0x13, 0x31, 0x2f, 0x75, 0x6b, 0x49, 0x57, 0xe7, 0xf9, 0xdb, 0xc5, 0x9f, 0x81, 0xa3, 0xbd, 0x17, 0x9, 0x2b, 0x35, 0x6f, 0x71, 0x53, 0x4d, 0x1a, 0x4, 0x26, 0x38, 0x62, 0x7c, 0x5e, 0x40, 0xea, 0xf4, 0xd6, 0xc8, 0x92, 0x8c, 0xae, 0xb0, 0xd3, 0xcd, 0xef, 0xf1, 0xab, 0xb5, 0x97, 0x89, 0x23, 0x3d, 0x1f, 0x1, 0x5b, 0x45, 0x67, 0x79, 0x2e, 0x30, 0x12, 0xc, 0x56, 0x48, 0x6a, 0x74, 0xde, 0xc0, 0xe2, 0xfc, 0xa6, 0xb8, 0x9a, 0x84, 0x34, 0x2a, 0x8, 0x16, 0x4c, 0x52, 0x70, 0x6e, 0xc4, 0xda, 0xf8, 0xe6, 0xbc, 0xa2, 0x80, 0x9e, 0xc9, 0xd7, 0xf5, 0xeb, 0xb1, 0xaf, 0x8d, 0x93, 0x39, 0x27, 0x5, 0x1b, 0x41, 0x5f, 0x7d, 0x63, 0xbb, 0xa5, 0x87, 0x99, 0xc3, 0xdd, 0xff, 0xe1, 0x4b, 0x55, 0x77, 0x69, 0x33, 0x2d, 0xf, 0x11, 0x46, 0x58, 0x7a, 0x64, 0x3e, 0x20, 0x2, 0x1c, 0xb6, 0xa8, 0x8a, 0x94, 0xce, 0xd0, 0xf2, 0xec, 0x5c, 0x42, 0x60, 0x7e, 0x24, 0x3a, 0x18, 0x6, 0xac, 0xb2, 0x90, 0x8e, 0xd4, 0xca, 0xe8, 0xf6, 0xa1, 0xbf, 0x9d, 0x83, 0xd9, 0xc7, 0xe5, 0xfb, 0x51, 0x4f, 0x6d, 0x73, 0x29, 0x37, 0x15, 0xb, 0x68, 0x76, 0x54, 0x4a, 0x10, 0xe, 0x2c, 0x32, 0x98, 0x86, 0xa4, 0xba, 0xe0, 0xfe, 0xdc, 0xc2, 0x95, 0x8b, 0xa9, 0xb7, 0xed, 0xf3, 0xd1, 0xcf, 0x65, 0x7b, 0x59, 0x47, 0x1d, 0x3, 0x21, 0x3f, 0x8f, 0x91, 0xb3, 0xad, 0xf7, 0xe9, 0xcb, 0xd5, 0x7f, 0x61, 0x43, 0x5d, 0x7, 0x19, 0x3b, 0x25, 0x72, 0x6c, 0x4e, 0x50, 0xa, 0x14, 0x36, 0x28, 0x82, 0x9c, 0xbe, 0xa0, 0xfa, 0xe4, 0xc6, 0xd8},
- {0x0, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5, 0xed, 0xf2, 0xd3, 0xcc, 0x91, 0x8e, 0xaf, 0xb0, 0x15, 0xa, 0x2b, 0x34, 0x69, 0x76, 0x57, 0x48, 0xc7, 0xd8, 0xf9, 0xe6, 0xbb, 0xa4, 0x85, 0x9a, 0x3f, 0x20, 0x1, 0x1e, 0x43, 0x5c, 0x7d, 0x62, 0x2a, 0x35, 0x14, 0xb, 0x56, 0x49, 0x68, 0x77, 0xd2, 0xcd, 0xec, 0xf3, 0xae, 0xb1, 0x90, 0x8f, 0x93, 0x8c, 0xad, 0xb2, 0xef, 0xf0, 0xd1, 0xce, 0x6b, 0x74, 0x55, 0x4a, 0x17, 0x8, 0x29, 0x36, 0x7e, 0x61, 0x40, 0x5f, 0x2, 0x1d, 0x3c, 0x23, 0x86, 0x99, 0xb8, 0xa7, 0xfa, 0xe5, 0xc4, 0xdb, 0x54, 0x4b, 0x6a, 0x75, 0x28, 0x37, 0x16, 0x9, 0xac, 0xb3, 0x92, 0x8d, 0xd0, 0xcf, 0xee, 0xf1, 0xb9, 0xa6, 0x87, 0x98, 0xc5, 0xda, 0xfb, 0xe4, 0x41, 0x5e, 0x7f, 0x60, 0x3d, 0x22, 0x3, 0x1c, 0x3b, 0x24, 0x5, 0x1a, 0x47, 0x58, 0x79, 0x66, 0xc3, 0xdc, 0xfd, 0xe2, 0xbf, 0xa0, 0x81, 0x9e, 0xd6, 0xc9, 0xe8, 0xf7, 0xaa, 0xb5, 0x94, 0x8b, 0x2e, 0x31, 0x10, 0xf, 0x52, 0x4d, 0x6c, 0x73, 0xfc, 0xe3, 0xc2, 0xdd, 0x80, 0x9f, 0xbe, 0xa1, 0x4, 0x1b, 0x3a, 0x25, 0x78, 0x67, 0x46, 0x59, 0x11, 0xe, 0x2f, 0x30, 0x6d, 0x72, 0x53, 0x4c, 0xe9, 0xf6, 0xd7, 0xc8, 0x95, 0x8a, 0xab, 0xb4, 0xa8, 0xb7, 0x96, 0x89, 0xd4, 0xcb, 0xea, 0xf5, 0x50, 0x4f, 0x6e, 0x71, 0x2c, 0x33, 0x12, 0xd, 0x45, 0x5a, 0x7b, 0x64, 0x39, 0x26, 0x7, 0x18, 0xbd, 0xa2, 0x83, 0x9c, 0xc1, 0xde, 0xff, 0xe0, 0x6f, 0x70, 0x51, 0x4e, 0x13, 0xc, 0x2d, 0x32, 0x97, 0x88, 0xa9, 0xb6, 0xeb, 0xf4, 0xd5, 0xca, 0x82, 0x9d, 0xbc, 0xa3, 0xfe, 0xe1, 0xc0, 0xdf, 0x7a, 0x65, 0x44, 0x5b, 0x6, 0x19, 0x38, 0x27},
- {0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd, 0x3a, 0x1a, 0x7a, 0x5a, 0xba, 0x9a, 0xfa, 0xda, 0x27, 0x7, 0x67, 0x47, 0xa7, 0x87, 0xe7, 0xc7, 0x74, 0x54, 0x34, 0x14, 0xf4, 0xd4, 0xb4, 0x94, 0x69, 0x49, 0x29, 0x9, 0xe9, 0xc9, 0xa9, 0x89, 0x4e, 0x6e, 0xe, 0x2e, 0xce, 0xee, 0x8e, 0xae, 0x53, 0x73, 0x13, 0x33, 0xd3, 0xf3, 0x93, 0xb3, 0xe8, 0xc8, 0xa8, 0x88, 0x68, 0x48, 0x28, 0x8, 0xf5, 0xd5, 0xb5, 0x95, 0x75, 0x55, 0x35, 0x15, 0xd2, 0xf2, 0x92, 0xb2, 0x52, 0x72, 0x12, 0x32, 0xcf, 0xef, 0x8f, 0xaf, 0x4f, 0x6f, 0xf, 0x2f, 0x9c, 0xbc, 0xdc, 0xfc, 0x1c, 0x3c, 0x5c, 0x7c, 0x81, 0xa1, 0xc1, 0xe1, 0x1, 0x21, 0x41, 0x61, 0xa6, 0x86, 0xe6, 0xc6, 0x26, 0x6, 0x66, 0x46, 0xbb, 0x9b, 0xfb, 0xdb, 0x3b, 0x1b, 0x7b, 0x5b, 0xcd, 0xed, 0x8d, 0xad, 0x4d, 0x6d, 0xd, 0x2d, 0xd0, 0xf0, 0x90, 0xb0, 0x50, 0x70, 0x10, 0x30, 0xf7, 0xd7, 0xb7, 0x97, 0x77, 0x57, 0x37, 0x17, 0xea, 0xca, 0xaa, 0x8a, 0x6a, 0x4a, 0x2a, 0xa, 0xb9, 0x99, 0xf9, 0xd9, 0x39, 0x19, 0x79, 0x59, 0xa4, 0x84, 0xe4, 0xc4, 0x24, 0x4, 0x64, 0x44, 0x83, 0xa3, 0xc3, 0xe3, 0x3, 0x23, 0x43, 0x63, 0x9e, 0xbe, 0xde, 0xfe, 0x1e, 0x3e, 0x5e, 0x7e, 0x25, 0x5, 0x65, 0x45, 0xa5, 0x85, 0xe5, 0xc5, 0x38, 0x18, 0x78, 0x58, 0xb8, 0x98, 0xf8, 0xd8, 0x1f, 0x3f, 0x5f, 0x7f, 0x9f, 0xbf, 0xdf, 0xff, 0x2, 0x22, 0x42, 0x62, 0x82, 0xa2, 0xc2, 0xe2, 0x51, 0x71, 0x11, 0x31, 0xd1, 0xf1, 0x91, 0xb1, 0x4c, 0x6c, 0xc, 0x2c, 0xcc, 0xec, 0x8c, 0xac, 0x6b, 0x4b, 0x2b, 0xb, 0xeb, 0xcb, 0xab, 0x8b, 0x76, 0x56, 0x36, 0x16, 0xf6, 0xd6, 0xb6, 0x96},
- {0x0, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2, 0x2a, 0xb, 0x68, 0x49, 0xae, 0x8f, 0xec, 0xcd, 0x3f, 0x1e, 0x7d, 0x5c, 0xbb, 0x9a, 0xf9, 0xd8, 0x54, 0x75, 0x16, 0x37, 0xd0, 0xf1, 0x92, 0xb3, 0x41, 0x60, 0x3, 0x22, 0xc5, 0xe4, 0x87, 0xa6, 0x7e, 0x5f, 0x3c, 0x1d, 0xfa, 0xdb, 0xb8, 0x99, 0x6b, 0x4a, 0x29, 0x8, 0xef, 0xce, 0xad, 0x8c, 0xa8, 0x89, 0xea, 0xcb, 0x2c, 0xd, 0x6e, 0x4f, 0xbd, 0x9c, 0xff, 0xde, 0x39, 0x18, 0x7b, 0x5a, 0x82, 0xa3, 0xc0, 0xe1, 0x6, 0x27, 0x44, 0x65, 0x97, 0xb6, 0xd5, 0xf4, 0x13, 0x32, 0x51, 0x70, 0xfc, 0xdd, 0xbe, 0x9f, 0x78, 0x59, 0x3a, 0x1b, 0xe9, 0xc8, 0xab, 0x8a, 0x6d, 0x4c, 0x2f, 0xe, 0xd6, 0xf7, 0x94, 0xb5, 0x52, 0x73, 0x10, 0x31, 0xc3, 0xe2, 0x81, 0xa0, 0x47, 0x66, 0x5, 0x24, 0x4d, 0x6c, 0xf, 0x2e, 0xc9, 0xe8, 0x8b, 0xaa, 0x58, 0x79, 0x1a, 0x3b, 0xdc, 0xfd, 0x9e, 0xbf, 0x67, 0x46, 0x25, 0x4, 0xe3, 0xc2, 0xa1, 0x80, 0x72, 0x53, 0x30, 0x11, 0xf6, 0xd7, 0xb4, 0x95, 0x19, 0x38, 0x5b, 0x7a, 0x9d, 0xbc, 0xdf, 0xfe, 0xc, 0x2d, 0x4e, 0x6f, 0x88, 0xa9, 0xca, 0xeb, 0x33, 0x12, 0x71, 0x50, 0xb7, 0x96, 0xf5, 0xd4, 0x26, 0x7, 0x64, 0x45, 0xa2, 0x83, 0xe0, 0xc1, 0xe5, 0xc4, 0xa7, 0x86, 0x61, 0x40, 0x23, 0x2, 0xf0, 0xd1, 0xb2, 0x93, 0x74, 0x55, 0x36, 0x17, 0xcf, 0xee, 0x8d, 0xac, 0x4b, 0x6a, 0x9, 0x28, 0xda, 0xfb, 0x98, 0xb9, 0x5e, 0x7f, 0x1c, 0x3d, 0xb1, 0x90, 0xf3, 0xd2, 0x35, 0x14, 0x77, 0x56, 0xa4, 0x85, 0xe6, 0xc7, 0x20, 0x1, 0x62, 0x43, 0x9b, 0xba, 0xd9, 0xf8, 0x1f, 0x3e, 0x5d, 0x7c, 0x8e, 0xaf, 0xcc, 0xed, 0xa, 0x2b, 0x48, 0x69},
- {0x0, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0xd, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3, 0x1a, 0x38, 0x5e, 0x7c, 0x92, 0xb0, 0xd6, 0xf4, 0x17, 0x35, 0x53, 0x71, 0x9f, 0xbd, 0xdb, 0xf9, 0x34, 0x16, 0x70, 0x52, 0xbc, 0x9e, 0xf8, 0xda, 0x39, 0x1b, 0x7d, 0x5f, 0xb1, 0x93, 0xf5, 0xd7, 0x2e, 0xc, 0x6a, 0x48, 0xa6, 0x84, 0xe2, 0xc0, 0x23, 0x1, 0x67, 0x45, 0xab, 0x89, 0xef, 0xcd, 0x68, 0x4a, 0x2c, 0xe, 0xe0, 0xc2, 0xa4, 0x86, 0x65, 0x47, 0x21, 0x3, 0xed, 0xcf, 0xa9, 0x8b, 0x72, 0x50, 0x36, 0x14, 0xfa, 0xd8, 0xbe, 0x9c, 0x7f, 0x5d, 0x3b, 0x19, 0xf7, 0xd5, 0xb3, 0x91, 0x5c, 0x7e, 0x18, 0x3a, 0xd4, 0xf6, 0x90, 0xb2, 0x51, 0x73, 0x15, 0x37, 0xd9, 0xfb, 0x9d, 0xbf, 0x46, 0x64, 0x2, 0x20, 0xce, 0xec, 0x8a, 0xa8, 0x4b, 0x69, 0xf, 0x2d, 0xc3, 0xe1, 0x87, 0xa5, 0xd0, 0xf2, 0x94, 0xb6, 0x58, 0x7a, 0x1c, 0x3e, 0xdd, 0xff, 0x99, 0xbb, 0x55, 0x77, 0x11, 0x33, 0xca, 0xe8, 0x8e, 0xac, 0x42, 0x60, 0x6, 0x24, 0xc7, 0xe5, 0x83, 0xa1, 0x4f, 0x6d, 0xb, 0x29, 0xe4, 0xc6, 0xa0, 0x82, 0x6c, 0x4e, 0x28, 0xa, 0xe9, 0xcb, 0xad, 0x8f, 0x61, 0x43, 0x25, 0x7, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0xf3, 0xd1, 0xb7, 0x95, 0x7b, 0x59, 0x3f, 0x1d, 0xb8, 0x9a, 0xfc, 0xde, 0x30, 0x12, 0x74, 0x56, 0xb5, 0x97, 0xf1, 0xd3, 0x3d, 0x1f, 0x79, 0x5b, 0xa2, 0x80, 0xe6, 0xc4, 0x2a, 0x8, 0x6e, 0x4c, 0xaf, 0x8d, 0xeb, 0xc9, 0x27, 0x5, 0x63, 0x41, 0x8c, 0xae, 0xc8, 0xea, 0x4, 0x26, 0x40, 0x62, 0x81, 0xa3, 0xc5, 0xe7, 0x9, 0x2b, 0x4d, 0x6f, 0x96, 0xb4, 0xd2, 0xf0, 0x1e, 0x3c, 0x5a, 0x78, 0x9b, 0xb9, 0xdf, 0xfd, 0x13, 0x31, 0x57, 0x75},
- {0x0, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x5, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec, 0xa, 0x29, 0x4c, 0x6f, 0x86, 0xa5, 0xc0, 0xe3, 0xf, 0x2c, 0x49, 0x6a, 0x83, 0xa0, 0xc5, 0xe6, 0x14, 0x37, 0x52, 0x71, 0x98, 0xbb, 0xde, 0xfd, 0x11, 0x32, 0x57, 0x74, 0x9d, 0xbe, 0xdb, 0xf8, 0x1e, 0x3d, 0x58, 0x7b, 0x92, 0xb1, 0xd4, 0xf7, 0x1b, 0x38, 0x5d, 0x7e, 0x97, 0xb4, 0xd1, 0xf2, 0x28, 0xb, 0x6e, 0x4d, 0xa4, 0x87, 0xe2, 0xc1, 0x2d, 0xe, 0x6b, 0x48, 0xa1, 0x82, 0xe7, 0xc4, 0x22, 0x1, 0x64, 0x47, 0xae, 0x8d, 0xe8, 0xcb, 0x27, 0x4, 0x61, 0x42, 0xab, 0x88, 0xed, 0xce, 0x3c, 0x1f, 0x7a, 0x59, 0xb0, 0x93, 0xf6, 0xd5, 0x39, 0x1a, 0x7f, 0x5c, 0xb5, 0x96, 0xf3, 0xd0, 0x36, 0x15, 0x70, 0x53, 0xba, 0x99, 0xfc, 0xdf, 0x33, 0x10, 0x75, 0x56, 0xbf, 0x9c, 0xf9, 0xda, 0x50, 0x73, 0x16, 0x35, 0xdc, 0xff, 0x9a, 0xb9, 0x55, 0x76, 0x13, 0x30, 0xd9, 0xfa, 0x9f, 0xbc, 0x5a, 0x79, 0x1c, 0x3f, 0xd6, 0xf5, 0x90, 0xb3, 0x5f, 0x7c, 0x19, 0x3a, 0xd3, 0xf0, 0x95, 0xb6, 0x44, 0x67, 0x2, 0x21, 0xc8, 0xeb, 0x8e, 0xad, 0x41, 0x62, 0x7, 0x24, 0xcd, 0xee, 0x8b, 0xa8, 0x4e, 0x6d, 0x8, 0x2b, 0xc2, 0xe1, 0x84, 0xa7, 0x4b, 0x68, 0xd, 0x2e, 0xc7, 0xe4, 0x81, 0xa2, 0x78, 0x5b, 0x3e, 0x1d, 0xf4, 0xd7, 0xb2, 0x91, 0x7d, 0x5e, 0x3b, 0x18, 0xf1, 0xd2, 0xb7, 0x94, 0x72, 0x51, 0x34, 0x17, 0xfe, 0xdd, 0xb8, 0x9b, 0x77, 0x54, 0x31, 0x12, 0xfb, 0xd8, 0xbd, 0x9e, 0x6c, 0x4f, 0x2a, 0x9, 0xe0, 0xc3, 0xa6, 0x85, 0x69, 0x4a, 0x2f, 0xc, 0xe5, 0xc6, 0xa3, 0x80, 0x66, 0x45, 0x20, 0x3, 0xea, 0xc9, 0xac, 0x8f, 0x63, 0x40, 0x25, 0x6, 0xef, 0xcc, 0xa9, 0x8a},
- {0x0, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1, 0x7a, 0x5e, 0x32, 0x16, 0xea, 0xce, 0xa2, 0x86, 0x47, 0x63, 0xf, 0x2b, 0xd7, 0xf3, 0x9f, 0xbb, 0xf4, 0xd0, 0xbc, 0x98, 0x64, 0x40, 0x2c, 0x8, 0xc9, 0xed, 0x81, 0xa5, 0x59, 0x7d, 0x11, 0x35, 0x8e, 0xaa, 0xc6, 0xe2, 0x1e, 0x3a, 0x56, 0x72, 0xb3, 0x97, 0xfb, 0xdf, 0x23, 0x7, 0x6b, 0x4f, 0xf5, 0xd1, 0xbd, 0x99, 0x65, 0x41, 0x2d, 0x9, 0xc8, 0xec, 0x80, 0xa4, 0x58, 0x7c, 0x10, 0x34, 0x8f, 0xab, 0xc7, 0xe3, 0x1f, 0x3b, 0x57, 0x73, 0xb2, 0x96, 0xfa, 0xde, 0x22, 0x6, 0x6a, 0x4e, 0x1, 0x25, 0x49, 0x6d, 0x91, 0xb5, 0xd9, 0xfd, 0x3c, 0x18, 0x74, 0x50, 0xac, 0x88, 0xe4, 0xc0, 0x7b, 0x5f, 0x33, 0x17, 0xeb, 0xcf, 0xa3, 0x87, 0x46, 0x62, 0xe, 0x2a, 0xd6, 0xf2, 0x9e, 0xba, 0xf7, 0xd3, 0xbf, 0x9b, 0x67, 0x43, 0x2f, 0xb, 0xca, 0xee, 0x82, 0xa6, 0x5a, 0x7e, 0x12, 0x36, 0x8d, 0xa9, 0xc5, 0xe1, 0x1d, 0x39, 0x55, 0x71, 0xb0, 0x94, 0xf8, 0xdc, 0x20, 0x4, 0x68, 0x4c, 0x3, 0x27, 0x4b, 0x6f, 0x93, 0xb7, 0xdb, 0xff, 0x3e, 0x1a, 0x76, 0x52, 0xae, 0x8a, 0xe6, 0xc2, 0x79, 0x5d, 0x31, 0x15, 0xe9, 0xcd, 0xa1, 0x85, 0x44, 0x60, 0xc, 0x28, 0xd4, 0xf0, 0x9c, 0xb8, 0x2, 0x26, 0x4a, 0x6e, 0x92, 0xb6, 0xda, 0xfe, 0x3f, 0x1b, 0x77, 0x53, 0xaf, 0x8b, 0xe7, 0xc3, 0x78, 0x5c, 0x30, 0x14, 0xe8, 0xcc, 0xa0, 0x84, 0x45, 0x61, 0xd, 0x29, 0xd5, 0xf1, 0x9d, 0xb9, 0xf6, 0xd2, 0xbe, 0x9a, 0x66, 0x42, 0x2e, 0xa, 0xcb, 0xef, 0x83, 0xa7, 0x5b, 0x7f, 0x13, 0x37, 0x8c, 0xa8, 0xc4, 0xe0, 0x1c, 0x38, 0x54, 0x70, 0xb1, 0x95, 0xf9, 0xdd, 0x21, 0x5, 0x69, 0x4d},
- {0x0, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce, 0x6a, 0x4f, 0x20, 0x5, 0xfe, 0xdb, 0xb4, 0x91, 0x5f, 0x7a, 0x15, 0x30, 0xcb, 0xee, 0x81, 0xa4, 0xd4, 0xf1, 0x9e, 0xbb, 0x40, 0x65, 0xa, 0x2f, 0xe1, 0xc4, 0xab, 0x8e, 0x75, 0x50, 0x3f, 0x1a, 0xbe, 0x9b, 0xf4, 0xd1, 0x2a, 0xf, 0x60, 0x45, 0x8b, 0xae, 0xc1, 0xe4, 0x1f, 0x3a, 0x55, 0x70, 0xb5, 0x90, 0xff, 0xda, 0x21, 0x4, 0x6b, 0x4e, 0x80, 0xa5, 0xca, 0xef, 0x14, 0x31, 0x5e, 0x7b, 0xdf, 0xfa, 0x95, 0xb0, 0x4b, 0x6e, 0x1, 0x24, 0xea, 0xcf, 0xa0, 0x85, 0x7e, 0x5b, 0x34, 0x11, 0x61, 0x44, 0x2b, 0xe, 0xf5, 0xd0, 0xbf, 0x9a, 0x54, 0x71, 0x1e, 0x3b, 0xc0, 0xe5, 0x8a, 0xaf, 0xb, 0x2e, 0x41, 0x64, 0x9f, 0xba, 0xd5, 0xf0, 0x3e, 0x1b, 0x74, 0x51, 0xaa, 0x8f, 0xe0, 0xc5, 0x77, 0x52, 0x3d, 0x18, 0xe3, 0xc6, 0xa9, 0x8c, 0x42, 0x67, 0x8, 0x2d, 0xd6, 0xf3, 0x9c, 0xb9, 0x1d, 0x38, 0x57, 0x72, 0x89, 0xac, 0xc3, 0xe6, 0x28, 0xd, 0x62, 0x47, 0xbc, 0x99, 0xf6, 0xd3, 0xa3, 0x86, 0xe9, 0xcc, 0x37, 0x12, 0x7d, 0x58, 0x96, 0xb3, 0xdc, 0xf9, 0x2, 0x27, 0x48, 0x6d, 0xc9, 0xec, 0x83, 0xa6, 0x5d, 0x78, 0x17, 0x32, 0xfc, 0xd9, 0xb6, 0x93, 0x68, 0x4d, 0x22, 0x7, 0xc2, 0xe7, 0x88, 0xad, 0x56, 0x73, 0x1c, 0x39, 0xf7, 0xd2, 0xbd, 0x98, 0x63, 0x46, 0x29, 0xc, 0xa8, 0x8d, 0xe2, 0xc7, 0x3c, 0x19, 0x76, 0x53, 0x9d, 0xb8, 0xd7, 0xf2, 0x9, 0x2c, 0x43, 0x66, 0x16, 0x33, 0x5c, 0x79, 0x82, 0xa7, 0xc8, 0xed, 0x23, 0x6, 0x69, 0x4c, 0xb7, 0x92, 0xfd, 0xd8, 0x7c, 0x59, 0x36, 0x13, 0xe8, 0xcd, 0xa2, 0x87, 0x49, 0x6c, 0x3, 0x26, 0xdd, 0xf8, 0x97, 0xb2},
- {0x0, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0xb, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf, 0x5a, 0x7c, 0x16, 0x30, 0xc2, 0xe4, 0x8e, 0xa8, 0x77, 0x51, 0x3b, 0x1d, 0xef, 0xc9, 0xa3, 0x85, 0xb4, 0x92, 0xf8, 0xde, 0x2c, 0xa, 0x60, 0x46, 0x99, 0xbf, 0xd5, 0xf3, 0x1, 0x27, 0x4d, 0x6b, 0xee, 0xc8, 0xa2, 0x84, 0x76, 0x50, 0x3a, 0x1c, 0xc3, 0xe5, 0x8f, 0xa9, 0x5b, 0x7d, 0x17, 0x31, 0x75, 0x53, 0x39, 0x1f, 0xed, 0xcb, 0xa1, 0x87, 0x58, 0x7e, 0x14, 0x32, 0xc0, 0xe6, 0x8c, 0xaa, 0x2f, 0x9, 0x63, 0x45, 0xb7, 0x91, 0xfb, 0xdd, 0x2, 0x24, 0x4e, 0x68, 0x9a, 0xbc, 0xd6, 0xf0, 0xc1, 0xe7, 0x8d, 0xab, 0x59, 0x7f, 0x15, 0x33, 0xec, 0xca, 0xa0, 0x86, 0x74, 0x52, 0x38, 0x1e, 0x9b, 0xbd, 0xd7, 0xf1, 0x3, 0x25, 0x4f, 0x69, 0xb6, 0x90, 0xfa, 0xdc, 0x2e, 0x8, 0x62, 0x44, 0xea, 0xcc, 0xa6, 0x80, 0x72, 0x54, 0x3e, 0x18, 0xc7, 0xe1, 0x8b, 0xad, 0x5f, 0x79, 0x13, 0x35, 0xb0, 0x96, 0xfc, 0xda, 0x28, 0xe, 0x64, 0x42, 0x9d, 0xbb, 0xd1, 0xf7, 0x5, 0x23, 0x49, 0x6f, 0x5e, 0x78, 0x12, 0x34, 0xc6, 0xe0, 0x8a, 0xac, 0x73, 0x55, 0x3f, 0x19, 0xeb, 0xcd, 0xa7, 0x81, 0x4, 0x22, 0x48, 0x6e, 0x9c, 0xba, 0xd0, 0xf6, 0x29, 0xf, 0x65, 0x43, 0xb1, 0x97, 0xfd, 0xdb, 0x9f, 0xb9, 0xd3, 0xf5, 0x7, 0x21, 0x4b, 0x6d, 0xb2, 0x94, 0xfe, 0xd8, 0x2a, 0xc, 0x66, 0x40, 0xc5, 0xe3, 0x89, 0xaf, 0x5d, 0x7b, 0x11, 0x37, 0xe8, 0xce, 0xa4, 0x82, 0x70, 0x56, 0x3c, 0x1a, 0x2b, 0xd, 0x67, 0x41, 0xb3, 0x95, 0xff, 0xd9, 0x6, 0x20, 0x4a, 0x6c, 0x9e, 0xb8, 0xd2, 0xf4, 0x71, 0x57, 0x3d, 0x1b, 0xe9, 0xcf, 0xa5, 0x83, 0x5c, 0x7a, 0x10, 0x36, 0xc4, 0xe2, 0x88, 0xae},
- {0x0, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x2, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0, 0x4a, 0x6d, 0x4, 0x23, 0xd6, 0xf1, 0x98, 0xbf, 0x6f, 0x48, 0x21, 0x6, 0xf3, 0xd4, 0xbd, 0x9a, 0x94, 0xb3, 0xda, 0xfd, 0x8, 0x2f, 0x46, 0x61, 0xb1, 0x96, 0xff, 0xd8, 0x2d, 0xa, 0x63, 0x44, 0xde, 0xf9, 0x90, 0xb7, 0x42, 0x65, 0xc, 0x2b, 0xfb, 0xdc, 0xb5, 0x92, 0x67, 0x40, 0x29, 0xe, 0x35, 0x12, 0x7b, 0x5c, 0xa9, 0x8e, 0xe7, 0xc0, 0x10, 0x37, 0x5e, 0x79, 0x8c, 0xab, 0xc2, 0xe5, 0x7f, 0x58, 0x31, 0x16, 0xe3, 0xc4, 0xad, 0x8a, 0x5a, 0x7d, 0x14, 0x33, 0xc6, 0xe1, 0x88, 0xaf, 0xa1, 0x86, 0xef, 0xc8, 0x3d, 0x1a, 0x73, 0x54, 0x84, 0xa3, 0xca, 0xed, 0x18, 0x3f, 0x56, 0x71, 0xeb, 0xcc, 0xa5, 0x82, 0x77, 0x50, 0x39, 0x1e, 0xce, 0xe9, 0x80, 0xa7, 0x52, 0x75, 0x1c, 0x3b, 0x6a, 0x4d, 0x24, 0x3, 0xf6, 0xd1, 0xb8, 0x9f, 0x4f, 0x68, 0x1, 0x26, 0xd3, 0xf4, 0x9d, 0xba, 0x20, 0x7, 0x6e, 0x49, 0xbc, 0x9b, 0xf2, 0xd5, 0x5, 0x22, 0x4b, 0x6c, 0x99, 0xbe, 0xd7, 0xf0, 0xfe, 0xd9, 0xb0, 0x97, 0x62, 0x45, 0x2c, 0xb, 0xdb, 0xfc, 0x95, 0xb2, 0x47, 0x60, 0x9, 0x2e, 0xb4, 0x93, 0xfa, 0xdd, 0x28, 0xf, 0x66, 0x41, 0x91, 0xb6, 0xdf, 0xf8, 0xd, 0x2a, 0x43, 0x64, 0x5f, 0x78, 0x11, 0x36, 0xc3, 0xe4, 0x8d, 0xaa, 0x7a, 0x5d, 0x34, 0x13, 0xe6, 0xc1, 0xa8, 0x8f, 0x15, 0x32, 0x5b, 0x7c, 0x89, 0xae, 0xc7, 0xe0, 0x30, 0x17, 0x7e, 0x59, 0xac, 0x8b, 0xe2, 0xc5, 0xcb, 0xec, 0x85, 0xa2, 0x57, 0x70, 0x19, 0x3e, 0xee, 0xc9, 0xa0, 0x87, 0x72, 0x55, 0x3c, 0x1b, 0x81, 0xa6, 0xcf, 0xe8, 0x1d, 0x3a, 0x53, 0x74, 0xa4, 0x83, 0xea, 0xcd, 0x38, 0x1f, 0x76, 0x51},
- {0x0, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0xd, 0x25, 0xfd, 0xd5, 0xad, 0x85, 0xba, 0x92, 0xea, 0xc2, 0x1a, 0x32, 0x4a, 0x62, 0xe7, 0xcf, 0xb7, 0x9f, 0x47, 0x6f, 0x17, 0x3f, 0x69, 0x41, 0x39, 0x11, 0xc9, 0xe1, 0x99, 0xb1, 0x34, 0x1c, 0x64, 0x4c, 0x94, 0xbc, 0xc4, 0xec, 0xd3, 0xfb, 0x83, 0xab, 0x73, 0x5b, 0x23, 0xb, 0x8e, 0xa6, 0xde, 0xf6, 0x2e, 0x6, 0x7e, 0x56, 0xd2, 0xfa, 0x82, 0xaa, 0x72, 0x5a, 0x22, 0xa, 0x8f, 0xa7, 0xdf, 0xf7, 0x2f, 0x7, 0x7f, 0x57, 0x68, 0x40, 0x38, 0x10, 0xc8, 0xe0, 0x98, 0xb0, 0x35, 0x1d, 0x65, 0x4d, 0x95, 0xbd, 0xc5, 0xed, 0xbb, 0x93, 0xeb, 0xc3, 0x1b, 0x33, 0x4b, 0x63, 0xe6, 0xce, 0xb6, 0x9e, 0x46, 0x6e, 0x16, 0x3e, 0x1, 0x29, 0x51, 0x79, 0xa1, 0x89, 0xf1, 0xd9, 0x5c, 0x74, 0xc, 0x24, 0xfc, 0xd4, 0xac, 0x84, 0xb9, 0x91, 0xe9, 0xc1, 0x19, 0x31, 0x49, 0x61, 0xe4, 0xcc, 0xb4, 0x9c, 0x44, 0x6c, 0x14, 0x3c, 0x3, 0x2b, 0x53, 0x7b, 0xa3, 0x8b, 0xf3, 0xdb, 0x5e, 0x76, 0xe, 0x26, 0xfe, 0xd6, 0xae, 0x86, 0xd0, 0xf8, 0x80, 0xa8, 0x70, 0x58, 0x20, 0x8, 0x8d, 0xa5, 0xdd, 0xf5, 0x2d, 0x5, 0x7d, 0x55, 0x6a, 0x42, 0x3a, 0x12, 0xca, 0xe2, 0x9a, 0xb2, 0x37, 0x1f, 0x67, 0x4f, 0x97, 0xbf, 0xc7, 0xef, 0x6b, 0x43, 0x3b, 0x13, 0xcb, 0xe3, 0x9b, 0xb3, 0x36, 0x1e, 0x66, 0x4e, 0x96, 0xbe, 0xc6, 0xee, 0xd1, 0xf9, 0x81, 0xa9, 0x71, 0x59, 0x21, 0x9, 0x8c, 0xa4, 0xdc, 0xf4, 0x2c, 0x4, 0x7c, 0x54, 0x2, 0x2a, 0x52, 0x7a, 0xa2, 0x8a, 0xf2, 0xda, 0x5f, 0x77, 0xf, 0x27, 0xff, 0xd7, 0xaf, 0x87, 0xb8, 0x90, 0xe8, 0xc0, 0x18, 0x30, 0x48, 0x60, 0xe5, 0xcd, 0xb5, 0x9d, 0x45, 0x6d, 0x15, 0x3d},
- {0x0, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x7, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a, 0xaa, 0x83, 0xf8, 0xd1, 0xe, 0x27, 0x5c, 0x75, 0xff, 0xd6, 0xad, 0x84, 0x5b, 0x72, 0x9, 0x20, 0x49, 0x60, 0x1b, 0x32, 0xed, 0xc4, 0xbf, 0x96, 0x1c, 0x35, 0x4e, 0x67, 0xb8, 0x91, 0xea, 0xc3, 0xe3, 0xca, 0xb1, 0x98, 0x47, 0x6e, 0x15, 0x3c, 0xb6, 0x9f, 0xe4, 0xcd, 0x12, 0x3b, 0x40, 0x69, 0x92, 0xbb, 0xc0, 0xe9, 0x36, 0x1f, 0x64, 0x4d, 0xc7, 0xee, 0x95, 0xbc, 0x63, 0x4a, 0x31, 0x18, 0x38, 0x11, 0x6a, 0x43, 0x9c, 0xb5, 0xce, 0xe7, 0x6d, 0x44, 0x3f, 0x16, 0xc9, 0xe0, 0x9b, 0xb2, 0xdb, 0xf2, 0x89, 0xa0, 0x7f, 0x56, 0x2d, 0x4, 0x8e, 0xa7, 0xdc, 0xf5, 0x2a, 0x3, 0x78, 0x51, 0x71, 0x58, 0x23, 0xa, 0xd5, 0xfc, 0x87, 0xae, 0x24, 0xd, 0x76, 0x5f, 0x80, 0xa9, 0xd2, 0xfb, 0x39, 0x10, 0x6b, 0x42, 0x9d, 0xb4, 0xcf, 0xe6, 0x6c, 0x45, 0x3e, 0x17, 0xc8, 0xe1, 0x9a, 0xb3, 0x93, 0xba, 0xc1, 0xe8, 0x37, 0x1e, 0x65, 0x4c, 0xc6, 0xef, 0x94, 0xbd, 0x62, 0x4b, 0x30, 0x19, 0x70, 0x59, 0x22, 0xb, 0xd4, 0xfd, 0x86, 0xaf, 0x25, 0xc, 0x77, 0x5e, 0x81, 0xa8, 0xd3, 0xfa, 0xda, 0xf3, 0x88, 0xa1, 0x7e, 0x57, 0x2c, 0x5, 0x8f, 0xa6, 0xdd, 0xf4, 0x2b, 0x2, 0x79, 0x50, 0xab, 0x82, 0xf9, 0xd0, 0xf, 0x26, 0x5d, 0x74, 0xfe, 0xd7, 0xac, 0x85, 0x5a, 0x73, 0x8, 0x21, 0x1, 0x28, 0x53, 0x7a, 0xa5, 0x8c, 0xf7, 0xde, 0x54, 0x7d, 0x6, 0x2f, 0xf0, 0xd9, 0xa2, 0x8b, 0xe2, 0xcb, 0xb0, 0x99, 0x46, 0x6f, 0x14, 0x3d, 0xb7, 0x9e, 0xe5, 0xcc, 0x13, 0x3a, 0x41, 0x68, 0x48, 0x61, 0x1a, 0x33, 0xec, 0xc5, 0xbe, 0x97, 0x1d, 0x34, 0x4f, 0x66, 0xb9, 0x90, 0xeb, 0xc2},
- {0x0, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b, 0x9a, 0xb0, 0xce, 0xe4, 0x32, 0x18, 0x66, 0x4c, 0xd7, 0xfd, 0x83, 0xa9, 0x7f, 0x55, 0x2b, 0x1, 0x29, 0x3, 0x7d, 0x57, 0x81, 0xab, 0xd5, 0xff, 0x64, 0x4e, 0x30, 0x1a, 0xcc, 0xe6, 0x98, 0xb2, 0xb3, 0x99, 0xe7, 0xcd, 0x1b, 0x31, 0x4f, 0x65, 0xfe, 0xd4, 0xaa, 0x80, 0x56, 0x7c, 0x2, 0x28, 0x52, 0x78, 0x6, 0x2c, 0xfa, 0xd0, 0xae, 0x84, 0x1f, 0x35, 0x4b, 0x61, 0xb7, 0x9d, 0xe3, 0xc9, 0xc8, 0xe2, 0x9c, 0xb6, 0x60, 0x4a, 0x34, 0x1e, 0x85, 0xaf, 0xd1, 0xfb, 0x2d, 0x7, 0x79, 0x53, 0x7b, 0x51, 0x2f, 0x5, 0xd3, 0xf9, 0x87, 0xad, 0x36, 0x1c, 0x62, 0x48, 0x9e, 0xb4, 0xca, 0xe0, 0xe1, 0xcb, 0xb5, 0x9f, 0x49, 0x63, 0x1d, 0x37, 0xac, 0x86, 0xf8, 0xd2, 0x4, 0x2e, 0x50, 0x7a, 0xa4, 0x8e, 0xf0, 0xda, 0xc, 0x26, 0x58, 0x72, 0xe9, 0xc3, 0xbd, 0x97, 0x41, 0x6b, 0x15, 0x3f, 0x3e, 0x14, 0x6a, 0x40, 0x96, 0xbc, 0xc2, 0xe8, 0x73, 0x59, 0x27, 0xd, 0xdb, 0xf1, 0x8f, 0xa5, 0x8d, 0xa7, 0xd9, 0xf3, 0x25, 0xf, 0x71, 0x5b, 0xc0, 0xea, 0x94, 0xbe, 0x68, 0x42, 0x3c, 0x16, 0x17, 0x3d, 0x43, 0x69, 0xbf, 0x95, 0xeb, 0xc1, 0x5a, 0x70, 0xe, 0x24, 0xf2, 0xd8, 0xa6, 0x8c, 0xf6, 0xdc, 0xa2, 0x88, 0x5e, 0x74, 0xa, 0x20, 0xbb, 0x91, 0xef, 0xc5, 0x13, 0x39, 0x47, 0x6d, 0x6c, 0x46, 0x38, 0x12, 0xc4, 0xee, 0x90, 0xba, 0x21, 0xb, 0x75, 0x5f, 0x89, 0xa3, 0xdd, 0xf7, 0xdf, 0xf5, 0x8b, 0xa1, 0x77, 0x5d, 0x23, 0x9, 0x92, 0xb8, 0xc6, 0xec, 0x3a, 0x10, 0x6e, 0x44, 0x45, 0x6f, 0x11, 0x3b, 0xed, 0xc7, 0xb9, 0x93, 0x8, 0x22, 0x5c, 0x76, 0xa0, 0x8a, 0xf4, 0xde},
- {0x0, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94, 0x8a, 0xa1, 0xdc, 0xf7, 0x26, 0xd, 0x70, 0x5b, 0xcf, 0xe4, 0x99, 0xb2, 0x63, 0x48, 0x35, 0x1e, 0x9, 0x22, 0x5f, 0x74, 0xa5, 0x8e, 0xf3, 0xd8, 0x4c, 0x67, 0x1a, 0x31, 0xe0, 0xcb, 0xb6, 0x9d, 0x83, 0xa8, 0xd5, 0xfe, 0x2f, 0x4, 0x79, 0x52, 0xc6, 0xed, 0x90, 0xbb, 0x6a, 0x41, 0x3c, 0x17, 0x12, 0x39, 0x44, 0x6f, 0xbe, 0x95, 0xe8, 0xc3, 0x57, 0x7c, 0x1, 0x2a, 0xfb, 0xd0, 0xad, 0x86, 0x98, 0xb3, 0xce, 0xe5, 0x34, 0x1f, 0x62, 0x49, 0xdd, 0xf6, 0x8b, 0xa0, 0x71, 0x5a, 0x27, 0xc, 0x1b, 0x30, 0x4d, 0x66, 0xb7, 0x9c, 0xe1, 0xca, 0x5e, 0x75, 0x8, 0x23, 0xf2, 0xd9, 0xa4, 0x8f, 0x91, 0xba, 0xc7, 0xec, 0x3d, 0x16, 0x6b, 0x40, 0xd4, 0xff, 0x82, 0xa9, 0x78, 0x53, 0x2e, 0x5, 0x24, 0xf, 0x72, 0x59, 0x88, 0xa3, 0xde, 0xf5, 0x61, 0x4a, 0x37, 0x1c, 0xcd, 0xe6, 0x9b, 0xb0, 0xae, 0x85, 0xf8, 0xd3, 0x2, 0x29, 0x54, 0x7f, 0xeb, 0xc0, 0xbd, 0x96, 0x47, 0x6c, 0x11, 0x3a, 0x2d, 0x6, 0x7b, 0x50, 0x81, 0xaa, 0xd7, 0xfc, 0x68, 0x43, 0x3e, 0x15, 0xc4, 0xef, 0x92, 0xb9, 0xa7, 0x8c, 0xf1, 0xda, 0xb, 0x20, 0x5d, 0x76, 0xe2, 0xc9, 0xb4, 0x9f, 0x4e, 0x65, 0x18, 0x33, 0x36, 0x1d, 0x60, 0x4b, 0x9a, 0xb1, 0xcc, 0xe7, 0x73, 0x58, 0x25, 0xe, 0xdf, 0xf4, 0x89, 0xa2, 0xbc, 0x97, 0xea, 0xc1, 0x10, 0x3b, 0x46, 0x6d, 0xf9, 0xd2, 0xaf, 0x84, 0x55, 0x7e, 0x3, 0x28, 0x3f, 0x14, 0x69, 0x42, 0x93, 0xb8, 0xc5, 0xee, 0x7a, 0x51, 0x2c, 0x7, 0xd6, 0xfd, 0x80, 0xab, 0xb5, 0x9e, 0xe3, 0xc8, 0x19, 0x32, 0x4f, 0x64, 0xf0, 0xdb, 0xa6, 0x8d, 0x5c, 0x77, 0xa, 0x21},
- {0x0, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x9, 0xcd, 0xe1, 0x95, 0xb9, 0xfa, 0xd6, 0xa2, 0x8e, 0x4a, 0x66, 0x12, 0x3e, 0x87, 0xab, 0xdf, 0xf3, 0x37, 0x1b, 0x6f, 0x43, 0xe9, 0xc5, 0xb1, 0x9d, 0x59, 0x75, 0x1, 0x2d, 0x94, 0xb8, 0xcc, 0xe0, 0x24, 0x8, 0x7c, 0x50, 0x13, 0x3f, 0x4b, 0x67, 0xa3, 0x8f, 0xfb, 0xd7, 0x6e, 0x42, 0x36, 0x1a, 0xde, 0xf2, 0x86, 0xaa, 0xcf, 0xe3, 0x97, 0xbb, 0x7f, 0x53, 0x27, 0xb, 0xb2, 0x9e, 0xea, 0xc6, 0x2, 0x2e, 0x5a, 0x76, 0x35, 0x19, 0x6d, 0x41, 0x85, 0xa9, 0xdd, 0xf1, 0x48, 0x64, 0x10, 0x3c, 0xf8, 0xd4, 0xa0, 0x8c, 0x26, 0xa, 0x7e, 0x52, 0x96, 0xba, 0xce, 0xe2, 0x5b, 0x77, 0x3, 0x2f, 0xeb, 0xc7, 0xb3, 0x9f, 0xdc, 0xf0, 0x84, 0xa8, 0x6c, 0x40, 0x34, 0x18, 0xa1, 0x8d, 0xf9, 0xd5, 0x11, 0x3d, 0x49, 0x65, 0x83, 0xaf, 0xdb, 0xf7, 0x33, 0x1f, 0x6b, 0x47, 0xfe, 0xd2, 0xa6, 0x8a, 0x4e, 0x62, 0x16, 0x3a, 0x79, 0x55, 0x21, 0xd, 0xc9, 0xe5, 0x91, 0xbd, 0x4, 0x28, 0x5c, 0x70, 0xb4, 0x98, 0xec, 0xc0, 0x6a, 0x46, 0x32, 0x1e, 0xda, 0xf6, 0x82, 0xae, 0x17, 0x3b, 0x4f, 0x63, 0xa7, 0x8b, 0xff, 0xd3, 0x90, 0xbc, 0xc8, 0xe4, 0x20, 0xc, 0x78, 0x54, 0xed, 0xc1, 0xb5, 0x99, 0x5d, 0x71, 0x5, 0x29, 0x4c, 0x60, 0x14, 0x38, 0xfc, 0xd0, 0xa4, 0x88, 0x31, 0x1d, 0x69, 0x45, 0x81, 0xad, 0xd9, 0xf5, 0xb6, 0x9a, 0xee, 0xc2, 0x6, 0x2a, 0x5e, 0x72, 0xcb, 0xe7, 0x93, 0xbf, 0x7b, 0x57, 0x23, 0xf, 0xa5, 0x89, 0xfd, 0xd1, 0x15, 0x39, 0x4d, 0x61, 0xd8, 0xf4, 0x80, 0xac, 0x68, 0x44, 0x30, 0x1c, 0x5f, 0x73, 0x7, 0x2b, 0xef, 0xc3, 0xb7, 0x9b, 0x22, 0xe, 0x7a, 0x56, 0x92, 0xbe, 0xca, 0xe6},
- {0x0, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x2, 0xc1, 0xec, 0x9b, 0xb6, 0xea, 0xc7, 0xb0, 0x9d, 0x5e, 0x73, 0x4, 0x29, 0x9f, 0xb2, 0xc5, 0xe8, 0x2b, 0x6, 0x71, 0x5c, 0xc9, 0xe4, 0x93, 0xbe, 0x7d, 0x50, 0x27, 0xa, 0xbc, 0x91, 0xe6, 0xcb, 0x8, 0x25, 0x52, 0x7f, 0x23, 0xe, 0x79, 0x54, 0x97, 0xba, 0xcd, 0xe0, 0x56, 0x7b, 0xc, 0x21, 0xe2, 0xcf, 0xb8, 0x95, 0x8f, 0xa2, 0xd5, 0xf8, 0x3b, 0x16, 0x61, 0x4c, 0xfa, 0xd7, 0xa0, 0x8d, 0x4e, 0x63, 0x14, 0x39, 0x65, 0x48, 0x3f, 0x12, 0xd1, 0xfc, 0x8b, 0xa6, 0x10, 0x3d, 0x4a, 0x67, 0xa4, 0x89, 0xfe, 0xd3, 0x46, 0x6b, 0x1c, 0x31, 0xf2, 0xdf, 0xa8, 0x85, 0x33, 0x1e, 0x69, 0x44, 0x87, 0xaa, 0xdd, 0xf0, 0xac, 0x81, 0xf6, 0xdb, 0x18, 0x35, 0x42, 0x6f, 0xd9, 0xf4, 0x83, 0xae, 0x6d, 0x40, 0x37, 0x1a, 0x3, 0x2e, 0x59, 0x74, 0xb7, 0x9a, 0xed, 0xc0, 0x76, 0x5b, 0x2c, 0x1, 0xc2, 0xef, 0x98, 0xb5, 0xe9, 0xc4, 0xb3, 0x9e, 0x5d, 0x70, 0x7, 0x2a, 0x9c, 0xb1, 0xc6, 0xeb, 0x28, 0x5, 0x72, 0x5f, 0xca, 0xe7, 0x90, 0xbd, 0x7e, 0x53, 0x24, 0x9, 0xbf, 0x92, 0xe5, 0xc8, 0xb, 0x26, 0x51, 0x7c, 0x20, 0xd, 0x7a, 0x57, 0x94, 0xb9, 0xce, 0xe3, 0x55, 0x78, 0xf, 0x22, 0xe1, 0xcc, 0xbb, 0x96, 0x8c, 0xa1, 0xd6, 0xfb, 0x38, 0x15, 0x62, 0x4f, 0xf9, 0xd4, 0xa3, 0x8e, 0x4d, 0x60, 0x17, 0x3a, 0x66, 0x4b, 0x3c, 0x11, 0xd2, 0xff, 0x88, 0xa5, 0x13, 0x3e, 0x49, 0x64, 0xa7, 0x8a, 0xfd, 0xd0, 0x45, 0x68, 0x1f, 0x32, 0xf1, 0xdc, 0xab, 0x86, 0x30, 0x1d, 0x6a, 0x47, 0x84, 0xa9, 0xde, 0xf3, 0xaf, 0x82, 0xf5, 0xd8, 0x1b, 0x36, 0x41, 0x6c, 0xda, 0xf7, 0x80, 0xad, 0x6e, 0x43, 0x34, 0x19},
- {0x0, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7, 0xda, 0xf4, 0x86, 0xa8, 0x62, 0x4c, 0x3e, 0x10, 0xb7, 0x99, 0xeb, 0xc5, 0xf, 0x21, 0x53, 0x7d, 0xa9, 0x87, 0xf5, 0xdb, 0x11, 0x3f, 0x4d, 0x63, 0xc4, 0xea, 0x98, 0xb6, 0x7c, 0x52, 0x20, 0xe, 0x73, 0x5d, 0x2f, 0x1, 0xcb, 0xe5, 0x97, 0xb9, 0x1e, 0x30, 0x42, 0x6c, 0xa6, 0x88, 0xfa, 0xd4, 0x4f, 0x61, 0x13, 0x3d, 0xf7, 0xd9, 0xab, 0x85, 0x22, 0xc, 0x7e, 0x50, 0x9a, 0xb4, 0xc6, 0xe8, 0x95, 0xbb, 0xc9, 0xe7, 0x2d, 0x3, 0x71, 0x5f, 0xf8, 0xd6, 0xa4, 0x8a, 0x40, 0x6e, 0x1c, 0x32, 0xe6, 0xc8, 0xba, 0x94, 0x5e, 0x70, 0x2, 0x2c, 0x8b, 0xa5, 0xd7, 0xf9, 0x33, 0x1d, 0x6f, 0x41, 0x3c, 0x12, 0x60, 0x4e, 0x84, 0xaa, 0xd8, 0xf6, 0x51, 0x7f, 0xd, 0x23, 0xe9, 0xc7, 0xb5, 0x9b, 0x9e, 0xb0, 0xc2, 0xec, 0x26, 0x8, 0x7a, 0x54, 0xf3, 0xdd, 0xaf, 0x81, 0x4b, 0x65, 0x17, 0x39, 0x44, 0x6a, 0x18, 0x36, 0xfc, 0xd2, 0xa0, 0x8e, 0x29, 0x7, 0x75, 0x5b, 0x91, 0xbf, 0xcd, 0xe3, 0x37, 0x19, 0x6b, 0x45, 0x8f, 0xa1, 0xd3, 0xfd, 0x5a, 0x74, 0x6, 0x28, 0xe2, 0xcc, 0xbe, 0x90, 0xed, 0xc3, 0xb1, 0x9f, 0x55, 0x7b, 0x9, 0x27, 0x80, 0xae, 0xdc, 0xf2, 0x38, 0x16, 0x64, 0x4a, 0xd1, 0xff, 0x8d, 0xa3, 0x69, 0x47, 0x35, 0x1b, 0xbc, 0x92, 0xe0, 0xce, 0x4, 0x2a, 0x58, 0x76, 0xb, 0x25, 0x57, 0x79, 0xb3, 0x9d, 0xef, 0xc1, 0x66, 0x48, 0x3a, 0x14, 0xde, 0xf0, 0x82, 0xac, 0x78, 0x56, 0x24, 0xa, 0xc0, 0xee, 0x9c, 0xb2, 0x15, 0x3b, 0x49, 0x67, 0xad, 0x83, 0xf1, 0xdf, 0xa2, 0x8c, 0xfe, 0xd0, 0x1a, 0x34, 0x46, 0x68, 0xcf, 0xe1, 0x93, 0xbd, 0x77, 0x59, 0x2b, 0x5},
- {0x0, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8, 0xca, 0xe5, 0x94, 0xbb, 0x76, 0x59, 0x28, 0x7, 0xaf, 0x80, 0xf1, 0xde, 0x13, 0x3c, 0x4d, 0x62, 0x89, 0xa6, 0xd7, 0xf8, 0x35, 0x1a, 0x6b, 0x44, 0xec, 0xc3, 0xb2, 0x9d, 0x50, 0x7f, 0xe, 0x21, 0x43, 0x6c, 0x1d, 0x32, 0xff, 0xd0, 0xa1, 0x8e, 0x26, 0x9, 0x78, 0x57, 0x9a, 0xb5, 0xc4, 0xeb, 0xf, 0x20, 0x51, 0x7e, 0xb3, 0x9c, 0xed, 0xc2, 0x6a, 0x45, 0x34, 0x1b, 0xd6, 0xf9, 0x88, 0xa7, 0xc5, 0xea, 0x9b, 0xb4, 0x79, 0x56, 0x27, 0x8, 0xa0, 0x8f, 0xfe, 0xd1, 0x1c, 0x33, 0x42, 0x6d, 0x86, 0xa9, 0xd8, 0xf7, 0x3a, 0x15, 0x64, 0x4b, 0xe3, 0xcc, 0xbd, 0x92, 0x5f, 0x70, 0x1, 0x2e, 0x4c, 0x63, 0x12, 0x3d, 0xf0, 0xdf, 0xae, 0x81, 0x29, 0x6, 0x77, 0x58, 0x95, 0xba, 0xcb, 0xe4, 0x1e, 0x31, 0x40, 0x6f, 0xa2, 0x8d, 0xfc, 0xd3, 0x7b, 0x54, 0x25, 0xa, 0xc7, 0xe8, 0x99, 0xb6, 0xd4, 0xfb, 0x8a, 0xa5, 0x68, 0x47, 0x36, 0x19, 0xb1, 0x9e, 0xef, 0xc0, 0xd, 0x22, 0x53, 0x7c, 0x97, 0xb8, 0xc9, 0xe6, 0x2b, 0x4, 0x75, 0x5a, 0xf2, 0xdd, 0xac, 0x83, 0x4e, 0x61, 0x10, 0x3f, 0x5d, 0x72, 0x3, 0x2c, 0xe1, 0xce, 0xbf, 0x90, 0x38, 0x17, 0x66, 0x49, 0x84, 0xab, 0xda, 0xf5, 0x11, 0x3e, 0x4f, 0x60, 0xad, 0x82, 0xf3, 0xdc, 0x74, 0x5b, 0x2a, 0x5, 0xc8, 0xe7, 0x96, 0xb9, 0xdb, 0xf4, 0x85, 0xaa, 0x67, 0x48, 0x39, 0x16, 0xbe, 0x91, 0xe0, 0xcf, 0x2, 0x2d, 0x5c, 0x73, 0x98, 0xb7, 0xc6, 0xe9, 0x24, 0xb, 0x7a, 0x55, 0xfd, 0xd2, 0xa3, 0x8c, 0x41, 0x6e, 0x1f, 0x30, 0x52, 0x7d, 0xc, 0x23, 0xee, 0xc1, 0xb0, 0x9f, 0x37, 0x18, 0x69, 0x46, 0x8b, 0xa4, 0xd5, 0xfa},
- {0x0, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0xd, 0x27, 0x17, 0x47, 0x77, 0xe7, 0xd7, 0x87, 0xb7, 0xba, 0x8a, 0xda, 0xea, 0x7a, 0x4a, 0x1a, 0x2a, 0x4e, 0x7e, 0x2e, 0x1e, 0x8e, 0xbe, 0xee, 0xde, 0xd3, 0xe3, 0xb3, 0x83, 0x13, 0x23, 0x73, 0x43, 0x69, 0x59, 0x9, 0x39, 0xa9, 0x99, 0xc9, 0xf9, 0xf4, 0xc4, 0x94, 0xa4, 0x34, 0x4, 0x54, 0x64, 0x9c, 0xac, 0xfc, 0xcc, 0x5c, 0x6c, 0x3c, 0xc, 0x1, 0x31, 0x61, 0x51, 0xc1, 0xf1, 0xa1, 0x91, 0xbb, 0x8b, 0xdb, 0xeb, 0x7b, 0x4b, 0x1b, 0x2b, 0x26, 0x16, 0x46, 0x76, 0xe6, 0xd6, 0x86, 0xb6, 0xd2, 0xe2, 0xb2, 0x82, 0x12, 0x22, 0x72, 0x42, 0x4f, 0x7f, 0x2f, 0x1f, 0x8f, 0xbf, 0xef, 0xdf, 0xf5, 0xc5, 0x95, 0xa5, 0x35, 0x5, 0x55, 0x65, 0x68, 0x58, 0x8, 0x38, 0xa8, 0x98, 0xc8, 0xf8, 0x25, 0x15, 0x45, 0x75, 0xe5, 0xd5, 0x85, 0xb5, 0xb8, 0x88, 0xd8, 0xe8, 0x78, 0x48, 0x18, 0x28, 0x2, 0x32, 0x62, 0x52, 0xc2, 0xf2, 0xa2, 0x92, 0x9f, 0xaf, 0xff, 0xcf, 0x5f, 0x6f, 0x3f, 0xf, 0x6b, 0x5b, 0xb, 0x3b, 0xab, 0x9b, 0xcb, 0xfb, 0xf6, 0xc6, 0x96, 0xa6, 0x36, 0x6, 0x56, 0x66, 0x4c, 0x7c, 0x2c, 0x1c, 0x8c, 0xbc, 0xec, 0xdc, 0xd1, 0xe1, 0xb1, 0x81, 0x11, 0x21, 0x71, 0x41, 0xb9, 0x89, 0xd9, 0xe9, 0x79, 0x49, 0x19, 0x29, 0x24, 0x14, 0x44, 0x74, 0xe4, 0xd4, 0x84, 0xb4, 0x9e, 0xae, 0xfe, 0xce, 0x5e, 0x6e, 0x3e, 0xe, 0x3, 0x33, 0x63, 0x53, 0xc3, 0xf3, 0xa3, 0x93, 0xf7, 0xc7, 0x97, 0xa7, 0x37, 0x7, 0x57, 0x67, 0x6a, 0x5a, 0xa, 0x3a, 0xaa, 0x9a, 0xca, 0xfa, 0xd0, 0xe0, 0xb0, 0x80, 0x10, 0x20, 0x70, 0x40, 0x4d, 0x7d, 0x2d, 0x1d, 0x8d, 0xbd, 0xed, 0xdd},
- {0x0, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x2, 0x37, 0x6, 0x55, 0x64, 0xf3, 0xc2, 0x91, 0xa0, 0xa2, 0x93, 0xc0, 0xf1, 0x66, 0x57, 0x4, 0x35, 0x6e, 0x5f, 0xc, 0x3d, 0xaa, 0x9b, 0xc8, 0xf9, 0xfb, 0xca, 0x99, 0xa8, 0x3f, 0xe, 0x5d, 0x6c, 0x59, 0x68, 0x3b, 0xa, 0x9d, 0xac, 0xff, 0xce, 0xcc, 0xfd, 0xae, 0x9f, 0x8, 0x39, 0x6a, 0x5b, 0xdc, 0xed, 0xbe, 0x8f, 0x18, 0x29, 0x7a, 0x4b, 0x49, 0x78, 0x2b, 0x1a, 0x8d, 0xbc, 0xef, 0xde, 0xeb, 0xda, 0x89, 0xb8, 0x2f, 0x1e, 0x4d, 0x7c, 0x7e, 0x4f, 0x1c, 0x2d, 0xba, 0x8b, 0xd8, 0xe9, 0xb2, 0x83, 0xd0, 0xe1, 0x76, 0x47, 0x14, 0x25, 0x27, 0x16, 0x45, 0x74, 0xe3, 0xd2, 0x81, 0xb0, 0x85, 0xb4, 0xe7, 0xd6, 0x41, 0x70, 0x23, 0x12, 0x10, 0x21, 0x72, 0x43, 0xd4, 0xe5, 0xb6, 0x87, 0xa5, 0x94, 0xc7, 0xf6, 0x61, 0x50, 0x3, 0x32, 0x30, 0x1, 0x52, 0x63, 0xf4, 0xc5, 0x96, 0xa7, 0x92, 0xa3, 0xf0, 0xc1, 0x56, 0x67, 0x34, 0x5, 0x7, 0x36, 0x65, 0x54, 0xc3, 0xf2, 0xa1, 0x90, 0xcb, 0xfa, 0xa9, 0x98, 0xf, 0x3e, 0x6d, 0x5c, 0x5e, 0x6f, 0x3c, 0xd, 0x9a, 0xab, 0xf8, 0xc9, 0xfc, 0xcd, 0x9e, 0xaf, 0x38, 0x9, 0x5a, 0x6b, 0x69, 0x58, 0xb, 0x3a, 0xad, 0x9c, 0xcf, 0xfe, 0x79, 0x48, 0x1b, 0x2a, 0xbd, 0x8c, 0xdf, 0xee, 0xec, 0xdd, 0x8e, 0xbf, 0x28, 0x19, 0x4a, 0x7b, 0x4e, 0x7f, 0x2c, 0x1d, 0x8a, 0xbb, 0xe8, 0xd9, 0xdb, 0xea, 0xb9, 0x88, 0x1f, 0x2e, 0x7d, 0x4c, 0x17, 0x26, 0x75, 0x44, 0xd3, 0xe2, 0xb1, 0x80, 0x82, 0xb3, 0xe0, 0xd1, 0x46, 0x77, 0x24, 0x15, 0x20, 0x11, 0x42, 0x73, 0xe4, 0xd5, 0x86, 0xb7, 0xb5, 0x84, 0xd7, 0xe6, 0x71, 0x40, 0x13, 0x22},
- {0x0, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13, 0x7, 0x35, 0x63, 0x51, 0xcf, 0xfd, 0xab, 0x99, 0x8a, 0xb8, 0xee, 0xdc, 0x42, 0x70, 0x26, 0x14, 0xe, 0x3c, 0x6a, 0x58, 0xc6, 0xf4, 0xa2, 0x90, 0x83, 0xb1, 0xe7, 0xd5, 0x4b, 0x79, 0x2f, 0x1d, 0x9, 0x3b, 0x6d, 0x5f, 0xc1, 0xf3, 0xa5, 0x97, 0x84, 0xb6, 0xe0, 0xd2, 0x4c, 0x7e, 0x28, 0x1a, 0x1c, 0x2e, 0x78, 0x4a, 0xd4, 0xe6, 0xb0, 0x82, 0x91, 0xa3, 0xf5, 0xc7, 0x59, 0x6b, 0x3d, 0xf, 0x1b, 0x29, 0x7f, 0x4d, 0xd3, 0xe1, 0xb7, 0x85, 0x96, 0xa4, 0xf2, 0xc0, 0x5e, 0x6c, 0x3a, 0x8, 0x12, 0x20, 0x76, 0x44, 0xda, 0xe8, 0xbe, 0x8c, 0x9f, 0xad, 0xfb, 0xc9, 0x57, 0x65, 0x33, 0x1, 0x15, 0x27, 0x71, 0x43, 0xdd, 0xef, 0xb9, 0x8b, 0x98, 0xaa, 0xfc, 0xce, 0x50, 0x62, 0x34, 0x6, 0x38, 0xa, 0x5c, 0x6e, 0xf0, 0xc2, 0x94, 0xa6, 0xb5, 0x87, 0xd1, 0xe3, 0x7d, 0x4f, 0x19, 0x2b, 0x3f, 0xd, 0x5b, 0x69, 0xf7, 0xc5, 0x93, 0xa1, 0xb2, 0x80, 0xd6, 0xe4, 0x7a, 0x48, 0x1e, 0x2c, 0x36, 0x4, 0x52, 0x60, 0xfe, 0xcc, 0x9a, 0xa8, 0xbb, 0x89, 0xdf, 0xed, 0x73, 0x41, 0x17, 0x25, 0x31, 0x3, 0x55, 0x67, 0xf9, 0xcb, 0x9d, 0xaf, 0xbc, 0x8e, 0xd8, 0xea, 0x74, 0x46, 0x10, 0x22, 0x24, 0x16, 0x40, 0x72, 0xec, 0xde, 0x88, 0xba, 0xa9, 0x9b, 0xcd, 0xff, 0x61, 0x53, 0x5, 0x37, 0x23, 0x11, 0x47, 0x75, 0xeb, 0xd9, 0x8f, 0xbd, 0xae, 0x9c, 0xca, 0xf8, 0x66, 0x54, 0x2, 0x30, 0x2a, 0x18, 0x4e, 0x7c, 0xe2, 0xd0, 0x86, 0xb4, 0xa7, 0x95, 0xc3, 0xf1, 0x6f, 0x5d, 0xb, 0x39, 0x2d, 0x1f, 0x49, 0x7b, 0xe5, 0xd7, 0x81, 0xb3, 0xa0, 0x92, 0xc4, 0xf6, 0x68, 0x5a, 0xc, 0x3e},
- {0x0, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c, 0x17, 0x24, 0x71, 0x42, 0xdb, 0xe8, 0xbd, 0x8e, 0x92, 0xa1, 0xf4, 0xc7, 0x5e, 0x6d, 0x38, 0xb, 0x2e, 0x1d, 0x48, 0x7b, 0xe2, 0xd1, 0x84, 0xb7, 0xab, 0x98, 0xcd, 0xfe, 0x67, 0x54, 0x1, 0x32, 0x39, 0xa, 0x5f, 0x6c, 0xf5, 0xc6, 0x93, 0xa0, 0xbc, 0x8f, 0xda, 0xe9, 0x70, 0x43, 0x16, 0x25, 0x5c, 0x6f, 0x3a, 0x9, 0x90, 0xa3, 0xf6, 0xc5, 0xd9, 0xea, 0xbf, 0x8c, 0x15, 0x26, 0x73, 0x40, 0x4b, 0x78, 0x2d, 0x1e, 0x87, 0xb4, 0xe1, 0xd2, 0xce, 0xfd, 0xa8, 0x9b, 0x2, 0x31, 0x64, 0x57, 0x72, 0x41, 0x14, 0x27, 0xbe, 0x8d, 0xd8, 0xeb, 0xf7, 0xc4, 0x91, 0xa2, 0x3b, 0x8, 0x5d, 0x6e, 0x65, 0x56, 0x3, 0x30, 0xa9, 0x9a, 0xcf, 0xfc, 0xe0, 0xd3, 0x86, 0xb5, 0x2c, 0x1f, 0x4a, 0x79, 0xb8, 0x8b, 0xde, 0xed, 0x74, 0x47, 0x12, 0x21, 0x3d, 0xe, 0x5b, 0x68, 0xf1, 0xc2, 0x97, 0xa4, 0xaf, 0x9c, 0xc9, 0xfa, 0x63, 0x50, 0x5, 0x36, 0x2a, 0x19, 0x4c, 0x7f, 0xe6, 0xd5, 0x80, 0xb3, 0x96, 0xa5, 0xf0, 0xc3, 0x5a, 0x69, 0x3c, 0xf, 0x13, 0x20, 0x75, 0x46, 0xdf, 0xec, 0xb9, 0x8a, 0x81, 0xb2, 0xe7, 0xd4, 0x4d, 0x7e, 0x2b, 0x18, 0x4, 0x37, 0x62, 0x51, 0xc8, 0xfb, 0xae, 0x9d, 0xe4, 0xd7, 0x82, 0xb1, 0x28, 0x1b, 0x4e, 0x7d, 0x61, 0x52, 0x7, 0x34, 0xad, 0x9e, 0xcb, 0xf8, 0xf3, 0xc0, 0x95, 0xa6, 0x3f, 0xc, 0x59, 0x6a, 0x76, 0x45, 0x10, 0x23, 0xba, 0x89, 0xdc, 0xef, 0xca, 0xf9, 0xac, 0x9f, 0x6, 0x35, 0x60, 0x53, 0x4f, 0x7c, 0x29, 0x1a, 0x83, 0xb0, 0xe5, 0xd6, 0xdd, 0xee, 0xbb, 0x88, 0x11, 0x22, 0x77, 0x44, 0x58, 0x6b, 0x3e, 0xd, 0x94, 0xa7, 0xf2, 0xc1},
- {0x0, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x5, 0x31, 0x67, 0x53, 0xf, 0x3b, 0xb7, 0x83, 0xdf, 0xeb, 0xda, 0xee, 0xb2, 0x86, 0xa, 0x3e, 0x62, 0x56, 0xce, 0xfa, 0xa6, 0x92, 0x1e, 0x2a, 0x76, 0x42, 0x73, 0x47, 0x1b, 0x2f, 0xa3, 0x97, 0xcb, 0xff, 0xa9, 0x9d, 0xc1, 0xf5, 0x79, 0x4d, 0x11, 0x25, 0x14, 0x20, 0x7c, 0x48, 0xc4, 0xf0, 0xac, 0x98, 0x81, 0xb5, 0xe9, 0xdd, 0x51, 0x65, 0x39, 0xd, 0x3c, 0x8, 0x54, 0x60, 0xec, 0xd8, 0x84, 0xb0, 0xe6, 0xd2, 0x8e, 0xba, 0x36, 0x2, 0x5e, 0x6a, 0x5b, 0x6f, 0x33, 0x7, 0x8b, 0xbf, 0xe3, 0xd7, 0x4f, 0x7b, 0x27, 0x13, 0x9f, 0xab, 0xf7, 0xc3, 0xf2, 0xc6, 0x9a, 0xae, 0x22, 0x16, 0x4a, 0x7e, 0x28, 0x1c, 0x40, 0x74, 0xf8, 0xcc, 0x90, 0xa4, 0x95, 0xa1, 0xfd, 0xc9, 0x45, 0x71, 0x2d, 0x19, 0x1f, 0x2b, 0x77, 0x43, 0xcf, 0xfb, 0xa7, 0x93, 0xa2, 0x96, 0xca, 0xfe, 0x72, 0x46, 0x1a, 0x2e, 0x78, 0x4c, 0x10, 0x24, 0xa8, 0x9c, 0xc0, 0xf4, 0xc5, 0xf1, 0xad, 0x99, 0x15, 0x21, 0x7d, 0x49, 0xd1, 0xe5, 0xb9, 0x8d, 0x1, 0x35, 0x69, 0x5d, 0x6c, 0x58, 0x4, 0x30, 0xbc, 0x88, 0xd4, 0xe0, 0xb6, 0x82, 0xde, 0xea, 0x66, 0x52, 0xe, 0x3a, 0xb, 0x3f, 0x63, 0x57, 0xdb, 0xef, 0xb3, 0x87, 0x9e, 0xaa, 0xf6, 0xc2, 0x4e, 0x7a, 0x26, 0x12, 0x23, 0x17, 0x4b, 0x7f, 0xf3, 0xc7, 0x9b, 0xaf, 0xf9, 0xcd, 0x91, 0xa5, 0x29, 0x1d, 0x41, 0x75, 0x44, 0x70, 0x2c, 0x18, 0x94, 0xa0, 0xfc, 0xc8, 0x50, 0x64, 0x38, 0xc, 0x80, 0xb4, 0xe8, 0xdc, 0xed, 0xd9, 0x85, 0xb1, 0x3d, 0x9, 0x55, 0x61, 0x37, 0x3, 0x5f, 0x6b, 0xe7, 0xd3, 0x8f, 0xbb, 0x8a, 0xbe, 0xe2, 0xd6, 0x5a, 0x6e, 0x32, 0x6},
- {0x0, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0xb, 0x3e, 0x77, 0x42, 0x1d, 0x28, 0xa3, 0x96, 0xc9, 0xfc, 0xc2, 0xf7, 0xa8, 0x9d, 0x16, 0x23, 0x7c, 0x49, 0xee, 0xdb, 0x84, 0xb1, 0x3a, 0xf, 0x50, 0x65, 0x5b, 0x6e, 0x31, 0x4, 0x8f, 0xba, 0xe5, 0xd0, 0x99, 0xac, 0xf3, 0xc6, 0x4d, 0x78, 0x27, 0x12, 0x2c, 0x19, 0x46, 0x73, 0xf8, 0xcd, 0x92, 0xa7, 0xc1, 0xf4, 0xab, 0x9e, 0x15, 0x20, 0x7f, 0x4a, 0x74, 0x41, 0x1e, 0x2b, 0xa0, 0x95, 0xca, 0xff, 0xb6, 0x83, 0xdc, 0xe9, 0x62, 0x57, 0x8, 0x3d, 0x3, 0x36, 0x69, 0x5c, 0xd7, 0xe2, 0xbd, 0x88, 0x2f, 0x1a, 0x45, 0x70, 0xfb, 0xce, 0x91, 0xa4, 0x9a, 0xaf, 0xf0, 0xc5, 0x4e, 0x7b, 0x24, 0x11, 0x58, 0x6d, 0x32, 0x7, 0x8c, 0xb9, 0xe6, 0xd3, 0xed, 0xd8, 0x87, 0xb2, 0x39, 0xc, 0x53, 0x66, 0x9f, 0xaa, 0xf5, 0xc0, 0x4b, 0x7e, 0x21, 0x14, 0x2a, 0x1f, 0x40, 0x75, 0xfe, 0xcb, 0x94, 0xa1, 0xe8, 0xdd, 0x82, 0xb7, 0x3c, 0x9, 0x56, 0x63, 0x5d, 0x68, 0x37, 0x2, 0x89, 0xbc, 0xe3, 0xd6, 0x71, 0x44, 0x1b, 0x2e, 0xa5, 0x90, 0xcf, 0xfa, 0xc4, 0xf1, 0xae, 0x9b, 0x10, 0x25, 0x7a, 0x4f, 0x6, 0x33, 0x6c, 0x59, 0xd2, 0xe7, 0xb8, 0x8d, 0xb3, 0x86, 0xd9, 0xec, 0x67, 0x52, 0xd, 0x38, 0x5e, 0x6b, 0x34, 0x1, 0x8a, 0xbf, 0xe0, 0xd5, 0xeb, 0xde, 0x81, 0xb4, 0x3f, 0xa, 0x55, 0x60, 0x29, 0x1c, 0x43, 0x76, 0xfd, 0xc8, 0x97, 0xa2, 0x9c, 0xa9, 0xf6, 0xc3, 0x48, 0x7d, 0x22, 0x17, 0xb0, 0x85, 0xda, 0xef, 0x64, 0x51, 0xe, 0x3b, 0x5, 0x30, 0x6f, 0x5a, 0xd1, 0xe4, 0xbb, 0x8e, 0xc7, 0xf2, 0xad, 0x98, 0x13, 0x26, 0x79, 0x4c, 0x72, 0x47, 0x18, 0x2d, 0xa6, 0x93, 0xcc, 0xf9},
- {0x0, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f, 0x47, 0x71, 0x2b, 0x1d, 0x9f, 0xa9, 0xf3, 0xc5, 0xea, 0xdc, 0x86, 0xb0, 0x32, 0x4, 0x5e, 0x68, 0x8e, 0xb8, 0xe2, 0xd4, 0x56, 0x60, 0x3a, 0xc, 0x23, 0x15, 0x4f, 0x79, 0xfb, 0xcd, 0x97, 0xa1, 0xc9, 0xff, 0xa5, 0x93, 0x11, 0x27, 0x7d, 0x4b, 0x64, 0x52, 0x8, 0x3e, 0xbc, 0x8a, 0xd0, 0xe6, 0x1, 0x37, 0x6d, 0x5b, 0xd9, 0xef, 0xb5, 0x83, 0xac, 0x9a, 0xc0, 0xf6, 0x74, 0x42, 0x18, 0x2e, 0x46, 0x70, 0x2a, 0x1c, 0x9e, 0xa8, 0xf2, 0xc4, 0xeb, 0xdd, 0x87, 0xb1, 0x33, 0x5, 0x5f, 0x69, 0x8f, 0xb9, 0xe3, 0xd5, 0x57, 0x61, 0x3b, 0xd, 0x22, 0x14, 0x4e, 0x78, 0xfa, 0xcc, 0x96, 0xa0, 0xc8, 0xfe, 0xa4, 0x92, 0x10, 0x26, 0x7c, 0x4a, 0x65, 0x53, 0x9, 0x3f, 0xbd, 0x8b, 0xd1, 0xe7, 0x2, 0x34, 0x6e, 0x58, 0xda, 0xec, 0xb6, 0x80, 0xaf, 0x99, 0xc3, 0xf5, 0x77, 0x41, 0x1b, 0x2d, 0x45, 0x73, 0x29, 0x1f, 0x9d, 0xab, 0xf1, 0xc7, 0xe8, 0xde, 0x84, 0xb2, 0x30, 0x6, 0x5c, 0x6a, 0x8c, 0xba, 0xe0, 0xd6, 0x54, 0x62, 0x38, 0xe, 0x21, 0x17, 0x4d, 0x7b, 0xf9, 0xcf, 0x95, 0xa3, 0xcb, 0xfd, 0xa7, 0x91, 0x13, 0x25, 0x7f, 0x49, 0x66, 0x50, 0xa, 0x3c, 0xbe, 0x88, 0xd2, 0xe4, 0x3, 0x35, 0x6f, 0x59, 0xdb, 0xed, 0xb7, 0x81, 0xae, 0x98, 0xc2, 0xf4, 0x76, 0x40, 0x1a, 0x2c, 0x44, 0x72, 0x28, 0x1e, 0x9c, 0xaa, 0xf0, 0xc6, 0xe9, 0xdf, 0x85, 0xb3, 0x31, 0x7, 0x5d, 0x6b, 0x8d, 0xbb, 0xe1, 0xd7, 0x55, 0x63, 0x39, 0xf, 0x20, 0x16, 0x4c, 0x7a, 0xf8, 0xce, 0x94, 0xa2, 0xca, 0xfc, 0xa6, 0x90, 0x12, 0x24, 0x7e, 0x48, 0x67, 0x51, 0xb, 0x3d, 0xbf, 0x89, 0xd3, 0xe5},
- {0x0, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20, 0x57, 0x60, 0x39, 0xe, 0x8b, 0xbc, 0xe5, 0xd2, 0xf2, 0xc5, 0x9c, 0xab, 0x2e, 0x19, 0x40, 0x77, 0xae, 0x99, 0xc0, 0xf7, 0x72, 0x45, 0x1c, 0x2b, 0xb, 0x3c, 0x65, 0x52, 0xd7, 0xe0, 0xb9, 0x8e, 0xf9, 0xce, 0x97, 0xa0, 0x25, 0x12, 0x4b, 0x7c, 0x5c, 0x6b, 0x32, 0x5, 0x80, 0xb7, 0xee, 0xd9, 0x41, 0x76, 0x2f, 0x18, 0x9d, 0xaa, 0xf3, 0xc4, 0xe4, 0xd3, 0x8a, 0xbd, 0x38, 0xf, 0x56, 0x61, 0x16, 0x21, 0x78, 0x4f, 0xca, 0xfd, 0xa4, 0x93, 0xb3, 0x84, 0xdd, 0xea, 0x6f, 0x58, 0x1, 0x36, 0xef, 0xd8, 0x81, 0xb6, 0x33, 0x4, 0x5d, 0x6a, 0x4a, 0x7d, 0x24, 0x13, 0x96, 0xa1, 0xf8, 0xcf, 0xb8, 0x8f, 0xd6, 0xe1, 0x64, 0x53, 0xa, 0x3d, 0x1d, 0x2a, 0x73, 0x44, 0xc1, 0xf6, 0xaf, 0x98, 0x82, 0xb5, 0xec, 0xdb, 0x5e, 0x69, 0x30, 0x7, 0x27, 0x10, 0x49, 0x7e, 0xfb, 0xcc, 0x95, 0xa2, 0xd5, 0xe2, 0xbb, 0x8c, 0x9, 0x3e, 0x67, 0x50, 0x70, 0x47, 0x1e, 0x29, 0xac, 0x9b, 0xc2, 0xf5, 0x2c, 0x1b, 0x42, 0x75, 0xf0, 0xc7, 0x9e, 0xa9, 0x89, 0xbe, 0xe7, 0xd0, 0x55, 0x62, 0x3b, 0xc, 0x7b, 0x4c, 0x15, 0x22, 0xa7, 0x90, 0xc9, 0xfe, 0xde, 0xe9, 0xb0, 0x87, 0x2, 0x35, 0x6c, 0x5b, 0xc3, 0xf4, 0xad, 0x9a, 0x1f, 0x28, 0x71, 0x46, 0x66, 0x51, 0x8, 0x3f, 0xba, 0x8d, 0xd4, 0xe3, 0x94, 0xa3, 0xfa, 0xcd, 0x48, 0x7f, 0x26, 0x11, 0x31, 0x6, 0x5f, 0x68, 0xed, 0xda, 0x83, 0xb4, 0x6d, 0x5a, 0x3, 0x34, 0xb1, 0x86, 0xdf, 0xe8, 0xc8, 0xff, 0xa6, 0x91, 0x14, 0x23, 0x7a, 0x4d, 0x3a, 0xd, 0x54, 0x63, 0xe6, 0xd1, 0x88, 0xbf, 0x9f, 0xa8, 0xf1, 0xc6, 0x43, 0x74, 0x2d, 0x1a},
- {0x0, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x5, 0x4d, 0x75, 0xa7, 0x9f, 0xd7, 0xef, 0x47, 0x7f, 0x37, 0xf, 0x7a, 0x42, 0xa, 0x32, 0x9a, 0xa2, 0xea, 0xd2, 0x53, 0x6b, 0x23, 0x1b, 0xb3, 0x8b, 0xc3, 0xfb, 0x8e, 0xb6, 0xfe, 0xc6, 0x6e, 0x56, 0x1e, 0x26, 0xf4, 0xcc, 0x84, 0xbc, 0x14, 0x2c, 0x64, 0x5c, 0x29, 0x11, 0x59, 0x61, 0xc9, 0xf1, 0xb9, 0x81, 0xa6, 0x9e, 0xd6, 0xee, 0x46, 0x7e, 0x36, 0xe, 0x7b, 0x43, 0xb, 0x33, 0x9b, 0xa3, 0xeb, 0xd3, 0x1, 0x39, 0x71, 0x49, 0xe1, 0xd9, 0x91, 0xa9, 0xdc, 0xe4, 0xac, 0x94, 0x3c, 0x4, 0x4c, 0x74, 0xf5, 0xcd, 0x85, 0xbd, 0x15, 0x2d, 0x65, 0x5d, 0x28, 0x10, 0x58, 0x60, 0xc8, 0xf0, 0xb8, 0x80, 0x52, 0x6a, 0x22, 0x1a, 0xb2, 0x8a, 0xc2, 0xfa, 0x8f, 0xb7, 0xff, 0xc7, 0x6f, 0x57, 0x1f, 0x27, 0x51, 0x69, 0x21, 0x19, 0xb1, 0x89, 0xc1, 0xf9, 0x8c, 0xb4, 0xfc, 0xc4, 0x6c, 0x54, 0x1c, 0x24, 0xf6, 0xce, 0x86, 0xbe, 0x16, 0x2e, 0x66, 0x5e, 0x2b, 0x13, 0x5b, 0x63, 0xcb, 0xf3, 0xbb, 0x83, 0x2, 0x3a, 0x72, 0x4a, 0xe2, 0xda, 0x92, 0xaa, 0xdf, 0xe7, 0xaf, 0x97, 0x3f, 0x7, 0x4f, 0x77, 0xa5, 0x9d, 0xd5, 0xed, 0x45, 0x7d, 0x35, 0xd, 0x78, 0x40, 0x8, 0x30, 0x98, 0xa0, 0xe8, 0xd0, 0xf7, 0xcf, 0x87, 0xbf, 0x17, 0x2f, 0x67, 0x5f, 0x2a, 0x12, 0x5a, 0x62, 0xca, 0xf2, 0xba, 0x82, 0x50, 0x68, 0x20, 0x18, 0xb0, 0x88, 0xc0, 0xf8, 0x8d, 0xb5, 0xfd, 0xc5, 0x6d, 0x55, 0x1d, 0x25, 0xa4, 0x9c, 0xd4, 0xec, 0x44, 0x7c, 0x34, 0xc, 0x79, 0x41, 0x9, 0x31, 0x99, 0xa1, 0xe9, 0xd1, 0x3, 0x3b, 0x73, 0x4b, 0xe3, 0xdb, 0x93, 0xab, 0xde, 0xe6, 0xae, 0x96, 0x3e, 0x6, 0x4e, 0x76},
- {0x0, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x8, 0x43, 0x7a, 0xb7, 0x8e, 0xc5, 0xfc, 0x53, 0x6a, 0x21, 0x18, 0x62, 0x5b, 0x10, 0x29, 0x86, 0xbf, 0xf4, 0xcd, 0x73, 0x4a, 0x1, 0x38, 0x97, 0xae, 0xe5, 0xdc, 0xa6, 0x9f, 0xd4, 0xed, 0x42, 0x7b, 0x30, 0x9, 0xc4, 0xfd, 0xb6, 0x8f, 0x20, 0x19, 0x52, 0x6b, 0x11, 0x28, 0x63, 0x5a, 0xf5, 0xcc, 0x87, 0xbe, 0xe6, 0xdf, 0x94, 0xad, 0x2, 0x3b, 0x70, 0x49, 0x33, 0xa, 0x41, 0x78, 0xd7, 0xee, 0xa5, 0x9c, 0x51, 0x68, 0x23, 0x1a, 0xb5, 0x8c, 0xc7, 0xfe, 0x84, 0xbd, 0xf6, 0xcf, 0x60, 0x59, 0x12, 0x2b, 0x95, 0xac, 0xe7, 0xde, 0x71, 0x48, 0x3, 0x3a, 0x40, 0x79, 0x32, 0xb, 0xa4, 0x9d, 0xd6, 0xef, 0x22, 0x1b, 0x50, 0x69, 0xc6, 0xff, 0xb4, 0x8d, 0xf7, 0xce, 0x85, 0xbc, 0x13, 0x2a, 0x61, 0x58, 0xd1, 0xe8, 0xa3, 0x9a, 0x35, 0xc, 0x47, 0x7e, 0x4, 0x3d, 0x76, 0x4f, 0xe0, 0xd9, 0x92, 0xab, 0x66, 0x5f, 0x14, 0x2d, 0x82, 0xbb, 0xf0, 0xc9, 0xb3, 0x8a, 0xc1, 0xf8, 0x57, 0x6e, 0x25, 0x1c, 0xa2, 0x9b, 0xd0, 0xe9, 0x46, 0x7f, 0x34, 0xd, 0x77, 0x4e, 0x5, 0x3c, 0x93, 0xaa, 0xe1, 0xd8, 0x15, 0x2c, 0x67, 0x5e, 0xf1, 0xc8, 0x83, 0xba, 0xc0, 0xf9, 0xb2, 0x8b, 0x24, 0x1d, 0x56, 0x6f, 0x37, 0xe, 0x45, 0x7c, 0xd3, 0xea, 0xa1, 0x98, 0xe2, 0xdb, 0x90, 0xa9, 0x6, 0x3f, 0x74, 0x4d, 0x80, 0xb9, 0xf2, 0xcb, 0x64, 0x5d, 0x16, 0x2f, 0x55, 0x6c, 0x27, 0x1e, 0xb1, 0x88, 0xc3, 0xfa, 0x44, 0x7d, 0x36, 0xf, 0xa0, 0x99, 0xd2, 0xeb, 0x91, 0xa8, 0xe3, 0xda, 0x75, 0x4c, 0x7, 0x3e, 0xf3, 0xca, 0x81, 0xb8, 0x17, 0x2e, 0x65, 0x5c, 0x26, 0x1f, 0x54, 0x6d, 0xc2, 0xfb, 0xb0, 0x89},
- {0x0, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b, 0x87, 0xbd, 0xf3, 0xc9, 0x6f, 0x55, 0x1b, 0x21, 0x4a, 0x70, 0x3e, 0x4, 0xa2, 0x98, 0xd6, 0xec, 0x13, 0x29, 0x67, 0x5d, 0xfb, 0xc1, 0x8f, 0xb5, 0xde, 0xe4, 0xaa, 0x90, 0x36, 0xc, 0x42, 0x78, 0x94, 0xae, 0xe0, 0xda, 0x7c, 0x46, 0x8, 0x32, 0x59, 0x63, 0x2d, 0x17, 0xb1, 0x8b, 0xc5, 0xff, 0x26, 0x1c, 0x52, 0x68, 0xce, 0xf4, 0xba, 0x80, 0xeb, 0xd1, 0x9f, 0xa5, 0x3, 0x39, 0x77, 0x4d, 0xa1, 0x9b, 0xd5, 0xef, 0x49, 0x73, 0x3d, 0x7, 0x6c, 0x56, 0x18, 0x22, 0x84, 0xbe, 0xf0, 0xca, 0x35, 0xf, 0x41, 0x7b, 0xdd, 0xe7, 0xa9, 0x93, 0xf8, 0xc2, 0x8c, 0xb6, 0x10, 0x2a, 0x64, 0x5e, 0xb2, 0x88, 0xc6, 0xfc, 0x5a, 0x60, 0x2e, 0x14, 0x7f, 0x45, 0xb, 0x31, 0x97, 0xad, 0xe3, 0xd9, 0x4c, 0x76, 0x38, 0x2, 0xa4, 0x9e, 0xd0, 0xea, 0x81, 0xbb, 0xf5, 0xcf, 0x69, 0x53, 0x1d, 0x27, 0xcb, 0xf1, 0xbf, 0x85, 0x23, 0x19, 0x57, 0x6d, 0x6, 0x3c, 0x72, 0x48, 0xee, 0xd4, 0x9a, 0xa0, 0x5f, 0x65, 0x2b, 0x11, 0xb7, 0x8d, 0xc3, 0xf9, 0x92, 0xa8, 0xe6, 0xdc, 0x7a, 0x40, 0xe, 0x34, 0xd8, 0xe2, 0xac, 0x96, 0x30, 0xa, 0x44, 0x7e, 0x15, 0x2f, 0x61, 0x5b, 0xfd, 0xc7, 0x89, 0xb3, 0x6a, 0x50, 0x1e, 0x24, 0x82, 0xb8, 0xf6, 0xcc, 0xa7, 0x9d, 0xd3, 0xe9, 0x4f, 0x75, 0x3b, 0x1, 0xed, 0xd7, 0x99, 0xa3, 0x5, 0x3f, 0x71, 0x4b, 0x20, 0x1a, 0x54, 0x6e, 0xc8, 0xf2, 0xbc, 0x86, 0x79, 0x43, 0xd, 0x37, 0x91, 0xab, 0xe5, 0xdf, 0xb4, 0x8e, 0xc0, 0xfa, 0x5c, 0x66, 0x28, 0x12, 0xfe, 0xc4, 0x8a, 0xb0, 0x16, 0x2c, 0x62, 0x58, 0x33, 0x9, 0x47, 0x7d, 0xdb, 0xe1, 0xaf, 0x95},
- {0x0, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64, 0x97, 0xac, 0xe1, 0xda, 0x7b, 0x40, 0xd, 0x36, 0x52, 0x69, 0x24, 0x1f, 0xbe, 0x85, 0xc8, 0xf3, 0x33, 0x8, 0x45, 0x7e, 0xdf, 0xe4, 0xa9, 0x92, 0xf6, 0xcd, 0x80, 0xbb, 0x1a, 0x21, 0x6c, 0x57, 0xa4, 0x9f, 0xd2, 0xe9, 0x48, 0x73, 0x3e, 0x5, 0x61, 0x5a, 0x17, 0x2c, 0x8d, 0xb6, 0xfb, 0xc0, 0x66, 0x5d, 0x10, 0x2b, 0x8a, 0xb1, 0xfc, 0xc7, 0xa3, 0x98, 0xd5, 0xee, 0x4f, 0x74, 0x39, 0x2, 0xf1, 0xca, 0x87, 0xbc, 0x1d, 0x26, 0x6b, 0x50, 0x34, 0xf, 0x42, 0x79, 0xd8, 0xe3, 0xae, 0x95, 0x55, 0x6e, 0x23, 0x18, 0xb9, 0x82, 0xcf, 0xf4, 0x90, 0xab, 0xe6, 0xdd, 0x7c, 0x47, 0xa, 0x31, 0xc2, 0xf9, 0xb4, 0x8f, 0x2e, 0x15, 0x58, 0x63, 0x7, 0x3c, 0x71, 0x4a, 0xeb, 0xd0, 0x9d, 0xa6, 0xcc, 0xf7, 0xba, 0x81, 0x20, 0x1b, 0x56, 0x6d, 0x9, 0x32, 0x7f, 0x44, 0xe5, 0xde, 0x93, 0xa8, 0x5b, 0x60, 0x2d, 0x16, 0xb7, 0x8c, 0xc1, 0xfa, 0x9e, 0xa5, 0xe8, 0xd3, 0x72, 0x49, 0x4, 0x3f, 0xff, 0xc4, 0x89, 0xb2, 0x13, 0x28, 0x65, 0x5e, 0x3a, 0x1, 0x4c, 0x77, 0xd6, 0xed, 0xa0, 0x9b, 0x68, 0x53, 0x1e, 0x25, 0x84, 0xbf, 0xf2, 0xc9, 0xad, 0x96, 0xdb, 0xe0, 0x41, 0x7a, 0x37, 0xc, 0xaa, 0x91, 0xdc, 0xe7, 0x46, 0x7d, 0x30, 0xb, 0x6f, 0x54, 0x19, 0x22, 0x83, 0xb8, 0xf5, 0xce, 0x3d, 0x6, 0x4b, 0x70, 0xd1, 0xea, 0xa7, 0x9c, 0xf8, 0xc3, 0x8e, 0xb5, 0x14, 0x2f, 0x62, 0x59, 0x99, 0xa2, 0xef, 0xd4, 0x75, 0x4e, 0x3, 0x38, 0x5c, 0x67, 0x2a, 0x11, 0xb0, 0x8b, 0xc6, 0xfd, 0xe, 0x35, 0x78, 0x43, 0xe2, 0xd9, 0x94, 0xaf, 0xcb, 0xf0, 0xbd, 0x86, 0x27, 0x1c, 0x51, 0x6a},
- {0x0, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0xd, 0x31, 0x75, 0x49, 0xe7, 0xdb, 0x9f, 0xa3, 0x17, 0x2b, 0x6f, 0x53, 0x1a, 0x26, 0x62, 0x5e, 0xea, 0xd6, 0x92, 0xae, 0xd3, 0xef, 0xab, 0x97, 0x23, 0x1f, 0x5b, 0x67, 0x2e, 0x12, 0x56, 0x6a, 0xde, 0xe2, 0xa6, 0x9a, 0x34, 0x8, 0x4c, 0x70, 0xc4, 0xf8, 0xbc, 0x80, 0xc9, 0xf5, 0xb1, 0x8d, 0x39, 0x5, 0x41, 0x7d, 0xbb, 0x87, 0xc3, 0xff, 0x4b, 0x77, 0x33, 0xf, 0x46, 0x7a, 0x3e, 0x2, 0xb6, 0x8a, 0xce, 0xf2, 0x5c, 0x60, 0x24, 0x18, 0xac, 0x90, 0xd4, 0xe8, 0xa1, 0x9d, 0xd9, 0xe5, 0x51, 0x6d, 0x29, 0x15, 0x68, 0x54, 0x10, 0x2c, 0x98, 0xa4, 0xe0, 0xdc, 0x95, 0xa9, 0xed, 0xd1, 0x65, 0x59, 0x1d, 0x21, 0x8f, 0xb3, 0xf7, 0xcb, 0x7f, 0x43, 0x7, 0x3b, 0x72, 0x4e, 0xa, 0x36, 0x82, 0xbe, 0xfa, 0xc6, 0x6b, 0x57, 0x13, 0x2f, 0x9b, 0xa7, 0xe3, 0xdf, 0x96, 0xaa, 0xee, 0xd2, 0x66, 0x5a, 0x1e, 0x22, 0x8c, 0xb0, 0xf4, 0xc8, 0x7c, 0x40, 0x4, 0x38, 0x71, 0x4d, 0x9, 0x35, 0x81, 0xbd, 0xf9, 0xc5, 0xb8, 0x84, 0xc0, 0xfc, 0x48, 0x74, 0x30, 0xc, 0x45, 0x79, 0x3d, 0x1, 0xb5, 0x89, 0xcd, 0xf1, 0x5f, 0x63, 0x27, 0x1b, 0xaf, 0x93, 0xd7, 0xeb, 0xa2, 0x9e, 0xda, 0xe6, 0x52, 0x6e, 0x2a, 0x16, 0xd0, 0xec, 0xa8, 0x94, 0x20, 0x1c, 0x58, 0x64, 0x2d, 0x11, 0x55, 0x69, 0xdd, 0xe1, 0xa5, 0x99, 0x37, 0xb, 0x4f, 0x73, 0xc7, 0xfb, 0xbf, 0x83, 0xca, 0xf6, 0xb2, 0x8e, 0x3a, 0x6, 0x42, 0x7e, 0x3, 0x3f, 0x7b, 0x47, 0xf3, 0xcf, 0x8b, 0xb7, 0xfe, 0xc2, 0x86, 0xba, 0xe, 0x32, 0x76, 0x4a, 0xe4, 0xd8, 0x9c, 0xa0, 0x14, 0x28, 0x6c, 0x50, 0x19, 0x25, 0x61, 0x5d, 0xe9, 0xd5, 0x91, 0xad},
- {0x0, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x1, 0x3c, 0x7b, 0x46, 0xf7, 0xca, 0x8d, 0xb0, 0x3, 0x3e, 0x79, 0x44, 0x2, 0x3f, 0x78, 0x45, 0xf6, 0xcb, 0x8c, 0xb1, 0xf3, 0xce, 0x89, 0xb4, 0x7, 0x3a, 0x7d, 0x40, 0x6, 0x3b, 0x7c, 0x41, 0xf2, 0xcf, 0x88, 0xb5, 0x4, 0x39, 0x7e, 0x43, 0xf0, 0xcd, 0x8a, 0xb7, 0xf1, 0xcc, 0x8b, 0xb6, 0x5, 0x38, 0x7f, 0x42, 0xfb, 0xc6, 0x81, 0xbc, 0xf, 0x32, 0x75, 0x48, 0xe, 0x33, 0x74, 0x49, 0xfa, 0xc7, 0x80, 0xbd, 0xc, 0x31, 0x76, 0x4b, 0xf8, 0xc5, 0x82, 0xbf, 0xf9, 0xc4, 0x83, 0xbe, 0xd, 0x30, 0x77, 0x4a, 0x8, 0x35, 0x72, 0x4f, 0xfc, 0xc1, 0x86, 0xbb, 0xfd, 0xc0, 0x87, 0xba, 0x9, 0x34, 0x73, 0x4e, 0xff, 0xc2, 0x85, 0xb8, 0xb, 0x36, 0x71, 0x4c, 0xa, 0x37, 0x70, 0x4d, 0xfe, 0xc3, 0x84, 0xb9, 0xeb, 0xd6, 0x91, 0xac, 0x1f, 0x22, 0x65, 0x58, 0x1e, 0x23, 0x64, 0x59, 0xea, 0xd7, 0x90, 0xad, 0x1c, 0x21, 0x66, 0x5b, 0xe8, 0xd5, 0x92, 0xaf, 0xe9, 0xd4, 0x93, 0xae, 0x1d, 0x20, 0x67, 0x5a, 0x18, 0x25, 0x62, 0x5f, 0xec, 0xd1, 0x96, 0xab, 0xed, 0xd0, 0x97, 0xaa, 0x19, 0x24, 0x63, 0x5e, 0xef, 0xd2, 0x95, 0xa8, 0x1b, 0x26, 0x61, 0x5c, 0x1a, 0x27, 0x60, 0x5d, 0xee, 0xd3, 0x94, 0xa9, 0x10, 0x2d, 0x6a, 0x57, 0xe4, 0xd9, 0x9e, 0xa3, 0xe5, 0xd8, 0x9f, 0xa2, 0x11, 0x2c, 0x6b, 0x56, 0xe7, 0xda, 0x9d, 0xa0, 0x13, 0x2e, 0x69, 0x54, 0x12, 0x2f, 0x68, 0x55, 0xe6, 0xdb, 0x9c, 0xa1, 0xe3, 0xde, 0x99, 0xa4, 0x17, 0x2a, 0x6d, 0x50, 0x16, 0x2b, 0x6c, 0x51, 0xe2, 0xdf, 0x98, 0xa5, 0x14, 0x29, 0x6e, 0x53, 0xe0, 0xdd, 0x9a, 0xa7, 0xe1, 0xdc, 0x9b, 0xa6, 0x15, 0x28, 0x6f, 0x52},
- {0x0, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57, 0xc7, 0xf9, 0xbb, 0x85, 0x3f, 0x1, 0x43, 0x7d, 0x2a, 0x14, 0x56, 0x68, 0xd2, 0xec, 0xae, 0x90, 0x93, 0xad, 0xef, 0xd1, 0x6b, 0x55, 0x17, 0x29, 0x7e, 0x40, 0x2, 0x3c, 0x86, 0xb8, 0xfa, 0xc4, 0x54, 0x6a, 0x28, 0x16, 0xac, 0x92, 0xd0, 0xee, 0xb9, 0x87, 0xc5, 0xfb, 0x41, 0x7f, 0x3d, 0x3, 0x3b, 0x5, 0x47, 0x79, 0xc3, 0xfd, 0xbf, 0x81, 0xd6, 0xe8, 0xaa, 0x94, 0x2e, 0x10, 0x52, 0x6c, 0xfc, 0xc2, 0x80, 0xbe, 0x4, 0x3a, 0x78, 0x46, 0x11, 0x2f, 0x6d, 0x53, 0xe9, 0xd7, 0x95, 0xab, 0xa8, 0x96, 0xd4, 0xea, 0x50, 0x6e, 0x2c, 0x12, 0x45, 0x7b, 0x39, 0x7, 0xbd, 0x83, 0xc1, 0xff, 0x6f, 0x51, 0x13, 0x2d, 0x97, 0xa9, 0xeb, 0xd5, 0x82, 0xbc, 0xfe, 0xc0, 0x7a, 0x44, 0x6, 0x38, 0x76, 0x48, 0xa, 0x34, 0x8e, 0xb0, 0xf2, 0xcc, 0x9b, 0xa5, 0xe7, 0xd9, 0x63, 0x5d, 0x1f, 0x21, 0xb1, 0x8f, 0xcd, 0xf3, 0x49, 0x77, 0x35, 0xb, 0x5c, 0x62, 0x20, 0x1e, 0xa4, 0x9a, 0xd8, 0xe6, 0xe5, 0xdb, 0x99, 0xa7, 0x1d, 0x23, 0x61, 0x5f, 0x8, 0x36, 0x74, 0x4a, 0xf0, 0xce, 0x8c, 0xb2, 0x22, 0x1c, 0x5e, 0x60, 0xda, 0xe4, 0xa6, 0x98, 0xcf, 0xf1, 0xb3, 0x8d, 0x37, 0x9, 0x4b, 0x75, 0x4d, 0x73, 0x31, 0xf, 0xb5, 0x8b, 0xc9, 0xf7, 0xa0, 0x9e, 0xdc, 0xe2, 0x58, 0x66, 0x24, 0x1a, 0x8a, 0xb4, 0xf6, 0xc8, 0x72, 0x4c, 0xe, 0x30, 0x67, 0x59, 0x1b, 0x25, 0x9f, 0xa1, 0xe3, 0xdd, 0xde, 0xe0, 0xa2, 0x9c, 0x26, 0x18, 0x5a, 0x64, 0x33, 0xd, 0x4f, 0x71, 0xcb, 0xf5, 0xb7, 0x89, 0x19, 0x27, 0x65, 0x5b, 0xe1, 0xdf, 0x9d, 0xa3, 0xf4, 0xca, 0x88, 0xb6, 0xc, 0x32, 0x70, 0x4e},
- {0x0, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58, 0xd7, 0xe8, 0xa9, 0x96, 0x2b, 0x14, 0x55, 0x6a, 0x32, 0xd, 0x4c, 0x73, 0xce, 0xf1, 0xb0, 0x8f, 0xb3, 0x8c, 0xcd, 0xf2, 0x4f, 0x70, 0x31, 0xe, 0x56, 0x69, 0x28, 0x17, 0xaa, 0x95, 0xd4, 0xeb, 0x64, 0x5b, 0x1a, 0x25, 0x98, 0xa7, 0xe6, 0xd9, 0x81, 0xbe, 0xff, 0xc0, 0x7d, 0x42, 0x3, 0x3c, 0x7b, 0x44, 0x5, 0x3a, 0x87, 0xb8, 0xf9, 0xc6, 0x9e, 0xa1, 0xe0, 0xdf, 0x62, 0x5d, 0x1c, 0x23, 0xac, 0x93, 0xd2, 0xed, 0x50, 0x6f, 0x2e, 0x11, 0x49, 0x76, 0x37, 0x8, 0xb5, 0x8a, 0xcb, 0xf4, 0xc8, 0xf7, 0xb6, 0x89, 0x34, 0xb, 0x4a, 0x75, 0x2d, 0x12, 0x53, 0x6c, 0xd1, 0xee, 0xaf, 0x90, 0x1f, 0x20, 0x61, 0x5e, 0xe3, 0xdc, 0x9d, 0xa2, 0xfa, 0xc5, 0x84, 0xbb, 0x6, 0x39, 0x78, 0x47, 0xf6, 0xc9, 0x88, 0xb7, 0xa, 0x35, 0x74, 0x4b, 0x13, 0x2c, 0x6d, 0x52, 0xef, 0xd0, 0x91, 0xae, 0x21, 0x1e, 0x5f, 0x60, 0xdd, 0xe2, 0xa3, 0x9c, 0xc4, 0xfb, 0xba, 0x85, 0x38, 0x7, 0x46, 0x79, 0x45, 0x7a, 0x3b, 0x4, 0xb9, 0x86, 0xc7, 0xf8, 0xa0, 0x9f, 0xde, 0xe1, 0x5c, 0x63, 0x22, 0x1d, 0x92, 0xad, 0xec, 0xd3, 0x6e, 0x51, 0x10, 0x2f, 0x77, 0x48, 0x9, 0x36, 0x8b, 0xb4, 0xf5, 0xca, 0x8d, 0xb2, 0xf3, 0xcc, 0x71, 0x4e, 0xf, 0x30, 0x68, 0x57, 0x16, 0x29, 0x94, 0xab, 0xea, 0xd5, 0x5a, 0x65, 0x24, 0x1b, 0xa6, 0x99, 0xd8, 0xe7, 0xbf, 0x80, 0xc1, 0xfe, 0x43, 0x7c, 0x3d, 0x2, 0x3e, 0x1, 0x40, 0x7f, 0xc2, 0xfd, 0xbc, 0x83, 0xdb, 0xe4, 0xa5, 0x9a, 0x27, 0x18, 0x59, 0x66, 0xe9, 0xd6, 0x97, 0xa8, 0x15, 0x2a, 0x6b, 0x54, 0xc, 0x33, 0x72, 0x4d, 0xf0, 0xcf, 0x8e, 0xb1},
- {0x0, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7, 0x74, 0x34, 0xf4, 0xb4, 0x69, 0x29, 0xe9, 0xa9, 0x4e, 0xe, 0xce, 0x8e, 0x53, 0x13, 0xd3, 0x93, 0xe8, 0xa8, 0x68, 0x28, 0xf5, 0xb5, 0x75, 0x35, 0xd2, 0x92, 0x52, 0x12, 0xcf, 0x8f, 0x4f, 0xf, 0x9c, 0xdc, 0x1c, 0x5c, 0x81, 0xc1, 0x1, 0x41, 0xa6, 0xe6, 0x26, 0x66, 0xbb, 0xfb, 0x3b, 0x7b, 0xcd, 0x8d, 0x4d, 0xd, 0xd0, 0x90, 0x50, 0x10, 0xf7, 0xb7, 0x77, 0x37, 0xea, 0xaa, 0x6a, 0x2a, 0xb9, 0xf9, 0x39, 0x79, 0xa4, 0xe4, 0x24, 0x64, 0x83, 0xc3, 0x3, 0x43, 0x9e, 0xde, 0x1e, 0x5e, 0x25, 0x65, 0xa5, 0xe5, 0x38, 0x78, 0xb8, 0xf8, 0x1f, 0x5f, 0x9f, 0xdf, 0x2, 0x42, 0x82, 0xc2, 0x51, 0x11, 0xd1, 0x91, 0x4c, 0xc, 0xcc, 0x8c, 0x6b, 0x2b, 0xeb, 0xab, 0x76, 0x36, 0xf6, 0xb6, 0x87, 0xc7, 0x7, 0x47, 0x9a, 0xda, 0x1a, 0x5a, 0xbd, 0xfd, 0x3d, 0x7d, 0xa0, 0xe0, 0x20, 0x60, 0xf3, 0xb3, 0x73, 0x33, 0xee, 0xae, 0x6e, 0x2e, 0xc9, 0x89, 0x49, 0x9, 0xd4, 0x94, 0x54, 0x14, 0x6f, 0x2f, 0xef, 0xaf, 0x72, 0x32, 0xf2, 0xb2, 0x55, 0x15, 0xd5, 0x95, 0x48, 0x8, 0xc8, 0x88, 0x1b, 0x5b, 0x9b, 0xdb, 0x6, 0x46, 0x86, 0xc6, 0x21, 0x61, 0xa1, 0xe1, 0x3c, 0x7c, 0xbc, 0xfc, 0x4a, 0xa, 0xca, 0x8a, 0x57, 0x17, 0xd7, 0x97, 0x70, 0x30, 0xf0, 0xb0, 0x6d, 0x2d, 0xed, 0xad, 0x3e, 0x7e, 0xbe, 0xfe, 0x23, 0x63, 0xa3, 0xe3, 0x4, 0x44, 0x84, 0xc4, 0x19, 0x59, 0x99, 0xd9, 0xa2, 0xe2, 0x22, 0x62, 0xbf, 0xff, 0x3f, 0x7f, 0x98, 0xd8, 0x18, 0x58, 0x85, 0xc5, 0x5, 0x45, 0xd6, 0x96, 0x56, 0x16, 0xcb, 0x8b, 0x4b, 0xb, 0xec, 0xac, 0x6c, 0x2c, 0xf1, 0xb1, 0x71, 0x31},
- {0x0, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8, 0x64, 0x25, 0xe6, 0xa7, 0x7d, 0x3c, 0xff, 0xbe, 0x56, 0x17, 0xd4, 0x95, 0x4f, 0xe, 0xcd, 0x8c, 0xc8, 0x89, 0x4a, 0xb, 0xd1, 0x90, 0x53, 0x12, 0xfa, 0xbb, 0x78, 0x39, 0xe3, 0xa2, 0x61, 0x20, 0xac, 0xed, 0x2e, 0x6f, 0xb5, 0xf4, 0x37, 0x76, 0x9e, 0xdf, 0x1c, 0x5d, 0x87, 0xc6, 0x5, 0x44, 0x8d, 0xcc, 0xf, 0x4e, 0x94, 0xd5, 0x16, 0x57, 0xbf, 0xfe, 0x3d, 0x7c, 0xa6, 0xe7, 0x24, 0x65, 0xe9, 0xa8, 0x6b, 0x2a, 0xf0, 0xb1, 0x72, 0x33, 0xdb, 0x9a, 0x59, 0x18, 0xc2, 0x83, 0x40, 0x1, 0x45, 0x4, 0xc7, 0x86, 0x5c, 0x1d, 0xde, 0x9f, 0x77, 0x36, 0xf5, 0xb4, 0x6e, 0x2f, 0xec, 0xad, 0x21, 0x60, 0xa3, 0xe2, 0x38, 0x79, 0xba, 0xfb, 0x13, 0x52, 0x91, 0xd0, 0xa, 0x4b, 0x88, 0xc9, 0x7, 0x46, 0x85, 0xc4, 0x1e, 0x5f, 0x9c, 0xdd, 0x35, 0x74, 0xb7, 0xf6, 0x2c, 0x6d, 0xae, 0xef, 0x63, 0x22, 0xe1, 0xa0, 0x7a, 0x3b, 0xf8, 0xb9, 0x51, 0x10, 0xd3, 0x92, 0x48, 0x9, 0xca, 0x8b, 0xcf, 0x8e, 0x4d, 0xc, 0xd6, 0x97, 0x54, 0x15, 0xfd, 0xbc, 0x7f, 0x3e, 0xe4, 0xa5, 0x66, 0x27, 0xab, 0xea, 0x29, 0x68, 0xb2, 0xf3, 0x30, 0x71, 0x99, 0xd8, 0x1b, 0x5a, 0x80, 0xc1, 0x2, 0x43, 0x8a, 0xcb, 0x8, 0x49, 0x93, 0xd2, 0x11, 0x50, 0xb8, 0xf9, 0x3a, 0x7b, 0xa1, 0xe0, 0x23, 0x62, 0xee, 0xaf, 0x6c, 0x2d, 0xf7, 0xb6, 0x75, 0x34, 0xdc, 0x9d, 0x5e, 0x1f, 0xc5, 0x84, 0x47, 0x6, 0x42, 0x3, 0xc0, 0x81, 0x5b, 0x1a, 0xd9, 0x98, 0x70, 0x31, 0xf2, 0xb3, 0x69, 0x28, 0xeb, 0xaa, 0x26, 0x67, 0xa4, 0xe5, 0x3f, 0x7e, 0xbd, 0xfc, 0x14, 0x55, 0x96, 0xd7, 0xd, 0x4c, 0x8f, 0xce},
- {0x0, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9, 0x54, 0x16, 0xd0, 0x92, 0x41, 0x3, 0xc5, 0x87, 0x7e, 0x3c, 0xfa, 0xb8, 0x6b, 0x29, 0xef, 0xad, 0xa8, 0xea, 0x2c, 0x6e, 0xbd, 0xff, 0x39, 0x7b, 0x82, 0xc0, 0x6, 0x44, 0x97, 0xd5, 0x13, 0x51, 0xfc, 0xbe, 0x78, 0x3a, 0xe9, 0xab, 0x6d, 0x2f, 0xd6, 0x94, 0x52, 0x10, 0xc3, 0x81, 0x47, 0x5, 0x4d, 0xf, 0xc9, 0x8b, 0x58, 0x1a, 0xdc, 0x9e, 0x67, 0x25, 0xe3, 0xa1, 0x72, 0x30, 0xf6, 0xb4, 0x19, 0x5b, 0x9d, 0xdf, 0xc, 0x4e, 0x88, 0xca, 0x33, 0x71, 0xb7, 0xf5, 0x26, 0x64, 0xa2, 0xe0, 0xe5, 0xa7, 0x61, 0x23, 0xf0, 0xb2, 0x74, 0x36, 0xcf, 0x8d, 0x4b, 0x9, 0xda, 0x98, 0x5e, 0x1c, 0xb1, 0xf3, 0x35, 0x77, 0xa4, 0xe6, 0x20, 0x62, 0x9b, 0xd9, 0x1f, 0x5d, 0x8e, 0xcc, 0xa, 0x48, 0x9a, 0xd8, 0x1e, 0x5c, 0x8f, 0xcd, 0xb, 0x49, 0xb0, 0xf2, 0x34, 0x76, 0xa5, 0xe7, 0x21, 0x63, 0xce, 0x8c, 0x4a, 0x8, 0xdb, 0x99, 0x5f, 0x1d, 0xe4, 0xa6, 0x60, 0x22, 0xf1, 0xb3, 0x75, 0x37, 0x32, 0x70, 0xb6, 0xf4, 0x27, 0x65, 0xa3, 0xe1, 0x18, 0x5a, 0x9c, 0xde, 0xd, 0x4f, 0x89, 0xcb, 0x66, 0x24, 0xe2, 0xa0, 0x73, 0x31, 0xf7, 0xb5, 0x4c, 0xe, 0xc8, 0x8a, 0x59, 0x1b, 0xdd, 0x9f, 0xd7, 0x95, 0x53, 0x11, 0xc2, 0x80, 0x46, 0x4, 0xfd, 0xbf, 0x79, 0x3b, 0xe8, 0xaa, 0x6c, 0x2e, 0x83, 0xc1, 0x7, 0x45, 0x96, 0xd4, 0x12, 0x50, 0xa9, 0xeb, 0x2d, 0x6f, 0xbc, 0xfe, 0x38, 0x7a, 0x7f, 0x3d, 0xfb, 0xb9, 0x6a, 0x28, 0xee, 0xac, 0x55, 0x17, 0xd1, 0x93, 0x40, 0x2, 0xc4, 0x86, 0x2b, 0x69, 0xaf, 0xed, 0x3e, 0x7c, 0xba, 0xf8, 0x1, 0x43, 0x85, 0xc7, 0x14, 0x56, 0x90, 0xd2},
- {0x0, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6, 0x44, 0x7, 0xc2, 0x81, 0x55, 0x16, 0xd3, 0x90, 0x66, 0x25, 0xe0, 0xa3, 0x77, 0x34, 0xf1, 0xb2, 0x88, 0xcb, 0xe, 0x4d, 0x99, 0xda, 0x1f, 0x5c, 0xaa, 0xe9, 0x2c, 0x6f, 0xbb, 0xf8, 0x3d, 0x7e, 0xcc, 0x8f, 0x4a, 0x9, 0xdd, 0x9e, 0x5b, 0x18, 0xee, 0xad, 0x68, 0x2b, 0xff, 0xbc, 0x79, 0x3a, 0xd, 0x4e, 0x8b, 0xc8, 0x1c, 0x5f, 0x9a, 0xd9, 0x2f, 0x6c, 0xa9, 0xea, 0x3e, 0x7d, 0xb8, 0xfb, 0x49, 0xa, 0xcf, 0x8c, 0x58, 0x1b, 0xde, 0x9d, 0x6b, 0x28, 0xed, 0xae, 0x7a, 0x39, 0xfc, 0xbf, 0x85, 0xc6, 0x3, 0x40, 0x94, 0xd7, 0x12, 0x51, 0xa7, 0xe4, 0x21, 0x62, 0xb6, 0xf5, 0x30, 0x73, 0xc1, 0x82, 0x47, 0x4, 0xd0, 0x93, 0x56, 0x15, 0xe3, 0xa0, 0x65, 0x26, 0xf2, 0xb1, 0x74, 0x37, 0x1a, 0x59, 0x9c, 0xdf, 0xb, 0x48, 0x8d, 0xce, 0x38, 0x7b, 0xbe, 0xfd, 0x29, 0x6a, 0xaf, 0xec, 0x5e, 0x1d, 0xd8, 0x9b, 0x4f, 0xc, 0xc9, 0x8a, 0x7c, 0x3f, 0xfa, 0xb9, 0x6d, 0x2e, 0xeb, 0xa8, 0x92, 0xd1, 0x14, 0x57, 0x83, 0xc0, 0x5, 0x46, 0xb0, 0xf3, 0x36, 0x75, 0xa1, 0xe2, 0x27, 0x64, 0xd6, 0x95, 0x50, 0x13, 0xc7, 0x84, 0x41, 0x2, 0xf4, 0xb7, 0x72, 0x31, 0xe5, 0xa6, 0x63, 0x20, 0x17, 0x54, 0x91, 0xd2, 0x6, 0x45, 0x80, 0xc3, 0x35, 0x76, 0xb3, 0xf0, 0x24, 0x67, 0xa2, 0xe1, 0x53, 0x10, 0xd5, 0x96, 0x42, 0x1, 0xc4, 0x87, 0x71, 0x32, 0xf7, 0xb4, 0x60, 0x23, 0xe6, 0xa5, 0x9f, 0xdc, 0x19, 0x5a, 0x8e, 0xcd, 0x8, 0x4b, 0xbd, 0xfe, 0x3b, 0x78, 0xac, 0xef, 0x2a, 0x69, 0xdb, 0x98, 0x5d, 0x1e, 0xca, 0x89, 0x4c, 0xf, 0xf9, 0xba, 0x7f, 0x3c, 0xe8, 0xab, 0x6e, 0x2d},
- {0x0, 0x44, 0x88, 0xcc, 0xd, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb, 0x34, 0x70, 0xbc, 0xf8, 0x39, 0x7d, 0xb1, 0xf5, 0x2e, 0x6a, 0xa6, 0xe2, 0x23, 0x67, 0xab, 0xef, 0x68, 0x2c, 0xe0, 0xa4, 0x65, 0x21, 0xed, 0xa9, 0x72, 0x36, 0xfa, 0xbe, 0x7f, 0x3b, 0xf7, 0xb3, 0x5c, 0x18, 0xd4, 0x90, 0x51, 0x15, 0xd9, 0x9d, 0x46, 0x2, 0xce, 0x8a, 0x4b, 0xf, 0xc3, 0x87, 0xd0, 0x94, 0x58, 0x1c, 0xdd, 0x99, 0x55, 0x11, 0xca, 0x8e, 0x42, 0x6, 0xc7, 0x83, 0x4f, 0xb, 0xe4, 0xa0, 0x6c, 0x28, 0xe9, 0xad, 0x61, 0x25, 0xfe, 0xba, 0x76, 0x32, 0xf3, 0xb7, 0x7b, 0x3f, 0xb8, 0xfc, 0x30, 0x74, 0xb5, 0xf1, 0x3d, 0x79, 0xa2, 0xe6, 0x2a, 0x6e, 0xaf, 0xeb, 0x27, 0x63, 0x8c, 0xc8, 0x4, 0x40, 0x81, 0xc5, 0x9, 0x4d, 0x96, 0xd2, 0x1e, 0x5a, 0x9b, 0xdf, 0x13, 0x57, 0xbd, 0xf9, 0x35, 0x71, 0xb0, 0xf4, 0x38, 0x7c, 0xa7, 0xe3, 0x2f, 0x6b, 0xaa, 0xee, 0x22, 0x66, 0x89, 0xcd, 0x1, 0x45, 0x84, 0xc0, 0xc, 0x48, 0x93, 0xd7, 0x1b, 0x5f, 0x9e, 0xda, 0x16, 0x52, 0xd5, 0x91, 0x5d, 0x19, 0xd8, 0x9c, 0x50, 0x14, 0xcf, 0x8b, 0x47, 0x3, 0xc2, 0x86, 0x4a, 0xe, 0xe1, 0xa5, 0x69, 0x2d, 0xec, 0xa8, 0x64, 0x20, 0xfb, 0xbf, 0x73, 0x37, 0xf6, 0xb2, 0x7e, 0x3a, 0x6d, 0x29, 0xe5, 0xa1, 0x60, 0x24, 0xe8, 0xac, 0x77, 0x33, 0xff, 0xbb, 0x7a, 0x3e, 0xf2, 0xb6, 0x59, 0x1d, 0xd1, 0x95, 0x54, 0x10, 0xdc, 0x98, 0x43, 0x7, 0xcb, 0x8f, 0x4e, 0xa, 0xc6, 0x82, 0x5, 0x41, 0x8d, 0xc9, 0x8, 0x4c, 0x80, 0xc4, 0x1f, 0x5b, 0x97, 0xd3, 0x12, 0x56, 0x9a, 0xde, 0x31, 0x75, 0xb9, 0xfd, 0x3c, 0x78, 0xb4, 0xf0, 0x2b, 0x6f, 0xa3, 0xe7, 0x26, 0x62, 0xae, 0xea},
- {0x0, 0x45, 0x8a, 0xcf, 0x9, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4, 0x24, 0x61, 0xae, 0xeb, 0x2d, 0x68, 0xa7, 0xe2, 0x36, 0x73, 0xbc, 0xf9, 0x3f, 0x7a, 0xb5, 0xf0, 0x48, 0xd, 0xc2, 0x87, 0x41, 0x4, 0xcb, 0x8e, 0x5a, 0x1f, 0xd0, 0x95, 0x53, 0x16, 0xd9, 0x9c, 0x6c, 0x29, 0xe6, 0xa3, 0x65, 0x20, 0xef, 0xaa, 0x7e, 0x3b, 0xf4, 0xb1, 0x77, 0x32, 0xfd, 0xb8, 0x90, 0xd5, 0x1a, 0x5f, 0x99, 0xdc, 0x13, 0x56, 0x82, 0xc7, 0x8, 0x4d, 0x8b, 0xce, 0x1, 0x44, 0xb4, 0xf1, 0x3e, 0x7b, 0xbd, 0xf8, 0x37, 0x72, 0xa6, 0xe3, 0x2c, 0x69, 0xaf, 0xea, 0x25, 0x60, 0xd8, 0x9d, 0x52, 0x17, 0xd1, 0x94, 0x5b, 0x1e, 0xca, 0x8f, 0x40, 0x5, 0xc3, 0x86, 0x49, 0xc, 0xfc, 0xb9, 0x76, 0x33, 0xf5, 0xb0, 0x7f, 0x3a, 0xee, 0xab, 0x64, 0x21, 0xe7, 0xa2, 0x6d, 0x28, 0x3d, 0x78, 0xb7, 0xf2, 0x34, 0x71, 0xbe, 0xfb, 0x2f, 0x6a, 0xa5, 0xe0, 0x26, 0x63, 0xac, 0xe9, 0x19, 0x5c, 0x93, 0xd6, 0x10, 0x55, 0x9a, 0xdf, 0xb, 0x4e, 0x81, 0xc4, 0x2, 0x47, 0x88, 0xcd, 0x75, 0x30, 0xff, 0xba, 0x7c, 0x39, 0xf6, 0xb3, 0x67, 0x22, 0xed, 0xa8, 0x6e, 0x2b, 0xe4, 0xa1, 0x51, 0x14, 0xdb, 0x9e, 0x58, 0x1d, 0xd2, 0x97, 0x43, 0x6, 0xc9, 0x8c, 0x4a, 0xf, 0xc0, 0x85, 0xad, 0xe8, 0x27, 0x62, 0xa4, 0xe1, 0x2e, 0x6b, 0xbf, 0xfa, 0x35, 0x70, 0xb6, 0xf3, 0x3c, 0x79, 0x89, 0xcc, 0x3, 0x46, 0x80, 0xc5, 0xa, 0x4f, 0x9b, 0xde, 0x11, 0x54, 0x92, 0xd7, 0x18, 0x5d, 0xe5, 0xa0, 0x6f, 0x2a, 0xec, 0xa9, 0x66, 0x23, 0xf7, 0xb2, 0x7d, 0x38, 0xfe, 0xbb, 0x74, 0x31, 0xc1, 0x84, 0x4b, 0xe, 0xc8, 0x8d, 0x42, 0x7, 0xd3, 0x96, 0x59, 0x1c, 0xda, 0x9f, 0x50, 0x15},
- {0x0, 0x46, 0x8c, 0xca, 0x5, 0x43, 0x89, 0xcf, 0xa, 0x4c, 0x86, 0xc0, 0xf, 0x49, 0x83, 0xc5, 0x14, 0x52, 0x98, 0xde, 0x11, 0x57, 0x9d, 0xdb, 0x1e, 0x58, 0x92, 0xd4, 0x1b, 0x5d, 0x97, 0xd1, 0x28, 0x6e, 0xa4, 0xe2, 0x2d, 0x6b, 0xa1, 0xe7, 0x22, 0x64, 0xae, 0xe8, 0x27, 0x61, 0xab, 0xed, 0x3c, 0x7a, 0xb0, 0xf6, 0x39, 0x7f, 0xb5, 0xf3, 0x36, 0x70, 0xba, 0xfc, 0x33, 0x75, 0xbf, 0xf9, 0x50, 0x16, 0xdc, 0x9a, 0x55, 0x13, 0xd9, 0x9f, 0x5a, 0x1c, 0xd6, 0x90, 0x5f, 0x19, 0xd3, 0x95, 0x44, 0x2, 0xc8, 0x8e, 0x41, 0x7, 0xcd, 0x8b, 0x4e, 0x8, 0xc2, 0x84, 0x4b, 0xd, 0xc7, 0x81, 0x78, 0x3e, 0xf4, 0xb2, 0x7d, 0x3b, 0xf1, 0xb7, 0x72, 0x34, 0xfe, 0xb8, 0x77, 0x31, 0xfb, 0xbd, 0x6c, 0x2a, 0xe0, 0xa6, 0x69, 0x2f, 0xe5, 0xa3, 0x66, 0x20, 0xea, 0xac, 0x63, 0x25, 0xef, 0xa9, 0xa0, 0xe6, 0x2c, 0x6a, 0xa5, 0xe3, 0x29, 0x6f, 0xaa, 0xec, 0x26, 0x60, 0xaf, 0xe9, 0x23, 0x65, 0xb4, 0xf2, 0x38, 0x7e, 0xb1, 0xf7, 0x3d, 0x7b, 0xbe, 0xf8, 0x32, 0x74, 0xbb, 0xfd, 0x37, 0x71, 0x88, 0xce, 0x4, 0x42, 0x8d, 0xcb, 0x1, 0x47, 0x82, 0xc4, 0xe, 0x48, 0x87, 0xc1, 0xb, 0x4d, 0x9c, 0xda, 0x10, 0x56, 0x99, 0xdf, 0x15, 0x53, 0x96, 0xd0, 0x1a, 0x5c, 0x93, 0xd5, 0x1f, 0x59, 0xf0, 0xb6, 0x7c, 0x3a, 0xf5, 0xb3, 0x79, 0x3f, 0xfa, 0xbc, 0x76, 0x30, 0xff, 0xb9, 0x73, 0x35, 0xe4, 0xa2, 0x68, 0x2e, 0xe1, 0xa7, 0x6d, 0x2b, 0xee, 0xa8, 0x62, 0x24, 0xeb, 0xad, 0x67, 0x21, 0xd8, 0x9e, 0x54, 0x12, 0xdd, 0x9b, 0x51, 0x17, 0xd2, 0x94, 0x5e, 0x18, 0xd7, 0x91, 0x5b, 0x1d, 0xcc, 0x8a, 0x40, 0x6, 0xc9, 0x8f, 0x45, 0x3, 0xc6, 0x80, 0x4a, 0xc, 0xc3, 0x85, 0x4f, 0x9},
- {0x0, 0x47, 0x8e, 0xc9, 0x1, 0x46, 0x8f, 0xc8, 0x2, 0x45, 0x8c, 0xcb, 0x3, 0x44, 0x8d, 0xca, 0x4, 0x43, 0x8a, 0xcd, 0x5, 0x42, 0x8b, 0xcc, 0x6, 0x41, 0x88, 0xcf, 0x7, 0x40, 0x89, 0xce, 0x8, 0x4f, 0x86, 0xc1, 0x9, 0x4e, 0x87, 0xc0, 0xa, 0x4d, 0x84, 0xc3, 0xb, 0x4c, 0x85, 0xc2, 0xc, 0x4b, 0x82, 0xc5, 0xd, 0x4a, 0x83, 0xc4, 0xe, 0x49, 0x80, 0xc7, 0xf, 0x48, 0x81, 0xc6, 0x10, 0x57, 0x9e, 0xd9, 0x11, 0x56, 0x9f, 0xd8, 0x12, 0x55, 0x9c, 0xdb, 0x13, 0x54, 0x9d, 0xda, 0x14, 0x53, 0x9a, 0xdd, 0x15, 0x52, 0x9b, 0xdc, 0x16, 0x51, 0x98, 0xdf, 0x17, 0x50, 0x99, 0xde, 0x18, 0x5f, 0x96, 0xd1, 0x19, 0x5e, 0x97, 0xd0, 0x1a, 0x5d, 0x94, 0xd3, 0x1b, 0x5c, 0x95, 0xd2, 0x1c, 0x5b, 0x92, 0xd5, 0x1d, 0x5a, 0x93, 0xd4, 0x1e, 0x59, 0x90, 0xd7, 0x1f, 0x58, 0x91, 0xd6, 0x20, 0x67, 0xae, 0xe9, 0x21, 0x66, 0xaf, 0xe8, 0x22, 0x65, 0xac, 0xeb, 0x23, 0x64, 0xad, 0xea, 0x24, 0x63, 0xaa, 0xed, 0x25, 0x62, 0xab, 0xec, 0x26, 0x61, 0xa8, 0xef, 0x27, 0x60, 0xa9, 0xee, 0x28, 0x6f, 0xa6, 0xe1, 0x29, 0x6e, 0xa7, 0xe0, 0x2a, 0x6d, 0xa4, 0xe3, 0x2b, 0x6c, 0xa5, 0xe2, 0x2c, 0x6b, 0xa2, 0xe5, 0x2d, 0x6a, 0xa3, 0xe4, 0x2e, 0x69, 0xa0, 0xe7, 0x2f, 0x68, 0xa1, 0xe6, 0x30, 0x77, 0xbe, 0xf9, 0x31, 0x76, 0xbf, 0xf8, 0x32, 0x75, 0xbc, 0xfb, 0x33, 0x74, 0xbd, 0xfa, 0x34, 0x73, 0xba, 0xfd, 0x35, 0x72, 0xbb, 0xfc, 0x36, 0x71, 0xb8, 0xff, 0x37, 0x70, 0xb9, 0xfe, 0x38, 0x7f, 0xb6, 0xf1, 0x39, 0x7e, 0xb7, 0xf0, 0x3a, 0x7d, 0xb4, 0xf3, 0x3b, 0x7c, 0xb5, 0xf2, 0x3c, 0x7b, 0xb2, 0xf5, 0x3d, 0x7a, 0xb3, 0xf4, 0x3e, 0x79, 0xb0, 0xf7, 0x3f, 0x78, 0xb1, 0xf6},
- {0x0, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0xf, 0xd7, 0x9f, 0xf4, 0xbc, 0x64, 0x2c, 0xc9, 0x81, 0x59, 0x11, 0x8e, 0xc6, 0x1e, 0x56, 0xb3, 0xfb, 0x23, 0x6b, 0xf5, 0xbd, 0x65, 0x2d, 0xc8, 0x80, 0x58, 0x10, 0x8f, 0xc7, 0x1f, 0x57, 0xb2, 0xfa, 0x22, 0x6a, 0x1, 0x49, 0x91, 0xd9, 0x3c, 0x74, 0xac, 0xe4, 0x7b, 0x33, 0xeb, 0xa3, 0x46, 0xe, 0xd6, 0x9e, 0xf7, 0xbf, 0x67, 0x2f, 0xca, 0x82, 0x5a, 0x12, 0x8d, 0xc5, 0x1d, 0x55, 0xb0, 0xf8, 0x20, 0x68, 0x3, 0x4b, 0x93, 0xdb, 0x3e, 0x76, 0xae, 0xe6, 0x79, 0x31, 0xe9, 0xa1, 0x44, 0xc, 0xd4, 0x9c, 0x2, 0x4a, 0x92, 0xda, 0x3f, 0x77, 0xaf, 0xe7, 0x78, 0x30, 0xe8, 0xa0, 0x45, 0xd, 0xd5, 0x9d, 0xf6, 0xbe, 0x66, 0x2e, 0xcb, 0x83, 0x5b, 0x13, 0x8c, 0xc4, 0x1c, 0x54, 0xb1, 0xf9, 0x21, 0x69, 0xf3, 0xbb, 0x63, 0x2b, 0xce, 0x86, 0x5e, 0x16, 0x89, 0xc1, 0x19, 0x51, 0xb4, 0xfc, 0x24, 0x6c, 0x7, 0x4f, 0x97, 0xdf, 0x3a, 0x72, 0xaa, 0xe2, 0x7d, 0x35, 0xed, 0xa5, 0x40, 0x8, 0xd0, 0x98, 0x6, 0x4e, 0x96, 0xde, 0x3b, 0x73, 0xab, 0xe3, 0x7c, 0x34, 0xec, 0xa4, 0x41, 0x9, 0xd1, 0x99, 0xf2, 0xba, 0x62, 0x2a, 0xcf, 0x87, 0x5f, 0x17, 0x88, 0xc0, 0x18, 0x50, 0xb5, 0xfd, 0x25, 0x6d, 0x4, 0x4c, 0x94, 0xdc, 0x39, 0x71, 0xa9, 0xe1, 0x7e, 0x36, 0xee, 0xa6, 0x43, 0xb, 0xd3, 0x9b, 0xf0, 0xb8, 0x60, 0x28, 0xcd, 0x85, 0x5d, 0x15, 0x8a, 0xc2, 0x1a, 0x52, 0xb7, 0xff, 0x27, 0x6f, 0xf1, 0xb9, 0x61, 0x29, 0xcc, 0x84, 0x5c, 0x14, 0x8b, 0xc3, 0x1b, 0x53, 0xb6, 0xfe, 0x26, 0x6e, 0x5, 0x4d, 0x95, 0xdd, 0x38, 0x70, 0xa8, 0xe0, 0x7f, 0x37, 0xef, 0xa7, 0x42, 0xa, 0xd2, 0x9a},
- {0x0, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x2, 0xd9, 0x90, 0xe4, 0xad, 0x76, 0x3f, 0xdd, 0x94, 0x4f, 0x6, 0x96, 0xdf, 0x4, 0x4d, 0xaf, 0xe6, 0x3d, 0x74, 0xd5, 0x9c, 0x47, 0xe, 0xec, 0xa5, 0x7e, 0x37, 0xa7, 0xee, 0x35, 0x7c, 0x9e, 0xd7, 0xc, 0x45, 0x31, 0x78, 0xa3, 0xea, 0x8, 0x41, 0x9a, 0xd3, 0x43, 0xa, 0xd1, 0x98, 0x7a, 0x33, 0xe8, 0xa1, 0xb7, 0xfe, 0x25, 0x6c, 0x8e, 0xc7, 0x1c, 0x55, 0xc5, 0x8c, 0x57, 0x1e, 0xfc, 0xb5, 0x6e, 0x27, 0x53, 0x1a, 0xc1, 0x88, 0x6a, 0x23, 0xf8, 0xb1, 0x21, 0x68, 0xb3, 0xfa, 0x18, 0x51, 0x8a, 0xc3, 0x62, 0x2b, 0xf0, 0xb9, 0x5b, 0x12, 0xc9, 0x80, 0x10, 0x59, 0x82, 0xcb, 0x29, 0x60, 0xbb, 0xf2, 0x86, 0xcf, 0x14, 0x5d, 0xbf, 0xf6, 0x2d, 0x64, 0xf4, 0xbd, 0x66, 0x2f, 0xcd, 0x84, 0x5f, 0x16, 0x73, 0x3a, 0xe1, 0xa8, 0x4a, 0x3, 0xd8, 0x91, 0x1, 0x48, 0x93, 0xda, 0x38, 0x71, 0xaa, 0xe3, 0x97, 0xde, 0x5, 0x4c, 0xae, 0xe7, 0x3c, 0x75, 0xe5, 0xac, 0x77, 0x3e, 0xdc, 0x95, 0x4e, 0x7, 0xa6, 0xef, 0x34, 0x7d, 0x9f, 0xd6, 0xd, 0x44, 0xd4, 0x9d, 0x46, 0xf, 0xed, 0xa4, 0x7f, 0x36, 0x42, 0xb, 0xd0, 0x99, 0x7b, 0x32, 0xe9, 0xa0, 0x30, 0x79, 0xa2, 0xeb, 0x9, 0x40, 0x9b, 0xd2, 0xc4, 0x8d, 0x56, 0x1f, 0xfd, 0xb4, 0x6f, 0x26, 0xb6, 0xff, 0x24, 0x6d, 0x8f, 0xc6, 0x1d, 0x54, 0x20, 0x69, 0xb2, 0xfb, 0x19, 0x50, 0x8b, 0xc2, 0x52, 0x1b, 0xc0, 0x89, 0x6b, 0x22, 0xf9, 0xb0, 0x11, 0x58, 0x83, 0xca, 0x28, 0x61, 0xba, 0xf3, 0x63, 0x2a, 0xf1, 0xb8, 0x5a, 0x13, 0xc8, 0x81, 0xf5, 0xbc, 0x67, 0x2e, 0xcc, 0x85, 0x5e, 0x17, 0x87, 0xce, 0x15, 0x5c, 0xbe, 0xf7, 0x2c, 0x65},
- {0x0, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81, 0xd4, 0x9e, 0x40, 0xa, 0xe1, 0xab, 0x75, 0x3f, 0xbe, 0xf4, 0x2a, 0x60, 0x8b, 0xc1, 0x1f, 0x55, 0xb5, 0xff, 0x21, 0x6b, 0x80, 0xca, 0x14, 0x5e, 0xdf, 0x95, 0x4b, 0x1, 0xea, 0xa0, 0x7e, 0x34, 0x61, 0x2b, 0xf5, 0xbf, 0x54, 0x1e, 0xc0, 0x8a, 0xb, 0x41, 0x9f, 0xd5, 0x3e, 0x74, 0xaa, 0xe0, 0x77, 0x3d, 0xe3, 0xa9, 0x42, 0x8, 0xd6, 0x9c, 0x1d, 0x57, 0x89, 0xc3, 0x28, 0x62, 0xbc, 0xf6, 0xa3, 0xe9, 0x37, 0x7d, 0x96, 0xdc, 0x2, 0x48, 0xc9, 0x83, 0x5d, 0x17, 0xfc, 0xb6, 0x68, 0x22, 0xc2, 0x88, 0x56, 0x1c, 0xf7, 0xbd, 0x63, 0x29, 0xa8, 0xe2, 0x3c, 0x76, 0x9d, 0xd7, 0x9, 0x43, 0x16, 0x5c, 0x82, 0xc8, 0x23, 0x69, 0xb7, 0xfd, 0x7c, 0x36, 0xe8, 0xa2, 0x49, 0x3, 0xdd, 0x97, 0xee, 0xa4, 0x7a, 0x30, 0xdb, 0x91, 0x4f, 0x5, 0x84, 0xce, 0x10, 0x5a, 0xb1, 0xfb, 0x25, 0x6f, 0x3a, 0x70, 0xae, 0xe4, 0xf, 0x45, 0x9b, 0xd1, 0x50, 0x1a, 0xc4, 0x8e, 0x65, 0x2f, 0xf1, 0xbb, 0x5b, 0x11, 0xcf, 0x85, 0x6e, 0x24, 0xfa, 0xb0, 0x31, 0x7b, 0xa5, 0xef, 0x4, 0x4e, 0x90, 0xda, 0x8f, 0xc5, 0x1b, 0x51, 0xba, 0xf0, 0x2e, 0x64, 0xe5, 0xaf, 0x71, 0x3b, 0xd0, 0x9a, 0x44, 0xe, 0x99, 0xd3, 0xd, 0x47, 0xac, 0xe6, 0x38, 0x72, 0xf3, 0xb9, 0x67, 0x2d, 0xc6, 0x8c, 0x52, 0x18, 0x4d, 0x7, 0xd9, 0x93, 0x78, 0x32, 0xec, 0xa6, 0x27, 0x6d, 0xb3, 0xf9, 0x12, 0x58, 0x86, 0xcc, 0x2c, 0x66, 0xb8, 0xf2, 0x19, 0x53, 0x8d, 0xc7, 0x46, 0xc, 0xd2, 0x98, 0x73, 0x39, 0xe7, 0xad, 0xf8, 0xb2, 0x6c, 0x26, 0xcd, 0x87, 0x59, 0x13, 0x92, 0xd8, 0x6, 0x4c, 0xa7, 0xed, 0x33, 0x79},
- {0x0, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e, 0xc4, 0x8f, 0x52, 0x19, 0xf5, 0xbe, 0x63, 0x28, 0xa6, 0xed, 0x30, 0x7b, 0x97, 0xdc, 0x1, 0x4a, 0x95, 0xde, 0x3, 0x48, 0xa4, 0xef, 0x32, 0x79, 0xf7, 0xbc, 0x61, 0x2a, 0xc6, 0x8d, 0x50, 0x1b, 0x51, 0x1a, 0xc7, 0x8c, 0x60, 0x2b, 0xf6, 0xbd, 0x33, 0x78, 0xa5, 0xee, 0x2, 0x49, 0x94, 0xdf, 0x37, 0x7c, 0xa1, 0xea, 0x6, 0x4d, 0x90, 0xdb, 0x55, 0x1e, 0xc3, 0x88, 0x64, 0x2f, 0xf2, 0xb9, 0xf3, 0xb8, 0x65, 0x2e, 0xc2, 0x89, 0x54, 0x1f, 0x91, 0xda, 0x7, 0x4c, 0xa0, 0xeb, 0x36, 0x7d, 0xa2, 0xe9, 0x34, 0x7f, 0x93, 0xd8, 0x5, 0x4e, 0xc0, 0x8b, 0x56, 0x1d, 0xf1, 0xba, 0x67, 0x2c, 0x66, 0x2d, 0xf0, 0xbb, 0x57, 0x1c, 0xc1, 0x8a, 0x4, 0x4f, 0x92, 0xd9, 0x35, 0x7e, 0xa3, 0xe8, 0x6e, 0x25, 0xf8, 0xb3, 0x5f, 0x14, 0xc9, 0x82, 0xc, 0x47, 0x9a, 0xd1, 0x3d, 0x76, 0xab, 0xe0, 0xaa, 0xe1, 0x3c, 0x77, 0x9b, 0xd0, 0xd, 0x46, 0xc8, 0x83, 0x5e, 0x15, 0xf9, 0xb2, 0x6f, 0x24, 0xfb, 0xb0, 0x6d, 0x26, 0xca, 0x81, 0x5c, 0x17, 0x99, 0xd2, 0xf, 0x44, 0xa8, 0xe3, 0x3e, 0x75, 0x3f, 0x74, 0xa9, 0xe2, 0xe, 0x45, 0x98, 0xd3, 0x5d, 0x16, 0xcb, 0x80, 0x6c, 0x27, 0xfa, 0xb1, 0x59, 0x12, 0xcf, 0x84, 0x68, 0x23, 0xfe, 0xb5, 0x3b, 0x70, 0xad, 0xe6, 0xa, 0x41, 0x9c, 0xd7, 0x9d, 0xd6, 0xb, 0x40, 0xac, 0xe7, 0x3a, 0x71, 0xff, 0xb4, 0x69, 0x22, 0xce, 0x85, 0x58, 0x13, 0xcc, 0x87, 0x5a, 0x11, 0xfd, 0xb6, 0x6b, 0x20, 0xae, 0xe5, 0x38, 0x73, 0x9f, 0xd4, 0x9, 0x42, 0x8, 0x43, 0x9e, 0xd5, 0x39, 0x72, 0xaf, 0xe4, 0x6a, 0x21, 0xfc, 0xb7, 0x5b, 0x10, 0xcd, 0x86},
- {0x0, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3, 0xb4, 0xf8, 0x2c, 0x60, 0x99, 0xd5, 0x1, 0x4d, 0xee, 0xa2, 0x76, 0x3a, 0xc3, 0x8f, 0x5b, 0x17, 0x75, 0x39, 0xed, 0xa1, 0x58, 0x14, 0xc0, 0x8c, 0x2f, 0x63, 0xb7, 0xfb, 0x2, 0x4e, 0x9a, 0xd6, 0xc1, 0x8d, 0x59, 0x15, 0xec, 0xa0, 0x74, 0x38, 0x9b, 0xd7, 0x3, 0x4f, 0xb6, 0xfa, 0x2e, 0x62, 0xea, 0xa6, 0x72, 0x3e, 0xc7, 0x8b, 0x5f, 0x13, 0xb0, 0xfc, 0x28, 0x64, 0x9d, 0xd1, 0x5, 0x49, 0x5e, 0x12, 0xc6, 0x8a, 0x73, 0x3f, 0xeb, 0xa7, 0x4, 0x48, 0x9c, 0xd0, 0x29, 0x65, 0xb1, 0xfd, 0x9f, 0xd3, 0x7, 0x4b, 0xb2, 0xfe, 0x2a, 0x66, 0xc5, 0x89, 0x5d, 0x11, 0xe8, 0xa4, 0x70, 0x3c, 0x2b, 0x67, 0xb3, 0xff, 0x6, 0x4a, 0x9e, 0xd2, 0x71, 0x3d, 0xe9, 0xa5, 0x5c, 0x10, 0xc4, 0x88, 0xc9, 0x85, 0x51, 0x1d, 0xe4, 0xa8, 0x7c, 0x30, 0x93, 0xdf, 0xb, 0x47, 0xbe, 0xf2, 0x26, 0x6a, 0x7d, 0x31, 0xe5, 0xa9, 0x50, 0x1c, 0xc8, 0x84, 0x27, 0x6b, 0xbf, 0xf3, 0xa, 0x46, 0x92, 0xde, 0xbc, 0xf0, 0x24, 0x68, 0x91, 0xdd, 0x9, 0x45, 0xe6, 0xaa, 0x7e, 0x32, 0xcb, 0x87, 0x53, 0x1f, 0x8, 0x44, 0x90, 0xdc, 0x25, 0x69, 0xbd, 0xf1, 0x52, 0x1e, 0xca, 0x86, 0x7f, 0x33, 0xe7, 0xab, 0x23, 0x6f, 0xbb, 0xf7, 0xe, 0x42, 0x96, 0xda, 0x79, 0x35, 0xe1, 0xad, 0x54, 0x18, 0xcc, 0x80, 0x97, 0xdb, 0xf, 0x43, 0xba, 0xf6, 0x22, 0x6e, 0xcd, 0x81, 0x55, 0x19, 0xe0, 0xac, 0x78, 0x34, 0x56, 0x1a, 0xce, 0x82, 0x7b, 0x37, 0xe3, 0xaf, 0xc, 0x40, 0x94, 0xd8, 0x21, 0x6d, 0xb9, 0xf5, 0xe2, 0xae, 0x7a, 0x36, 0xcf, 0x83, 0x57, 0x1b, 0xb8, 0xf4, 0x20, 0x6c, 0x95, 0xd9, 0xd, 0x41},
- {0x0, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac, 0xa4, 0xe9, 0x3e, 0x73, 0x8d, 0xc0, 0x17, 0x5a, 0xf6, 0xbb, 0x6c, 0x21, 0xdf, 0x92, 0x45, 0x8, 0x55, 0x18, 0xcf, 0x82, 0x7c, 0x31, 0xe6, 0xab, 0x7, 0x4a, 0x9d, 0xd0, 0x2e, 0x63, 0xb4, 0xf9, 0xf1, 0xbc, 0x6b, 0x26, 0xd8, 0x95, 0x42, 0xf, 0xa3, 0xee, 0x39, 0x74, 0x8a, 0xc7, 0x10, 0x5d, 0xaa, 0xe7, 0x30, 0x7d, 0x83, 0xce, 0x19, 0x54, 0xf8, 0xb5, 0x62, 0x2f, 0xd1, 0x9c, 0x4b, 0x6, 0xe, 0x43, 0x94, 0xd9, 0x27, 0x6a, 0xbd, 0xf0, 0x5c, 0x11, 0xc6, 0x8b, 0x75, 0x38, 0xef, 0xa2, 0xff, 0xb2, 0x65, 0x28, 0xd6, 0x9b, 0x4c, 0x1, 0xad, 0xe0, 0x37, 0x7a, 0x84, 0xc9, 0x1e, 0x53, 0x5b, 0x16, 0xc1, 0x8c, 0x72, 0x3f, 0xe8, 0xa5, 0x9, 0x44, 0x93, 0xde, 0x20, 0x6d, 0xba, 0xf7, 0x49, 0x4, 0xd3, 0x9e, 0x60, 0x2d, 0xfa, 0xb7, 0x1b, 0x56, 0x81, 0xcc, 0x32, 0x7f, 0xa8, 0xe5, 0xed, 0xa0, 0x77, 0x3a, 0xc4, 0x89, 0x5e, 0x13, 0xbf, 0xf2, 0x25, 0x68, 0x96, 0xdb, 0xc, 0x41, 0x1c, 0x51, 0x86, 0xcb, 0x35, 0x78, 0xaf, 0xe2, 0x4e, 0x3, 0xd4, 0x99, 0x67, 0x2a, 0xfd, 0xb0, 0xb8, 0xf5, 0x22, 0x6f, 0x91, 0xdc, 0xb, 0x46, 0xea, 0xa7, 0x70, 0x3d, 0xc3, 0x8e, 0x59, 0x14, 0xe3, 0xae, 0x79, 0x34, 0xca, 0x87, 0x50, 0x1d, 0xb1, 0xfc, 0x2b, 0x66, 0x98, 0xd5, 0x2, 0x4f, 0x47, 0xa, 0xdd, 0x90, 0x6e, 0x23, 0xf4, 0xb9, 0x15, 0x58, 0x8f, 0xc2, 0x3c, 0x71, 0xa6, 0xeb, 0xb6, 0xfb, 0x2c, 0x61, 0x9f, 0xd2, 0x5, 0x48, 0xe4, 0xa9, 0x7e, 0x33, 0xcd, 0x80, 0x57, 0x1a, 0x12, 0x5f, 0x88, 0xc5, 0x3b, 0x76, 0xa1, 0xec, 0x40, 0xd, 0xda, 0x97, 0x69, 0x24, 0xf3, 0xbe},
- {0x0, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x4, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd, 0x94, 0xda, 0x8, 0x46, 0xb1, 0xff, 0x2d, 0x63, 0xde, 0x90, 0x42, 0xc, 0xfb, 0xb5, 0x67, 0x29, 0x35, 0x7b, 0xa9, 0xe7, 0x10, 0x5e, 0x8c, 0xc2, 0x7f, 0x31, 0xe3, 0xad, 0x5a, 0x14, 0xc6, 0x88, 0xa1, 0xef, 0x3d, 0x73, 0x84, 0xca, 0x18, 0x56, 0xeb, 0xa5, 0x77, 0x39, 0xce, 0x80, 0x52, 0x1c, 0x6a, 0x24, 0xf6, 0xb8, 0x4f, 0x1, 0xd3, 0x9d, 0x20, 0x6e, 0xbc, 0xf2, 0x5, 0x4b, 0x99, 0xd7, 0xfe, 0xb0, 0x62, 0x2c, 0xdb, 0x95, 0x47, 0x9, 0xb4, 0xfa, 0x28, 0x66, 0x91, 0xdf, 0xd, 0x43, 0x5f, 0x11, 0xc3, 0x8d, 0x7a, 0x34, 0xe6, 0xa8, 0x15, 0x5b, 0x89, 0xc7, 0x30, 0x7e, 0xac, 0xe2, 0xcb, 0x85, 0x57, 0x19, 0xee, 0xa0, 0x72, 0x3c, 0x81, 0xcf, 0x1d, 0x53, 0xa4, 0xea, 0x38, 0x76, 0xd4, 0x9a, 0x48, 0x6, 0xf1, 0xbf, 0x6d, 0x23, 0x9e, 0xd0, 0x2, 0x4c, 0xbb, 0xf5, 0x27, 0x69, 0x40, 0xe, 0xdc, 0x92, 0x65, 0x2b, 0xf9, 0xb7, 0xa, 0x44, 0x96, 0xd8, 0x2f, 0x61, 0xb3, 0xfd, 0xe1, 0xaf, 0x7d, 0x33, 0xc4, 0x8a, 0x58, 0x16, 0xab, 0xe5, 0x37, 0x79, 0x8e, 0xc0, 0x12, 0x5c, 0x75, 0x3b, 0xe9, 0xa7, 0x50, 0x1e, 0xcc, 0x82, 0x3f, 0x71, 0xa3, 0xed, 0x1a, 0x54, 0x86, 0xc8, 0xbe, 0xf0, 0x22, 0x6c, 0x9b, 0xd5, 0x7, 0x49, 0xf4, 0xba, 0x68, 0x26, 0xd1, 0x9f, 0x4d, 0x3, 0x2a, 0x64, 0xb6, 0xf8, 0xf, 0x41, 0x93, 0xdd, 0x60, 0x2e, 0xfc, 0xb2, 0x45, 0xb, 0xd9, 0x97, 0x8b, 0xc5, 0x17, 0x59, 0xae, 0xe0, 0x32, 0x7c, 0xc1, 0x8f, 0x5d, 0x13, 0xe4, 0xaa, 0x78, 0x36, 0x1f, 0x51, 0x83, 0xcd, 0x3a, 0x74, 0xa6, 0xe8, 0x55, 0x1b, 0xc9, 0x87, 0x70, 0x3e, 0xec, 0xa2},
- {0x0, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0xd, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2, 0x84, 0xcb, 0x1a, 0x55, 0xa5, 0xea, 0x3b, 0x74, 0xc6, 0x89, 0x58, 0x17, 0xe7, 0xa8, 0x79, 0x36, 0x15, 0x5a, 0x8b, 0xc4, 0x34, 0x7b, 0xaa, 0xe5, 0x57, 0x18, 0xc9, 0x86, 0x76, 0x39, 0xe8, 0xa7, 0x91, 0xde, 0xf, 0x40, 0xb0, 0xff, 0x2e, 0x61, 0xd3, 0x9c, 0x4d, 0x2, 0xf2, 0xbd, 0x6c, 0x23, 0x2a, 0x65, 0xb4, 0xfb, 0xb, 0x44, 0x95, 0xda, 0x68, 0x27, 0xf6, 0xb9, 0x49, 0x6, 0xd7, 0x98, 0xae, 0xe1, 0x30, 0x7f, 0x8f, 0xc0, 0x11, 0x5e, 0xec, 0xa3, 0x72, 0x3d, 0xcd, 0x82, 0x53, 0x1c, 0x3f, 0x70, 0xa1, 0xee, 0x1e, 0x51, 0x80, 0xcf, 0x7d, 0x32, 0xe3, 0xac, 0x5c, 0x13, 0xc2, 0x8d, 0xbb, 0xf4, 0x25, 0x6a, 0x9a, 0xd5, 0x4, 0x4b, 0xf9, 0xb6, 0x67, 0x28, 0xd8, 0x97, 0x46, 0x9, 0x54, 0x1b, 0xca, 0x85, 0x75, 0x3a, 0xeb, 0xa4, 0x16, 0x59, 0x88, 0xc7, 0x37, 0x78, 0xa9, 0xe6, 0xd0, 0x9f, 0x4e, 0x1, 0xf1, 0xbe, 0x6f, 0x20, 0x92, 0xdd, 0xc, 0x43, 0xb3, 0xfc, 0x2d, 0x62, 0x41, 0xe, 0xdf, 0x90, 0x60, 0x2f, 0xfe, 0xb1, 0x3, 0x4c, 0x9d, 0xd2, 0x22, 0x6d, 0xbc, 0xf3, 0xc5, 0x8a, 0x5b, 0x14, 0xe4, 0xab, 0x7a, 0x35, 0x87, 0xc8, 0x19, 0x56, 0xa6, 0xe9, 0x38, 0x77, 0x7e, 0x31, 0xe0, 0xaf, 0x5f, 0x10, 0xc1, 0x8e, 0x3c, 0x73, 0xa2, 0xed, 0x1d, 0x52, 0x83, 0xcc, 0xfa, 0xb5, 0x64, 0x2b, 0xdb, 0x94, 0x45, 0xa, 0xb8, 0xf7, 0x26, 0x69, 0x99, 0xd6, 0x7, 0x48, 0x6b, 0x24, 0xf5, 0xba, 0x4a, 0x5, 0xd4, 0x9b, 0x29, 0x66, 0xb7, 0xf8, 0x8, 0x47, 0x96, 0xd9, 0xef, 0xa0, 0x71, 0x3e, 0xce, 0x81, 0x50, 0x1f, 0xad, 0xe2, 0x33, 0x7c, 0x8c, 0xc3, 0x12, 0x5d},
- {0x0, 0x50, 0xa0, 0xf0, 0x5d, 0xd, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17, 0x69, 0x39, 0xc9, 0x99, 0x34, 0x64, 0x94, 0xc4, 0xd3, 0x83, 0x73, 0x23, 0x8e, 0xde, 0x2e, 0x7e, 0xd2, 0x82, 0x72, 0x22, 0x8f, 0xdf, 0x2f, 0x7f, 0x68, 0x38, 0xc8, 0x98, 0x35, 0x65, 0x95, 0xc5, 0xbb, 0xeb, 0x1b, 0x4b, 0xe6, 0xb6, 0x46, 0x16, 0x1, 0x51, 0xa1, 0xf1, 0x5c, 0xc, 0xfc, 0xac, 0xb9, 0xe9, 0x19, 0x49, 0xe4, 0xb4, 0x44, 0x14, 0x3, 0x53, 0xa3, 0xf3, 0x5e, 0xe, 0xfe, 0xae, 0xd0, 0x80, 0x70, 0x20, 0x8d, 0xdd, 0x2d, 0x7d, 0x6a, 0x3a, 0xca, 0x9a, 0x37, 0x67, 0x97, 0xc7, 0x6b, 0x3b, 0xcb, 0x9b, 0x36, 0x66, 0x96, 0xc6, 0xd1, 0x81, 0x71, 0x21, 0x8c, 0xdc, 0x2c, 0x7c, 0x2, 0x52, 0xa2, 0xf2, 0x5f, 0xf, 0xff, 0xaf, 0xb8, 0xe8, 0x18, 0x48, 0xe5, 0xb5, 0x45, 0x15, 0x6f, 0x3f, 0xcf, 0x9f, 0x32, 0x62, 0x92, 0xc2, 0xd5, 0x85, 0x75, 0x25, 0x88, 0xd8, 0x28, 0x78, 0x6, 0x56, 0xa6, 0xf6, 0x5b, 0xb, 0xfb, 0xab, 0xbc, 0xec, 0x1c, 0x4c, 0xe1, 0xb1, 0x41, 0x11, 0xbd, 0xed, 0x1d, 0x4d, 0xe0, 0xb0, 0x40, 0x10, 0x7, 0x57, 0xa7, 0xf7, 0x5a, 0xa, 0xfa, 0xaa, 0xd4, 0x84, 0x74, 0x24, 0x89, 0xd9, 0x29, 0x79, 0x6e, 0x3e, 0xce, 0x9e, 0x33, 0x63, 0x93, 0xc3, 0xd6, 0x86, 0x76, 0x26, 0x8b, 0xdb, 0x2b, 0x7b, 0x6c, 0x3c, 0xcc, 0x9c, 0x31, 0x61, 0x91, 0xc1, 0xbf, 0xef, 0x1f, 0x4f, 0xe2, 0xb2, 0x42, 0x12, 0x5, 0x55, 0xa5, 0xf5, 0x58, 0x8, 0xf8, 0xa8, 0x4, 0x54, 0xa4, 0xf4, 0x59, 0x9, 0xf9, 0xa9, 0xbe, 0xee, 0x1e, 0x4e, 0xe3, 0xb3, 0x43, 0x13, 0x6d, 0x3d, 0xcd, 0x9d, 0x30, 0x60, 0x90, 0xc0, 0xd7, 0x87, 0x77, 0x27, 0x8a, 0xda, 0x2a, 0x7a},
- {0x0, 0x51, 0xa2, 0xf3, 0x59, 0x8, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18, 0x79, 0x28, 0xdb, 0x8a, 0x20, 0x71, 0x82, 0xd3, 0xcb, 0x9a, 0x69, 0x38, 0x92, 0xc3, 0x30, 0x61, 0xf2, 0xa3, 0x50, 0x1, 0xab, 0xfa, 0x9, 0x58, 0x40, 0x11, 0xe2, 0xb3, 0x19, 0x48, 0xbb, 0xea, 0x8b, 0xda, 0x29, 0x78, 0xd2, 0x83, 0x70, 0x21, 0x39, 0x68, 0x9b, 0xca, 0x60, 0x31, 0xc2, 0x93, 0xf9, 0xa8, 0x5b, 0xa, 0xa0, 0xf1, 0x2, 0x53, 0x4b, 0x1a, 0xe9, 0xb8, 0x12, 0x43, 0xb0, 0xe1, 0x80, 0xd1, 0x22, 0x73, 0xd9, 0x88, 0x7b, 0x2a, 0x32, 0x63, 0x90, 0xc1, 0x6b, 0x3a, 0xc9, 0x98, 0xb, 0x5a, 0xa9, 0xf8, 0x52, 0x3, 0xf0, 0xa1, 0xb9, 0xe8, 0x1b, 0x4a, 0xe0, 0xb1, 0x42, 0x13, 0x72, 0x23, 0xd0, 0x81, 0x2b, 0x7a, 0x89, 0xd8, 0xc0, 0x91, 0x62, 0x33, 0x99, 0xc8, 0x3b, 0x6a, 0xef, 0xbe, 0x4d, 0x1c, 0xb6, 0xe7, 0x14, 0x45, 0x5d, 0xc, 0xff, 0xae, 0x4, 0x55, 0xa6, 0xf7, 0x96, 0xc7, 0x34, 0x65, 0xcf, 0x9e, 0x6d, 0x3c, 0x24, 0x75, 0x86, 0xd7, 0x7d, 0x2c, 0xdf, 0x8e, 0x1d, 0x4c, 0xbf, 0xee, 0x44, 0x15, 0xe6, 0xb7, 0xaf, 0xfe, 0xd, 0x5c, 0xf6, 0xa7, 0x54, 0x5, 0x64, 0x35, 0xc6, 0x97, 0x3d, 0x6c, 0x9f, 0xce, 0xd6, 0x87, 0x74, 0x25, 0x8f, 0xde, 0x2d, 0x7c, 0x16, 0x47, 0xb4, 0xe5, 0x4f, 0x1e, 0xed, 0xbc, 0xa4, 0xf5, 0x6, 0x57, 0xfd, 0xac, 0x5f, 0xe, 0x6f, 0x3e, 0xcd, 0x9c, 0x36, 0x67, 0x94, 0xc5, 0xdd, 0x8c, 0x7f, 0x2e, 0x84, 0xd5, 0x26, 0x77, 0xe4, 0xb5, 0x46, 0x17, 0xbd, 0xec, 0x1f, 0x4e, 0x56, 0x7, 0xf4, 0xa5, 0xf, 0x5e, 0xad, 0xfc, 0x9d, 0xcc, 0x3f, 0x6e, 0xc4, 0x95, 0x66, 0x37, 0x2f, 0x7e, 0x8d, 0xdc, 0x76, 0x27, 0xd4, 0x85},
- {0x0, 0x52, 0xa4, 0xf6, 0x55, 0x7, 0xf1, 0xa3, 0xaa, 0xf8, 0xe, 0x5c, 0xff, 0xad, 0x5b, 0x9, 0x49, 0x1b, 0xed, 0xbf, 0x1c, 0x4e, 0xb8, 0xea, 0xe3, 0xb1, 0x47, 0x15, 0xb6, 0xe4, 0x12, 0x40, 0x92, 0xc0, 0x36, 0x64, 0xc7, 0x95, 0x63, 0x31, 0x38, 0x6a, 0x9c, 0xce, 0x6d, 0x3f, 0xc9, 0x9b, 0xdb, 0x89, 0x7f, 0x2d, 0x8e, 0xdc, 0x2a, 0x78, 0x71, 0x23, 0xd5, 0x87, 0x24, 0x76, 0x80, 0xd2, 0x39, 0x6b, 0x9d, 0xcf, 0x6c, 0x3e, 0xc8, 0x9a, 0x93, 0xc1, 0x37, 0x65, 0xc6, 0x94, 0x62, 0x30, 0x70, 0x22, 0xd4, 0x86, 0x25, 0x77, 0x81, 0xd3, 0xda, 0x88, 0x7e, 0x2c, 0x8f, 0xdd, 0x2b, 0x79, 0xab, 0xf9, 0xf, 0x5d, 0xfe, 0xac, 0x5a, 0x8, 0x1, 0x53, 0xa5, 0xf7, 0x54, 0x6, 0xf0, 0xa2, 0xe2, 0xb0, 0x46, 0x14, 0xb7, 0xe5, 0x13, 0x41, 0x48, 0x1a, 0xec, 0xbe, 0x1d, 0x4f, 0xb9, 0xeb, 0x72, 0x20, 0xd6, 0x84, 0x27, 0x75, 0x83, 0xd1, 0xd8, 0x8a, 0x7c, 0x2e, 0x8d, 0xdf, 0x29, 0x7b, 0x3b, 0x69, 0x9f, 0xcd, 0x6e, 0x3c, 0xca, 0x98, 0x91, 0xc3, 0x35, 0x67, 0xc4, 0x96, 0x60, 0x32, 0xe0, 0xb2, 0x44, 0x16, 0xb5, 0xe7, 0x11, 0x43, 0x4a, 0x18, 0xee, 0xbc, 0x1f, 0x4d, 0xbb, 0xe9, 0xa9, 0xfb, 0xd, 0x5f, 0xfc, 0xae, 0x58, 0xa, 0x3, 0x51, 0xa7, 0xf5, 0x56, 0x4, 0xf2, 0xa0, 0x4b, 0x19, 0xef, 0xbd, 0x1e, 0x4c, 0xba, 0xe8, 0xe1, 0xb3, 0x45, 0x17, 0xb4, 0xe6, 0x10, 0x42, 0x2, 0x50, 0xa6, 0xf4, 0x57, 0x5, 0xf3, 0xa1, 0xa8, 0xfa, 0xc, 0x5e, 0xfd, 0xaf, 0x59, 0xb, 0xd9, 0x8b, 0x7d, 0x2f, 0x8c, 0xde, 0x28, 0x7a, 0x73, 0x21, 0xd7, 0x85, 0x26, 0x74, 0x82, 0xd0, 0x90, 0xc2, 0x34, 0x66, 0xc5, 0x97, 0x61, 0x33, 0x3a, 0x68, 0x9e, 0xcc, 0x6f, 0x3d, 0xcb, 0x99},
- {0x0, 0x53, 0xa6, 0xf5, 0x51, 0x2, 0xf7, 0xa4, 0xa2, 0xf1, 0x4, 0x57, 0xf3, 0xa0, 0x55, 0x6, 0x59, 0xa, 0xff, 0xac, 0x8, 0x5b, 0xae, 0xfd, 0xfb, 0xa8, 0x5d, 0xe, 0xaa, 0xf9, 0xc, 0x5f, 0xb2, 0xe1, 0x14, 0x47, 0xe3, 0xb0, 0x45, 0x16, 0x10, 0x43, 0xb6, 0xe5, 0x41, 0x12, 0xe7, 0xb4, 0xeb, 0xb8, 0x4d, 0x1e, 0xba, 0xe9, 0x1c, 0x4f, 0x49, 0x1a, 0xef, 0xbc, 0x18, 0x4b, 0xbe, 0xed, 0x79, 0x2a, 0xdf, 0x8c, 0x28, 0x7b, 0x8e, 0xdd, 0xdb, 0x88, 0x7d, 0x2e, 0x8a, 0xd9, 0x2c, 0x7f, 0x20, 0x73, 0x86, 0xd5, 0x71, 0x22, 0xd7, 0x84, 0x82, 0xd1, 0x24, 0x77, 0xd3, 0x80, 0x75, 0x26, 0xcb, 0x98, 0x6d, 0x3e, 0x9a, 0xc9, 0x3c, 0x6f, 0x69, 0x3a, 0xcf, 0x9c, 0x38, 0x6b, 0x9e, 0xcd, 0x92, 0xc1, 0x34, 0x67, 0xc3, 0x90, 0x65, 0x36, 0x30, 0x63, 0x96, 0xc5, 0x61, 0x32, 0xc7, 0x94, 0xf2, 0xa1, 0x54, 0x7, 0xa3, 0xf0, 0x5, 0x56, 0x50, 0x3, 0xf6, 0xa5, 0x1, 0x52, 0xa7, 0xf4, 0xab, 0xf8, 0xd, 0x5e, 0xfa, 0xa9, 0x5c, 0xf, 0x9, 0x5a, 0xaf, 0xfc, 0x58, 0xb, 0xfe, 0xad, 0x40, 0x13, 0xe6, 0xb5, 0x11, 0x42, 0xb7, 0xe4, 0xe2, 0xb1, 0x44, 0x17, 0xb3, 0xe0, 0x15, 0x46, 0x19, 0x4a, 0xbf, 0xec, 0x48, 0x1b, 0xee, 0xbd, 0xbb, 0xe8, 0x1d, 0x4e, 0xea, 0xb9, 0x4c, 0x1f, 0x8b, 0xd8, 0x2d, 0x7e, 0xda, 0x89, 0x7c, 0x2f, 0x29, 0x7a, 0x8f, 0xdc, 0x78, 0x2b, 0xde, 0x8d, 0xd2, 0x81, 0x74, 0x27, 0x83, 0xd0, 0x25, 0x76, 0x70, 0x23, 0xd6, 0x85, 0x21, 0x72, 0x87, 0xd4, 0x39, 0x6a, 0x9f, 0xcc, 0x68, 0x3b, 0xce, 0x9d, 0x9b, 0xc8, 0x3d, 0x6e, 0xca, 0x99, 0x6c, 0x3f, 0x60, 0x33, 0xc6, 0x95, 0x31, 0x62, 0x97, 0xc4, 0xc2, 0x91, 0x64, 0x37, 0x93, 0xc0, 0x35, 0x66},
- {0x0, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b, 0x29, 0x7d, 0x81, 0xd5, 0x64, 0x30, 0xcc, 0x98, 0xb3, 0xe7, 0x1b, 0x4f, 0xfe, 0xaa, 0x56, 0x2, 0x52, 0x6, 0xfa, 0xae, 0x1f, 0x4b, 0xb7, 0xe3, 0xc8, 0x9c, 0x60, 0x34, 0x85, 0xd1, 0x2d, 0x79, 0x7b, 0x2f, 0xd3, 0x87, 0x36, 0x62, 0x9e, 0xca, 0xe1, 0xb5, 0x49, 0x1d, 0xac, 0xf8, 0x4, 0x50, 0xa4, 0xf0, 0xc, 0x58, 0xe9, 0xbd, 0x41, 0x15, 0x3e, 0x6a, 0x96, 0xc2, 0x73, 0x27, 0xdb, 0x8f, 0x8d, 0xd9, 0x25, 0x71, 0xc0, 0x94, 0x68, 0x3c, 0x17, 0x43, 0xbf, 0xeb, 0x5a, 0xe, 0xf2, 0xa6, 0xf6, 0xa2, 0x5e, 0xa, 0xbb, 0xef, 0x13, 0x47, 0x6c, 0x38, 0xc4, 0x90, 0x21, 0x75, 0x89, 0xdd, 0xdf, 0x8b, 0x77, 0x23, 0x92, 0xc6, 0x3a, 0x6e, 0x45, 0x11, 0xed, 0xb9, 0x8, 0x5c, 0xa0, 0xf4, 0x55, 0x1, 0xfd, 0xa9, 0x18, 0x4c, 0xb0, 0xe4, 0xcf, 0x9b, 0x67, 0x33, 0x82, 0xd6, 0x2a, 0x7e, 0x7c, 0x28, 0xd4, 0x80, 0x31, 0x65, 0x99, 0xcd, 0xe6, 0xb2, 0x4e, 0x1a, 0xab, 0xff, 0x3, 0x57, 0x7, 0x53, 0xaf, 0xfb, 0x4a, 0x1e, 0xe2, 0xb6, 0x9d, 0xc9, 0x35, 0x61, 0xd0, 0x84, 0x78, 0x2c, 0x2e, 0x7a, 0x86, 0xd2, 0x63, 0x37, 0xcb, 0x9f, 0xb4, 0xe0, 0x1c, 0x48, 0xf9, 0xad, 0x51, 0x5, 0xf1, 0xa5, 0x59, 0xd, 0xbc, 0xe8, 0x14, 0x40, 0x6b, 0x3f, 0xc3, 0x97, 0x26, 0x72, 0x8e, 0xda, 0xd8, 0x8c, 0x70, 0x24, 0x95, 0xc1, 0x3d, 0x69, 0x42, 0x16, 0xea, 0xbe, 0xf, 0x5b, 0xa7, 0xf3, 0xa3, 0xf7, 0xb, 0x5f, 0xee, 0xba, 0x46, 0x12, 0x39, 0x6d, 0x91, 0xc5, 0x74, 0x20, 0xdc, 0x88, 0x8a, 0xde, 0x22, 0x76, 0xc7, 0x93, 0x6f, 0x3b, 0x10, 0x44, 0xb8, 0xec, 0x5d, 0x9, 0xf5, 0xa1},
- {0x0, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24, 0x39, 0x6c, 0x93, 0xc6, 0x70, 0x25, 0xda, 0x8f, 0xab, 0xfe, 0x1, 0x54, 0xe2, 0xb7, 0x48, 0x1d, 0x72, 0x27, 0xd8, 0x8d, 0x3b, 0x6e, 0x91, 0xc4, 0xe0, 0xb5, 0x4a, 0x1f, 0xa9, 0xfc, 0x3, 0x56, 0x4b, 0x1e, 0xe1, 0xb4, 0x2, 0x57, 0xa8, 0xfd, 0xd9, 0x8c, 0x73, 0x26, 0x90, 0xc5, 0x3a, 0x6f, 0xe4, 0xb1, 0x4e, 0x1b, 0xad, 0xf8, 0x7, 0x52, 0x76, 0x23, 0xdc, 0x89, 0x3f, 0x6a, 0x95, 0xc0, 0xdd, 0x88, 0x77, 0x22, 0x94, 0xc1, 0x3e, 0x6b, 0x4f, 0x1a, 0xe5, 0xb0, 0x6, 0x53, 0xac, 0xf9, 0x96, 0xc3, 0x3c, 0x69, 0xdf, 0x8a, 0x75, 0x20, 0x4, 0x51, 0xae, 0xfb, 0x4d, 0x18, 0xe7, 0xb2, 0xaf, 0xfa, 0x5, 0x50, 0xe6, 0xb3, 0x4c, 0x19, 0x3d, 0x68, 0x97, 0xc2, 0x74, 0x21, 0xde, 0x8b, 0xd5, 0x80, 0x7f, 0x2a, 0x9c, 0xc9, 0x36, 0x63, 0x47, 0x12, 0xed, 0xb8, 0xe, 0x5b, 0xa4, 0xf1, 0xec, 0xb9, 0x46, 0x13, 0xa5, 0xf0, 0xf, 0x5a, 0x7e, 0x2b, 0xd4, 0x81, 0x37, 0x62, 0x9d, 0xc8, 0xa7, 0xf2, 0xd, 0x58, 0xee, 0xbb, 0x44, 0x11, 0x35, 0x60, 0x9f, 0xca, 0x7c, 0x29, 0xd6, 0x83, 0x9e, 0xcb, 0x34, 0x61, 0xd7, 0x82, 0x7d, 0x28, 0xc, 0x59, 0xa6, 0xf3, 0x45, 0x10, 0xef, 0xba, 0x31, 0x64, 0x9b, 0xce, 0x78, 0x2d, 0xd2, 0x87, 0xa3, 0xf6, 0x9, 0x5c, 0xea, 0xbf, 0x40, 0x15, 0x8, 0x5d, 0xa2, 0xf7, 0x41, 0x14, 0xeb, 0xbe, 0x9a, 0xcf, 0x30, 0x65, 0xd3, 0x86, 0x79, 0x2c, 0x43, 0x16, 0xe9, 0xbc, 0xa, 0x5f, 0xa0, 0xf5, 0xd1, 0x84, 0x7b, 0x2e, 0x98, 0xcd, 0x32, 0x67, 0x7a, 0x2f, 0xd0, 0x85, 0x33, 0x66, 0x99, 0xcc, 0xe8, 0xbd, 0x42, 0x17, 0xa1, 0xf4, 0xb, 0x5e},
- {0x0, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35, 0x9, 0x5f, 0xa5, 0xf3, 0x4c, 0x1a, 0xe0, 0xb6, 0x83, 0xd5, 0x2f, 0x79, 0xc6, 0x90, 0x6a, 0x3c, 0x12, 0x44, 0xbe, 0xe8, 0x57, 0x1, 0xfb, 0xad, 0x98, 0xce, 0x34, 0x62, 0xdd, 0x8b, 0x71, 0x27, 0x1b, 0x4d, 0xb7, 0xe1, 0x5e, 0x8, 0xf2, 0xa4, 0x91, 0xc7, 0x3d, 0x6b, 0xd4, 0x82, 0x78, 0x2e, 0x24, 0x72, 0x88, 0xde, 0x61, 0x37, 0xcd, 0x9b, 0xae, 0xf8, 0x2, 0x54, 0xeb, 0xbd, 0x47, 0x11, 0x2d, 0x7b, 0x81, 0xd7, 0x68, 0x3e, 0xc4, 0x92, 0xa7, 0xf1, 0xb, 0x5d, 0xe2, 0xb4, 0x4e, 0x18, 0x36, 0x60, 0x9a, 0xcc, 0x73, 0x25, 0xdf, 0x89, 0xbc, 0xea, 0x10, 0x46, 0xf9, 0xaf, 0x55, 0x3, 0x3f, 0x69, 0x93, 0xc5, 0x7a, 0x2c, 0xd6, 0x80, 0xb5, 0xe3, 0x19, 0x4f, 0xf0, 0xa6, 0x5c, 0xa, 0x48, 0x1e, 0xe4, 0xb2, 0xd, 0x5b, 0xa1, 0xf7, 0xc2, 0x94, 0x6e, 0x38, 0x87, 0xd1, 0x2b, 0x7d, 0x41, 0x17, 0xed, 0xbb, 0x4, 0x52, 0xa8, 0xfe, 0xcb, 0x9d, 0x67, 0x31, 0x8e, 0xd8, 0x22, 0x74, 0x5a, 0xc, 0xf6, 0xa0, 0x1f, 0x49, 0xb3, 0xe5, 0xd0, 0x86, 0x7c, 0x2a, 0x95, 0xc3, 0x39, 0x6f, 0x53, 0x5, 0xff, 0xa9, 0x16, 0x40, 0xba, 0xec, 0xd9, 0x8f, 0x75, 0x23, 0x9c, 0xca, 0x30, 0x66, 0x6c, 0x3a, 0xc0, 0x96, 0x29, 0x7f, 0x85, 0xd3, 0xe6, 0xb0, 0x4a, 0x1c, 0xa3, 0xf5, 0xf, 0x59, 0x65, 0x33, 0xc9, 0x9f, 0x20, 0x76, 0x8c, 0xda, 0xef, 0xb9, 0x43, 0x15, 0xaa, 0xfc, 0x6, 0x50, 0x7e, 0x28, 0xd2, 0x84, 0x3b, 0x6d, 0x97, 0xc1, 0xf4, 0xa2, 0x58, 0xe, 0xb1, 0xe7, 0x1d, 0x4b, 0x77, 0x21, 0xdb, 0x8d, 0x32, 0x64, 0x9e, 0xc8, 0xfd, 0xab, 0x51, 0x7, 0xb8, 0xee, 0x14, 0x42},
- {0x0, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a, 0x19, 0x4e, 0xb7, 0xe0, 0x58, 0xf, 0xf6, 0xa1, 0x9b, 0xcc, 0x35, 0x62, 0xda, 0x8d, 0x74, 0x23, 0x32, 0x65, 0x9c, 0xcb, 0x73, 0x24, 0xdd, 0x8a, 0xb0, 0xe7, 0x1e, 0x49, 0xf1, 0xa6, 0x5f, 0x8, 0x2b, 0x7c, 0x85, 0xd2, 0x6a, 0x3d, 0xc4, 0x93, 0xa9, 0xfe, 0x7, 0x50, 0xe8, 0xbf, 0x46, 0x11, 0x64, 0x33, 0xca, 0x9d, 0x25, 0x72, 0x8b, 0xdc, 0xe6, 0xb1, 0x48, 0x1f, 0xa7, 0xf0, 0x9, 0x5e, 0x7d, 0x2a, 0xd3, 0x84, 0x3c, 0x6b, 0x92, 0xc5, 0xff, 0xa8, 0x51, 0x6, 0xbe, 0xe9, 0x10, 0x47, 0x56, 0x1, 0xf8, 0xaf, 0x17, 0x40, 0xb9, 0xee, 0xd4, 0x83, 0x7a, 0x2d, 0x95, 0xc2, 0x3b, 0x6c, 0x4f, 0x18, 0xe1, 0xb6, 0xe, 0x59, 0xa0, 0xf7, 0xcd, 0x9a, 0x63, 0x34, 0x8c, 0xdb, 0x22, 0x75, 0xc8, 0x9f, 0x66, 0x31, 0x89, 0xde, 0x27, 0x70, 0x4a, 0x1d, 0xe4, 0xb3, 0xb, 0x5c, 0xa5, 0xf2, 0xd1, 0x86, 0x7f, 0x28, 0x90, 0xc7, 0x3e, 0x69, 0x53, 0x4, 0xfd, 0xaa, 0x12, 0x45, 0xbc, 0xeb, 0xfa, 0xad, 0x54, 0x3, 0xbb, 0xec, 0x15, 0x42, 0x78, 0x2f, 0xd6, 0x81, 0x39, 0x6e, 0x97, 0xc0, 0xe3, 0xb4, 0x4d, 0x1a, 0xa2, 0xf5, 0xc, 0x5b, 0x61, 0x36, 0xcf, 0x98, 0x20, 0x77, 0x8e, 0xd9, 0xac, 0xfb, 0x2, 0x55, 0xed, 0xba, 0x43, 0x14, 0x2e, 0x79, 0x80, 0xd7, 0x6f, 0x38, 0xc1, 0x96, 0xb5, 0xe2, 0x1b, 0x4c, 0xf4, 0xa3, 0x5a, 0xd, 0x37, 0x60, 0x99, 0xce, 0x76, 0x21, 0xd8, 0x8f, 0x9e, 0xc9, 0x30, 0x67, 0xdf, 0x88, 0x71, 0x26, 0x1c, 0x4b, 0xb2, 0xe5, 0x5d, 0xa, 0xf3, 0xa4, 0x87, 0xd0, 0x29, 0x7e, 0xc6, 0x91, 0x68, 0x3f, 0x5, 0x52, 0xab, 0xfc, 0x44, 0x13, 0xea, 0xbd},
- {0x0, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f, 0xe9, 0xb1, 0x59, 0x1, 0x94, 0xcc, 0x24, 0x7c, 0x13, 0x4b, 0xa3, 0xfb, 0x6e, 0x36, 0xde, 0x86, 0xcf, 0x97, 0x7f, 0x27, 0xb2, 0xea, 0x2, 0x5a, 0x35, 0x6d, 0x85, 0xdd, 0x48, 0x10, 0xf8, 0xa0, 0x26, 0x7e, 0x96, 0xce, 0x5b, 0x3, 0xeb, 0xb3, 0xdc, 0x84, 0x6c, 0x34, 0xa1, 0xf9, 0x11, 0x49, 0x83, 0xdb, 0x33, 0x6b, 0xfe, 0xa6, 0x4e, 0x16, 0x79, 0x21, 0xc9, 0x91, 0x4, 0x5c, 0xb4, 0xec, 0x6a, 0x32, 0xda, 0x82, 0x17, 0x4f, 0xa7, 0xff, 0x90, 0xc8, 0x20, 0x78, 0xed, 0xb5, 0x5d, 0x5, 0x4c, 0x14, 0xfc, 0xa4, 0x31, 0x69, 0x81, 0xd9, 0xb6, 0xee, 0x6, 0x5e, 0xcb, 0x93, 0x7b, 0x23, 0xa5, 0xfd, 0x15, 0x4d, 0xd8, 0x80, 0x68, 0x30, 0x5f, 0x7, 0xef, 0xb7, 0x22, 0x7a, 0x92, 0xca, 0x1b, 0x43, 0xab, 0xf3, 0x66, 0x3e, 0xd6, 0x8e, 0xe1, 0xb9, 0x51, 0x9, 0x9c, 0xc4, 0x2c, 0x74, 0xf2, 0xaa, 0x42, 0x1a, 0x8f, 0xd7, 0x3f, 0x67, 0x8, 0x50, 0xb8, 0xe0, 0x75, 0x2d, 0xc5, 0x9d, 0xd4, 0x8c, 0x64, 0x3c, 0xa9, 0xf1, 0x19, 0x41, 0x2e, 0x76, 0x9e, 0xc6, 0x53, 0xb, 0xe3, 0xbb, 0x3d, 0x65, 0x8d, 0xd5, 0x40, 0x18, 0xf0, 0xa8, 0xc7, 0x9f, 0x77, 0x2f, 0xba, 0xe2, 0xa, 0x52, 0x98, 0xc0, 0x28, 0x70, 0xe5, 0xbd, 0x55, 0xd, 0x62, 0x3a, 0xd2, 0x8a, 0x1f, 0x47, 0xaf, 0xf7, 0x71, 0x29, 0xc1, 0x99, 0xc, 0x54, 0xbc, 0xe4, 0x8b, 0xd3, 0x3b, 0x63, 0xf6, 0xae, 0x46, 0x1e, 0x57, 0xf, 0xe7, 0xbf, 0x2a, 0x72, 0x9a, 0xc2, 0xad, 0xf5, 0x1d, 0x45, 0xd0, 0x88, 0x60, 0x38, 0xbe, 0xe6, 0xe, 0x56, 0xc3, 0x9b, 0x73, 0x2b, 0x44, 0x1c, 0xf4, 0xac, 0x39, 0x61, 0x89, 0xd1},
- {0x0, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60, 0xf9, 0xa0, 0x4b, 0x12, 0x80, 0xd9, 0x32, 0x6b, 0xb, 0x52, 0xb9, 0xe0, 0x72, 0x2b, 0xc0, 0x99, 0xef, 0xb6, 0x5d, 0x4, 0x96, 0xcf, 0x24, 0x7d, 0x1d, 0x44, 0xaf, 0xf6, 0x64, 0x3d, 0xd6, 0x8f, 0x16, 0x4f, 0xa4, 0xfd, 0x6f, 0x36, 0xdd, 0x84, 0xe4, 0xbd, 0x56, 0xf, 0x9d, 0xc4, 0x2f, 0x76, 0xc3, 0x9a, 0x71, 0x28, 0xba, 0xe3, 0x8, 0x51, 0x31, 0x68, 0x83, 0xda, 0x48, 0x11, 0xfa, 0xa3, 0x3a, 0x63, 0x88, 0xd1, 0x43, 0x1a, 0xf1, 0xa8, 0xc8, 0x91, 0x7a, 0x23, 0xb1, 0xe8, 0x3, 0x5a, 0x2c, 0x75, 0x9e, 0xc7, 0x55, 0xc, 0xe7, 0xbe, 0xde, 0x87, 0x6c, 0x35, 0xa7, 0xfe, 0x15, 0x4c, 0xd5, 0x8c, 0x67, 0x3e, 0xac, 0xf5, 0x1e, 0x47, 0x27, 0x7e, 0x95, 0xcc, 0x5e, 0x7, 0xec, 0xb5, 0x9b, 0xc2, 0x29, 0x70, 0xe2, 0xbb, 0x50, 0x9, 0x69, 0x30, 0xdb, 0x82, 0x10, 0x49, 0xa2, 0xfb, 0x62, 0x3b, 0xd0, 0x89, 0x1b, 0x42, 0xa9, 0xf0, 0x90, 0xc9, 0x22, 0x7b, 0xe9, 0xb0, 0x5b, 0x2, 0x74, 0x2d, 0xc6, 0x9f, 0xd, 0x54, 0xbf, 0xe6, 0x86, 0xdf, 0x34, 0x6d, 0xff, 0xa6, 0x4d, 0x14, 0x8d, 0xd4, 0x3f, 0x66, 0xf4, 0xad, 0x46, 0x1f, 0x7f, 0x26, 0xcd, 0x94, 0x6, 0x5f, 0xb4, 0xed, 0x58, 0x1, 0xea, 0xb3, 0x21, 0x78, 0x93, 0xca, 0xaa, 0xf3, 0x18, 0x41, 0xd3, 0x8a, 0x61, 0x38, 0xa1, 0xf8, 0x13, 0x4a, 0xd8, 0x81, 0x6a, 0x33, 0x53, 0xa, 0xe1, 0xb8, 0x2a, 0x73, 0x98, 0xc1, 0xb7, 0xee, 0x5, 0x5c, 0xce, 0x97, 0x7c, 0x25, 0x45, 0x1c, 0xf7, 0xae, 0x3c, 0x65, 0x8e, 0xd7, 0x4e, 0x17, 0xfc, 0xa5, 0x37, 0x6e, 0x85, 0xdc, 0xbc, 0xe5, 0xe, 0x57, 0xc5, 0x9c, 0x77, 0x2e},
- {0x0, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x4, 0x9f, 0xc5, 0x2b, 0x71, 0xc9, 0x93, 0x7d, 0x27, 0xbc, 0xe6, 0x8, 0x52, 0x23, 0x79, 0x97, 0xcd, 0x56, 0xc, 0xe2, 0xb8, 0x8f, 0xd5, 0x3b, 0x61, 0xfa, 0xa0, 0x4e, 0x14, 0x65, 0x3f, 0xd1, 0x8b, 0x10, 0x4a, 0xa4, 0xfe, 0x46, 0x1c, 0xf2, 0xa8, 0x33, 0x69, 0x87, 0xdd, 0xac, 0xf6, 0x18, 0x42, 0xd9, 0x83, 0x6d, 0x37, 0x3, 0x59, 0xb7, 0xed, 0x76, 0x2c, 0xc2, 0x98, 0xe9, 0xb3, 0x5d, 0x7, 0x9c, 0xc6, 0x28, 0x72, 0xca, 0x90, 0x7e, 0x24, 0xbf, 0xe5, 0xb, 0x51, 0x20, 0x7a, 0x94, 0xce, 0x55, 0xf, 0xe1, 0xbb, 0x8c, 0xd6, 0x38, 0x62, 0xf9, 0xa3, 0x4d, 0x17, 0x66, 0x3c, 0xd2, 0x88, 0x13, 0x49, 0xa7, 0xfd, 0x45, 0x1f, 0xf1, 0xab, 0x30, 0x6a, 0x84, 0xde, 0xaf, 0xf5, 0x1b, 0x41, 0xda, 0x80, 0x6e, 0x34, 0x6, 0x5c, 0xb2, 0xe8, 0x73, 0x29, 0xc7, 0x9d, 0xec, 0xb6, 0x58, 0x2, 0x99, 0xc3, 0x2d, 0x77, 0xcf, 0x95, 0x7b, 0x21, 0xba, 0xe0, 0xe, 0x54, 0x25, 0x7f, 0x91, 0xcb, 0x50, 0xa, 0xe4, 0xbe, 0x89, 0xd3, 0x3d, 0x67, 0xfc, 0xa6, 0x48, 0x12, 0x63, 0x39, 0xd7, 0x8d, 0x16, 0x4c, 0xa2, 0xf8, 0x40, 0x1a, 0xf4, 0xae, 0x35, 0x6f, 0x81, 0xdb, 0xaa, 0xf0, 0x1e, 0x44, 0xdf, 0x85, 0x6b, 0x31, 0x5, 0x5f, 0xb1, 0xeb, 0x70, 0x2a, 0xc4, 0x9e, 0xef, 0xb5, 0x5b, 0x1, 0x9a, 0xc0, 0x2e, 0x74, 0xcc, 0x96, 0x78, 0x22, 0xb9, 0xe3, 0xd, 0x57, 0x26, 0x7c, 0x92, 0xc8, 0x53, 0x9, 0xe7, 0xbd, 0x8a, 0xd0, 0x3e, 0x64, 0xff, 0xa5, 0x4b, 0x11, 0x60, 0x3a, 0xd4, 0x8e, 0x15, 0x4f, 0xa1, 0xfb, 0x43, 0x19, 0xf7, 0xad, 0x36, 0x6c, 0x82, 0xd8, 0xa9, 0xf3, 0x1d, 0x47, 0xdc, 0x86, 0x68, 0x32},
- {0x0, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0xf, 0x93, 0xc8, 0x25, 0x7e, 0xd9, 0x82, 0x6f, 0x34, 0xa8, 0xf3, 0x1e, 0x45, 0x3b, 0x60, 0x8d, 0xd6, 0x4a, 0x11, 0xfc, 0xa7, 0xaf, 0xf4, 0x19, 0x42, 0xde, 0x85, 0x68, 0x33, 0x4d, 0x16, 0xfb, 0xa0, 0x3c, 0x67, 0x8a, 0xd1, 0x76, 0x2d, 0xc0, 0x9b, 0x7, 0x5c, 0xb1, 0xea, 0x94, 0xcf, 0x22, 0x79, 0xe5, 0xbe, 0x53, 0x8, 0x43, 0x18, 0xf5, 0xae, 0x32, 0x69, 0x84, 0xdf, 0xa1, 0xfa, 0x17, 0x4c, 0xd0, 0x8b, 0x66, 0x3d, 0x9a, 0xc1, 0x2c, 0x77, 0xeb, 0xb0, 0x5d, 0x6, 0x78, 0x23, 0xce, 0x95, 0x9, 0x52, 0xbf, 0xe4, 0xec, 0xb7, 0x5a, 0x1, 0x9d, 0xc6, 0x2b, 0x70, 0xe, 0x55, 0xb8, 0xe3, 0x7f, 0x24, 0xc9, 0x92, 0x35, 0x6e, 0x83, 0xd8, 0x44, 0x1f, 0xf2, 0xa9, 0xd7, 0x8c, 0x61, 0x3a, 0xa6, 0xfd, 0x10, 0x4b, 0x86, 0xdd, 0x30, 0x6b, 0xf7, 0xac, 0x41, 0x1a, 0x64, 0x3f, 0xd2, 0x89, 0x15, 0x4e, 0xa3, 0xf8, 0x5f, 0x4, 0xe9, 0xb2, 0x2e, 0x75, 0x98, 0xc3, 0xbd, 0xe6, 0xb, 0x50, 0xcc, 0x97, 0x7a, 0x21, 0x29, 0x72, 0x9f, 0xc4, 0x58, 0x3, 0xee, 0xb5, 0xcb, 0x90, 0x7d, 0x26, 0xba, 0xe1, 0xc, 0x57, 0xf0, 0xab, 0x46, 0x1d, 0x81, 0xda, 0x37, 0x6c, 0x12, 0x49, 0xa4, 0xff, 0x63, 0x38, 0xd5, 0x8e, 0xc5, 0x9e, 0x73, 0x28, 0xb4, 0xef, 0x2, 0x59, 0x27, 0x7c, 0x91, 0xca, 0x56, 0xd, 0xe0, 0xbb, 0x1c, 0x47, 0xaa, 0xf1, 0x6d, 0x36, 0xdb, 0x80, 0xfe, 0xa5, 0x48, 0x13, 0x8f, 0xd4, 0x39, 0x62, 0x6a, 0x31, 0xdc, 0x87, 0x1b, 0x40, 0xad, 0xf6, 0x88, 0xd3, 0x3e, 0x65, 0xf9, 0xa2, 0x4f, 0x14, 0xb3, 0xe8, 0x5, 0x5e, 0xc2, 0x99, 0x74, 0x2f, 0x51, 0xa, 0xe7, 0xbc, 0x20, 0x7b, 0x96, 0xcd},
- {0x0, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0xf, 0x53, 0xa9, 0xf5, 0x11, 0x4d, 0xc4, 0x98, 0x7c, 0x20, 0x73, 0x2f, 0xcb, 0x97, 0x1e, 0x42, 0xa6, 0xfa, 0x4f, 0x13, 0xf7, 0xab, 0x22, 0x7e, 0x9a, 0xc6, 0x95, 0xc9, 0x2d, 0x71, 0xf8, 0xa4, 0x40, 0x1c, 0xe6, 0xba, 0x5e, 0x2, 0x8b, 0xd7, 0x33, 0x6f, 0x3c, 0x60, 0x84, 0xd8, 0x51, 0xd, 0xe9, 0xb5, 0x9e, 0xc2, 0x26, 0x7a, 0xf3, 0xaf, 0x4b, 0x17, 0x44, 0x18, 0xfc, 0xa0, 0x29, 0x75, 0x91, 0xcd, 0x37, 0x6b, 0x8f, 0xd3, 0x5a, 0x6, 0xe2, 0xbe, 0xed, 0xb1, 0x55, 0x9, 0x80, 0xdc, 0x38, 0x64, 0xd1, 0x8d, 0x69, 0x35, 0xbc, 0xe0, 0x4, 0x58, 0xb, 0x57, 0xb3, 0xef, 0x66, 0x3a, 0xde, 0x82, 0x78, 0x24, 0xc0, 0x9c, 0x15, 0x49, 0xad, 0xf1, 0xa2, 0xfe, 0x1a, 0x46, 0xcf, 0x93, 0x77, 0x2b, 0x21, 0x7d, 0x99, 0xc5, 0x4c, 0x10, 0xf4, 0xa8, 0xfb, 0xa7, 0x43, 0x1f, 0x96, 0xca, 0x2e, 0x72, 0x88, 0xd4, 0x30, 0x6c, 0xe5, 0xb9, 0x5d, 0x1, 0x52, 0xe, 0xea, 0xb6, 0x3f, 0x63, 0x87, 0xdb, 0x6e, 0x32, 0xd6, 0x8a, 0x3, 0x5f, 0xbb, 0xe7, 0xb4, 0xe8, 0xc, 0x50, 0xd9, 0x85, 0x61, 0x3d, 0xc7, 0x9b, 0x7f, 0x23, 0xaa, 0xf6, 0x12, 0x4e, 0x1d, 0x41, 0xa5, 0xf9, 0x70, 0x2c, 0xc8, 0x94, 0xbf, 0xe3, 0x7, 0x5b, 0xd2, 0x8e, 0x6a, 0x36, 0x65, 0x39, 0xdd, 0x81, 0x8, 0x54, 0xb0, 0xec, 0x16, 0x4a, 0xae, 0xf2, 0x7b, 0x27, 0xc3, 0x9f, 0xcc, 0x90, 0x74, 0x28, 0xa1, 0xfd, 0x19, 0x45, 0xf0, 0xac, 0x48, 0x14, 0x9d, 0xc1, 0x25, 0x79, 0x2a, 0x76, 0x92, 0xce, 0x47, 0x1b, 0xff, 0xa3, 0x59, 0x5, 0xe1, 0xbd, 0x34, 0x68, 0x8c, 0xd0, 0x83, 0xdf, 0x3b, 0x67, 0xee, 0xb2, 0x56, 0xa},
- {0x0, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x1, 0x5c, 0xb9, 0xe4, 0x3, 0x5e, 0xd0, 0x8d, 0x6a, 0x37, 0x6b, 0x36, 0xd1, 0x8c, 0x2, 0x5f, 0xb8, 0xe5, 0x6f, 0x32, 0xd5, 0x88, 0x6, 0x5b, 0xbc, 0xe1, 0xbd, 0xe0, 0x7, 0x5a, 0xd4, 0x89, 0x6e, 0x33, 0xd6, 0x8b, 0x6c, 0x31, 0xbf, 0xe2, 0x5, 0x58, 0x4, 0x59, 0xbe, 0xe3, 0x6d, 0x30, 0xd7, 0x8a, 0xde, 0x83, 0x64, 0x39, 0xb7, 0xea, 0xd, 0x50, 0xc, 0x51, 0xb6, 0xeb, 0x65, 0x38, 0xdf, 0x82, 0x67, 0x3a, 0xdd, 0x80, 0xe, 0x53, 0xb4, 0xe9, 0xb5, 0xe8, 0xf, 0x52, 0xdc, 0x81, 0x66, 0x3b, 0xb1, 0xec, 0xb, 0x56, 0xd8, 0x85, 0x62, 0x3f, 0x63, 0x3e, 0xd9, 0x84, 0xa, 0x57, 0xb0, 0xed, 0x8, 0x55, 0xb2, 0xef, 0x61, 0x3c, 0xdb, 0x86, 0xda, 0x87, 0x60, 0x3d, 0xb3, 0xee, 0x9, 0x54, 0xa1, 0xfc, 0x1b, 0x46, 0xc8, 0x95, 0x72, 0x2f, 0x73, 0x2e, 0xc9, 0x94, 0x1a, 0x47, 0xa0, 0xfd, 0x18, 0x45, 0xa2, 0xff, 0x71, 0x2c, 0xcb, 0x96, 0xca, 0x97, 0x70, 0x2d, 0xa3, 0xfe, 0x19, 0x44, 0xce, 0x93, 0x74, 0x29, 0xa7, 0xfa, 0x1d, 0x40, 0x1c, 0x41, 0xa6, 0xfb, 0x75, 0x28, 0xcf, 0x92, 0x77, 0x2a, 0xcd, 0x90, 0x1e, 0x43, 0xa4, 0xf9, 0xa5, 0xf8, 0x1f, 0x42, 0xcc, 0x91, 0x76, 0x2b, 0x7f, 0x22, 0xc5, 0x98, 0x16, 0x4b, 0xac, 0xf1, 0xad, 0xf0, 0x17, 0x4a, 0xc4, 0x99, 0x7e, 0x23, 0xc6, 0x9b, 0x7c, 0x21, 0xaf, 0xf2, 0x15, 0x48, 0x14, 0x49, 0xae, 0xf3, 0x7d, 0x20, 0xc7, 0x9a, 0x10, 0x4d, 0xaa, 0xf7, 0x79, 0x24, 0xc3, 0x9e, 0xc2, 0x9f, 0x78, 0x25, 0xab, 0xf6, 0x11, 0x4c, 0xa9, 0xf4, 0x13, 0x4e, 0xc0, 0x9d, 0x7a, 0x27, 0x7b, 0x26, 0xc1, 0x9c, 0x12, 0x4f, 0xa8, 0xf5},
- {0x0, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d, 0x89, 0xd7, 0x35, 0x6b, 0xec, 0xb2, 0x50, 0xe, 0x43, 0x1d, 0xff, 0xa1, 0x26, 0x78, 0x9a, 0xc4, 0xf, 0x51, 0xb3, 0xed, 0x6a, 0x34, 0xd6, 0x88, 0xc5, 0x9b, 0x79, 0x27, 0xa0, 0xfe, 0x1c, 0x42, 0x86, 0xd8, 0x3a, 0x64, 0xe3, 0xbd, 0x5f, 0x1, 0x4c, 0x12, 0xf0, 0xae, 0x29, 0x77, 0x95, 0xcb, 0x1e, 0x40, 0xa2, 0xfc, 0x7b, 0x25, 0xc7, 0x99, 0xd4, 0x8a, 0x68, 0x36, 0xb1, 0xef, 0xd, 0x53, 0x97, 0xc9, 0x2b, 0x75, 0xf2, 0xac, 0x4e, 0x10, 0x5d, 0x3, 0xe1, 0xbf, 0x38, 0x66, 0x84, 0xda, 0x11, 0x4f, 0xad, 0xf3, 0x74, 0x2a, 0xc8, 0x96, 0xdb, 0x85, 0x67, 0x39, 0xbe, 0xe0, 0x2, 0x5c, 0x98, 0xc6, 0x24, 0x7a, 0xfd, 0xa3, 0x41, 0x1f, 0x52, 0xc, 0xee, 0xb0, 0x37, 0x69, 0x8b, 0xd5, 0x3c, 0x62, 0x80, 0xde, 0x59, 0x7, 0xe5, 0xbb, 0xf6, 0xa8, 0x4a, 0x14, 0x93, 0xcd, 0x2f, 0x71, 0xb5, 0xeb, 0x9, 0x57, 0xd0, 0x8e, 0x6c, 0x32, 0x7f, 0x21, 0xc3, 0x9d, 0x1a, 0x44, 0xa6, 0xf8, 0x33, 0x6d, 0x8f, 0xd1, 0x56, 0x8, 0xea, 0xb4, 0xf9, 0xa7, 0x45, 0x1b, 0x9c, 0xc2, 0x20, 0x7e, 0xba, 0xe4, 0x6, 0x58, 0xdf, 0x81, 0x63, 0x3d, 0x70, 0x2e, 0xcc, 0x92, 0x15, 0x4b, 0xa9, 0xf7, 0x22, 0x7c, 0x9e, 0xc0, 0x47, 0x19, 0xfb, 0xa5, 0xe8, 0xb6, 0x54, 0xa, 0x8d, 0xd3, 0x31, 0x6f, 0xab, 0xf5, 0x17, 0x49, 0xce, 0x90, 0x72, 0x2c, 0x61, 0x3f, 0xdd, 0x83, 0x4, 0x5a, 0xb8, 0xe6, 0x2d, 0x73, 0x91, 0xcf, 0x48, 0x16, 0xf4, 0xaa, 0xe7, 0xb9, 0x5b, 0x5, 0x82, 0xdc, 0x3e, 0x60, 0xa4, 0xfa, 0x18, 0x46, 0xc1, 0x9f, 0x7d, 0x23, 0x6e, 0x30, 0xd2, 0x8c, 0xb, 0x55, 0xb7, 0xe9},
- {0x0, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42, 0x99, 0xc6, 0x27, 0x78, 0xf8, 0xa7, 0x46, 0x19, 0x5b, 0x4, 0xe5, 0xba, 0x3a, 0x65, 0x84, 0xdb, 0x2f, 0x70, 0x91, 0xce, 0x4e, 0x11, 0xf0, 0xaf, 0xed, 0xb2, 0x53, 0xc, 0x8c, 0xd3, 0x32, 0x6d, 0xb6, 0xe9, 0x8, 0x57, 0xd7, 0x88, 0x69, 0x36, 0x74, 0x2b, 0xca, 0x95, 0x15, 0x4a, 0xab, 0xf4, 0x5e, 0x1, 0xe0, 0xbf, 0x3f, 0x60, 0x81, 0xde, 0x9c, 0xc3, 0x22, 0x7d, 0xfd, 0xa2, 0x43, 0x1c, 0xc7, 0x98, 0x79, 0x26, 0xa6, 0xf9, 0x18, 0x47, 0x5, 0x5a, 0xbb, 0xe4, 0x64, 0x3b, 0xda, 0x85, 0x71, 0x2e, 0xcf, 0x90, 0x10, 0x4f, 0xae, 0xf1, 0xb3, 0xec, 0xd, 0x52, 0xd2, 0x8d, 0x6c, 0x33, 0xe8, 0xb7, 0x56, 0x9, 0x89, 0xd6, 0x37, 0x68, 0x2a, 0x75, 0x94, 0xcb, 0x4b, 0x14, 0xf5, 0xaa, 0xbc, 0xe3, 0x2, 0x5d, 0xdd, 0x82, 0x63, 0x3c, 0x7e, 0x21, 0xc0, 0x9f, 0x1f, 0x40, 0xa1, 0xfe, 0x25, 0x7a, 0x9b, 0xc4, 0x44, 0x1b, 0xfa, 0xa5, 0xe7, 0xb8, 0x59, 0x6, 0x86, 0xd9, 0x38, 0x67, 0x93, 0xcc, 0x2d, 0x72, 0xf2, 0xad, 0x4c, 0x13, 0x51, 0xe, 0xef, 0xb0, 0x30, 0x6f, 0x8e, 0xd1, 0xa, 0x55, 0xb4, 0xeb, 0x6b, 0x34, 0xd5, 0x8a, 0xc8, 0x97, 0x76, 0x29, 0xa9, 0xf6, 0x17, 0x48, 0xe2, 0xbd, 0x5c, 0x3, 0x83, 0xdc, 0x3d, 0x62, 0x20, 0x7f, 0x9e, 0xc1, 0x41, 0x1e, 0xff, 0xa0, 0x7b, 0x24, 0xc5, 0x9a, 0x1a, 0x45, 0xa4, 0xfb, 0xb9, 0xe6, 0x7, 0x58, 0xd8, 0x87, 0x66, 0x39, 0xcd, 0x92, 0x73, 0x2c, 0xac, 0xf3, 0x12, 0x4d, 0xf, 0x50, 0xb1, 0xee, 0x6e, 0x31, 0xd0, 0x8f, 0x54, 0xb, 0xea, 0xb5, 0x35, 0x6a, 0x8b, 0xd4, 0x96, 0xc9, 0x28, 0x77, 0xf7, 0xa8, 0x49, 0x16},
- {0x0, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a, 0x4e, 0x2e, 0x8e, 0xee, 0xd3, 0xb3, 0x13, 0x73, 0x69, 0x9, 0xa9, 0xc9, 0xf4, 0x94, 0x34, 0x54, 0x9c, 0xfc, 0x5c, 0x3c, 0x1, 0x61, 0xc1, 0xa1, 0xbb, 0xdb, 0x7b, 0x1b, 0x26, 0x46, 0xe6, 0x86, 0xd2, 0xb2, 0x12, 0x72, 0x4f, 0x2f, 0x8f, 0xef, 0xf5, 0x95, 0x35, 0x55, 0x68, 0x8, 0xa8, 0xc8, 0x25, 0x45, 0xe5, 0x85, 0xb8, 0xd8, 0x78, 0x18, 0x2, 0x62, 0xc2, 0xa2, 0x9f, 0xff, 0x5f, 0x3f, 0x6b, 0xb, 0xab, 0xcb, 0xf6, 0x96, 0x36, 0x56, 0x4c, 0x2c, 0x8c, 0xec, 0xd1, 0xb1, 0x11, 0x71, 0xb9, 0xd9, 0x79, 0x19, 0x24, 0x44, 0xe4, 0x84, 0x9e, 0xfe, 0x5e, 0x3e, 0x3, 0x63, 0xc3, 0xa3, 0xf7, 0x97, 0x37, 0x57, 0x6a, 0xa, 0xaa, 0xca, 0xd0, 0xb0, 0x10, 0x70, 0x4d, 0x2d, 0x8d, 0xed, 0x4a, 0x2a, 0x8a, 0xea, 0xd7, 0xb7, 0x17, 0x77, 0x6d, 0xd, 0xad, 0xcd, 0xf0, 0x90, 0x30, 0x50, 0x4, 0x64, 0xc4, 0xa4, 0x99, 0xf9, 0x59, 0x39, 0x23, 0x43, 0xe3, 0x83, 0xbe, 0xde, 0x7e, 0x1e, 0xd6, 0xb6, 0x16, 0x76, 0x4b, 0x2b, 0x8b, 0xeb, 0xf1, 0x91, 0x31, 0x51, 0x6c, 0xc, 0xac, 0xcc, 0x98, 0xf8, 0x58, 0x38, 0x5, 0x65, 0xc5, 0xa5, 0xbf, 0xdf, 0x7f, 0x1f, 0x22, 0x42, 0xe2, 0x82, 0x6f, 0xf, 0xaf, 0xcf, 0xf2, 0x92, 0x32, 0x52, 0x48, 0x28, 0x88, 0xe8, 0xd5, 0xb5, 0x15, 0x75, 0x21, 0x41, 0xe1, 0x81, 0xbc, 0xdc, 0x7c, 0x1c, 0x6, 0x66, 0xc6, 0xa6, 0x9b, 0xfb, 0x5b, 0x3b, 0xf3, 0x93, 0x33, 0x53, 0x6e, 0xe, 0xae, 0xce, 0xd4, 0xb4, 0x14, 0x74, 0x49, 0x29, 0x89, 0xe9, 0xbd, 0xdd, 0x7d, 0x1d, 0x20, 0x40, 0xe0, 0x80, 0x9a, 0xfa, 0x5a, 0x3a, 0x7, 0x67, 0xc7, 0xa7},
- {0x0, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15, 0x5e, 0x3f, 0x9c, 0xfd, 0xc7, 0xa6, 0x5, 0x64, 0x71, 0x10, 0xb3, 0xd2, 0xe8, 0x89, 0x2a, 0x4b, 0xbc, 0xdd, 0x7e, 0x1f, 0x25, 0x44, 0xe7, 0x86, 0x93, 0xf2, 0x51, 0x30, 0xa, 0x6b, 0xc8, 0xa9, 0xe2, 0x83, 0x20, 0x41, 0x7b, 0x1a, 0xb9, 0xd8, 0xcd, 0xac, 0xf, 0x6e, 0x54, 0x35, 0x96, 0xf7, 0x65, 0x4, 0xa7, 0xc6, 0xfc, 0x9d, 0x3e, 0x5f, 0x4a, 0x2b, 0x88, 0xe9, 0xd3, 0xb2, 0x11, 0x70, 0x3b, 0x5a, 0xf9, 0x98, 0xa2, 0xc3, 0x60, 0x1, 0x14, 0x75, 0xd6, 0xb7, 0x8d, 0xec, 0x4f, 0x2e, 0xd9, 0xb8, 0x1b, 0x7a, 0x40, 0x21, 0x82, 0xe3, 0xf6, 0x97, 0x34, 0x55, 0x6f, 0xe, 0xad, 0xcc, 0x87, 0xe6, 0x45, 0x24, 0x1e, 0x7f, 0xdc, 0xbd, 0xa8, 0xc9, 0x6a, 0xb, 0x31, 0x50, 0xf3, 0x92, 0xca, 0xab, 0x8, 0x69, 0x53, 0x32, 0x91, 0xf0, 0xe5, 0x84, 0x27, 0x46, 0x7c, 0x1d, 0xbe, 0xdf, 0x94, 0xf5, 0x56, 0x37, 0xd, 0x6c, 0xcf, 0xae, 0xbb, 0xda, 0x79, 0x18, 0x22, 0x43, 0xe0, 0x81, 0x76, 0x17, 0xb4, 0xd5, 0xef, 0x8e, 0x2d, 0x4c, 0x59, 0x38, 0x9b, 0xfa, 0xc0, 0xa1, 0x2, 0x63, 0x28, 0x49, 0xea, 0x8b, 0xb1, 0xd0, 0x73, 0x12, 0x7, 0x66, 0xc5, 0xa4, 0x9e, 0xff, 0x5c, 0x3d, 0xaf, 0xce, 0x6d, 0xc, 0x36, 0x57, 0xf4, 0x95, 0x80, 0xe1, 0x42, 0x23, 0x19, 0x78, 0xdb, 0xba, 0xf1, 0x90, 0x33, 0x52, 0x68, 0x9, 0xaa, 0xcb, 0xde, 0xbf, 0x1c, 0x7d, 0x47, 0x26, 0x85, 0xe4, 0x13, 0x72, 0xd1, 0xb0, 0x8a, 0xeb, 0x48, 0x29, 0x3c, 0x5d, 0xfe, 0x9f, 0xa5, 0xc4, 0x67, 0x6, 0x4d, 0x2c, 0x8f, 0xee, 0xd4, 0xb5, 0x16, 0x77, 0x62, 0x3, 0xa0, 0xc1, 0xfb, 0x9a, 0x39, 0x58},
- {0x0, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x4, 0x6e, 0xc, 0xaa, 0xc8, 0xfb, 0x99, 0x3f, 0x5d, 0x59, 0x3b, 0x9d, 0xff, 0xcc, 0xae, 0x8, 0x6a, 0xdc, 0xbe, 0x18, 0x7a, 0x49, 0x2b, 0x8d, 0xef, 0xeb, 0x89, 0x2f, 0x4d, 0x7e, 0x1c, 0xba, 0xd8, 0xb2, 0xd0, 0x76, 0x14, 0x27, 0x45, 0xe3, 0x81, 0x85, 0xe7, 0x41, 0x23, 0x10, 0x72, 0xd4, 0xb6, 0xa5, 0xc7, 0x61, 0x3, 0x30, 0x52, 0xf4, 0x96, 0x92, 0xf0, 0x56, 0x34, 0x7, 0x65, 0xc3, 0xa1, 0xcb, 0xa9, 0xf, 0x6d, 0x5e, 0x3c, 0x9a, 0xf8, 0xfc, 0x9e, 0x38, 0x5a, 0x69, 0xb, 0xad, 0xcf, 0x79, 0x1b, 0xbd, 0xdf, 0xec, 0x8e, 0x28, 0x4a, 0x4e, 0x2c, 0x8a, 0xe8, 0xdb, 0xb9, 0x1f, 0x7d, 0x17, 0x75, 0xd3, 0xb1, 0x82, 0xe0, 0x46, 0x24, 0x20, 0x42, 0xe4, 0x86, 0xb5, 0xd7, 0x71, 0x13, 0x57, 0x35, 0x93, 0xf1, 0xc2, 0xa0, 0x6, 0x64, 0x60, 0x2, 0xa4, 0xc6, 0xf5, 0x97, 0x31, 0x53, 0x39, 0x5b, 0xfd, 0x9f, 0xac, 0xce, 0x68, 0xa, 0xe, 0x6c, 0xca, 0xa8, 0x9b, 0xf9, 0x5f, 0x3d, 0x8b, 0xe9, 0x4f, 0x2d, 0x1e, 0x7c, 0xda, 0xb8, 0xbc, 0xde, 0x78, 0x1a, 0x29, 0x4b, 0xed, 0x8f, 0xe5, 0x87, 0x21, 0x43, 0x70, 0x12, 0xb4, 0xd6, 0xd2, 0xb0, 0x16, 0x74, 0x47, 0x25, 0x83, 0xe1, 0xf2, 0x90, 0x36, 0x54, 0x67, 0x5, 0xa3, 0xc1, 0xc5, 0xa7, 0x1, 0x63, 0x50, 0x32, 0x94, 0xf6, 0x9c, 0xfe, 0x58, 0x3a, 0x9, 0x6b, 0xcd, 0xaf, 0xab, 0xc9, 0x6f, 0xd, 0x3e, 0x5c, 0xfa, 0x98, 0x2e, 0x4c, 0xea, 0x88, 0xbb, 0xd9, 0x7f, 0x1d, 0x19, 0x7b, 0xdd, 0xbf, 0x8c, 0xee, 0x48, 0x2a, 0x40, 0x22, 0x84, 0xe6, 0xd5, 0xb7, 0x11, 0x73, 0x77, 0x15, 0xb3, 0xd1, 0xe2, 0x80, 0x26, 0x44},
- {0x0, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0xb, 0x7e, 0x1d, 0xb8, 0xdb, 0xef, 0x8c, 0x29, 0x4a, 0x41, 0x22, 0x87, 0xe4, 0xd0, 0xb3, 0x16, 0x75, 0xfc, 0x9f, 0x3a, 0x59, 0x6d, 0xe, 0xab, 0xc8, 0xc3, 0xa0, 0x5, 0x66, 0x52, 0x31, 0x94, 0xf7, 0x82, 0xe1, 0x44, 0x27, 0x13, 0x70, 0xd5, 0xb6, 0xbd, 0xde, 0x7b, 0x18, 0x2c, 0x4f, 0xea, 0x89, 0xe5, 0x86, 0x23, 0x40, 0x74, 0x17, 0xb2, 0xd1, 0xda, 0xb9, 0x1c, 0x7f, 0x4b, 0x28, 0x8d, 0xee, 0x9b, 0xf8, 0x5d, 0x3e, 0xa, 0x69, 0xcc, 0xaf, 0xa4, 0xc7, 0x62, 0x1, 0x35, 0x56, 0xf3, 0x90, 0x19, 0x7a, 0xdf, 0xbc, 0x88, 0xeb, 0x4e, 0x2d, 0x26, 0x45, 0xe0, 0x83, 0xb7, 0xd4, 0x71, 0x12, 0x67, 0x4, 0xa1, 0xc2, 0xf6, 0x95, 0x30, 0x53, 0x58, 0x3b, 0x9e, 0xfd, 0xc9, 0xaa, 0xf, 0x6c, 0xd7, 0xb4, 0x11, 0x72, 0x46, 0x25, 0x80, 0xe3, 0xe8, 0x8b, 0x2e, 0x4d, 0x79, 0x1a, 0xbf, 0xdc, 0xa9, 0xca, 0x6f, 0xc, 0x38, 0x5b, 0xfe, 0x9d, 0x96, 0xf5, 0x50, 0x33, 0x7, 0x64, 0xc1, 0xa2, 0x2b, 0x48, 0xed, 0x8e, 0xba, 0xd9, 0x7c, 0x1f, 0x14, 0x77, 0xd2, 0xb1, 0x85, 0xe6, 0x43, 0x20, 0x55, 0x36, 0x93, 0xf0, 0xc4, 0xa7, 0x2, 0x61, 0x6a, 0x9, 0xac, 0xcf, 0xfb, 0x98, 0x3d, 0x5e, 0x32, 0x51, 0xf4, 0x97, 0xa3, 0xc0, 0x65, 0x6, 0xd, 0x6e, 0xcb, 0xa8, 0x9c, 0xff, 0x5a, 0x39, 0x4c, 0x2f, 0x8a, 0xe9, 0xdd, 0xbe, 0x1b, 0x78, 0x73, 0x10, 0xb5, 0xd6, 0xe2, 0x81, 0x24, 0x47, 0xce, 0xad, 0x8, 0x6b, 0x5f, 0x3c, 0x99, 0xfa, 0xf1, 0x92, 0x37, 0x54, 0x60, 0x3, 0xa6, 0xc5, 0xb0, 0xd3, 0x76, 0x15, 0x21, 0x42, 0xe7, 0x84, 0x8f, 0xec, 0x49, 0x2a, 0x1e, 0x7d, 0xd8, 0xbb},
- {0x0, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x7, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26, 0xe, 0x6a, 0xc6, 0xa2, 0x83, 0xe7, 0x4b, 0x2f, 0x9, 0x6d, 0xc1, 0xa5, 0x84, 0xe0, 0x4c, 0x28, 0x1c, 0x78, 0xd4, 0xb0, 0x91, 0xf5, 0x59, 0x3d, 0x1b, 0x7f, 0xd3, 0xb7, 0x96, 0xf2, 0x5e, 0x3a, 0x12, 0x76, 0xda, 0xbe, 0x9f, 0xfb, 0x57, 0x33, 0x15, 0x71, 0xdd, 0xb9, 0x98, 0xfc, 0x50, 0x34, 0x38, 0x5c, 0xf0, 0x94, 0xb5, 0xd1, 0x7d, 0x19, 0x3f, 0x5b, 0xf7, 0x93, 0xb2, 0xd6, 0x7a, 0x1e, 0x36, 0x52, 0xfe, 0x9a, 0xbb, 0xdf, 0x73, 0x17, 0x31, 0x55, 0xf9, 0x9d, 0xbc, 0xd8, 0x74, 0x10, 0x24, 0x40, 0xec, 0x88, 0xa9, 0xcd, 0x61, 0x5, 0x23, 0x47, 0xeb, 0x8f, 0xae, 0xca, 0x66, 0x2, 0x2a, 0x4e, 0xe2, 0x86, 0xa7, 0xc3, 0x6f, 0xb, 0x2d, 0x49, 0xe5, 0x81, 0xa0, 0xc4, 0x68, 0xc, 0x70, 0x14, 0xb8, 0xdc, 0xfd, 0x99, 0x35, 0x51, 0x77, 0x13, 0xbf, 0xdb, 0xfa, 0x9e, 0x32, 0x56, 0x7e, 0x1a, 0xb6, 0xd2, 0xf3, 0x97, 0x3b, 0x5f, 0x79, 0x1d, 0xb1, 0xd5, 0xf4, 0x90, 0x3c, 0x58, 0x6c, 0x8, 0xa4, 0xc0, 0xe1, 0x85, 0x29, 0x4d, 0x6b, 0xf, 0xa3, 0xc7, 0xe6, 0x82, 0x2e, 0x4a, 0x62, 0x6, 0xaa, 0xce, 0xef, 0x8b, 0x27, 0x43, 0x65, 0x1, 0xad, 0xc9, 0xe8, 0x8c, 0x20, 0x44, 0x48, 0x2c, 0x80, 0xe4, 0xc5, 0xa1, 0xd, 0x69, 0x4f, 0x2b, 0x87, 0xe3, 0xc2, 0xa6, 0xa, 0x6e, 0x46, 0x22, 0x8e, 0xea, 0xcb, 0xaf, 0x3, 0x67, 0x41, 0x25, 0x89, 0xed, 0xcc, 0xa8, 0x4, 0x60, 0x54, 0x30, 0x9c, 0xf8, 0xd9, 0xbd, 0x11, 0x75, 0x53, 0x37, 0x9b, 0xff, 0xde, 0xba, 0x16, 0x72, 0x5a, 0x3e, 0x92, 0xf6, 0xd7, 0xb3, 0x1f, 0x7b, 0x5d, 0x39, 0x95, 0xf1, 0xd0, 0xb4, 0x18, 0x7c},
- {0x0, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0xf, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29, 0x1e, 0x7b, 0xd4, 0xb1, 0x97, 0xf2, 0x5d, 0x38, 0x11, 0x74, 0xdb, 0xbe, 0x98, 0xfd, 0x52, 0x37, 0x3c, 0x59, 0xf6, 0x93, 0xb5, 0xd0, 0x7f, 0x1a, 0x33, 0x56, 0xf9, 0x9c, 0xba, 0xdf, 0x70, 0x15, 0x22, 0x47, 0xe8, 0x8d, 0xab, 0xce, 0x61, 0x4, 0x2d, 0x48, 0xe7, 0x82, 0xa4, 0xc1, 0x6e, 0xb, 0x78, 0x1d, 0xb2, 0xd7, 0xf1, 0x94, 0x3b, 0x5e, 0x77, 0x12, 0xbd, 0xd8, 0xfe, 0x9b, 0x34, 0x51, 0x66, 0x3, 0xac, 0xc9, 0xef, 0x8a, 0x25, 0x40, 0x69, 0xc, 0xa3, 0xc6, 0xe0, 0x85, 0x2a, 0x4f, 0x44, 0x21, 0x8e, 0xeb, 0xcd, 0xa8, 0x7, 0x62, 0x4b, 0x2e, 0x81, 0xe4, 0xc2, 0xa7, 0x8, 0x6d, 0x5a, 0x3f, 0x90, 0xf5, 0xd3, 0xb6, 0x19, 0x7c, 0x55, 0x30, 0x9f, 0xfa, 0xdc, 0xb9, 0x16, 0x73, 0xf0, 0x95, 0x3a, 0x5f, 0x79, 0x1c, 0xb3, 0xd6, 0xff, 0x9a, 0x35, 0x50, 0x76, 0x13, 0xbc, 0xd9, 0xee, 0x8b, 0x24, 0x41, 0x67, 0x2, 0xad, 0xc8, 0xe1, 0x84, 0x2b, 0x4e, 0x68, 0xd, 0xa2, 0xc7, 0xcc, 0xa9, 0x6, 0x63, 0x45, 0x20, 0x8f, 0xea, 0xc3, 0xa6, 0x9, 0x6c, 0x4a, 0x2f, 0x80, 0xe5, 0xd2, 0xb7, 0x18, 0x7d, 0x5b, 0x3e, 0x91, 0xf4, 0xdd, 0xb8, 0x17, 0x72, 0x54, 0x31, 0x9e, 0xfb, 0x88, 0xed, 0x42, 0x27, 0x1, 0x64, 0xcb, 0xae, 0x87, 0xe2, 0x4d, 0x28, 0xe, 0x6b, 0xc4, 0xa1, 0x96, 0xf3, 0x5c, 0x39, 0x1f, 0x7a, 0xd5, 0xb0, 0x99, 0xfc, 0x53, 0x36, 0x10, 0x75, 0xda, 0xbf, 0xb4, 0xd1, 0x7e, 0x1b, 0x3d, 0x58, 0xf7, 0x92, 0xbb, 0xde, 0x71, 0x14, 0x32, 0x57, 0xf8, 0x9d, 0xaa, 0xcf, 0x60, 0x5, 0x23, 0x46, 0xe9, 0x8c, 0xa5, 0xc0, 0x6f, 0xa, 0x2c, 0x49, 0xe6, 0x83},
- {0x0, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38, 0x2e, 0x48, 0xe2, 0x84, 0xab, 0xcd, 0x67, 0x1, 0x39, 0x5f, 0xf5, 0x93, 0xbc, 0xda, 0x70, 0x16, 0x5c, 0x3a, 0x90, 0xf6, 0xd9, 0xbf, 0x15, 0x73, 0x4b, 0x2d, 0x87, 0xe1, 0xce, 0xa8, 0x2, 0x64, 0x72, 0x14, 0xbe, 0xd8, 0xf7, 0x91, 0x3b, 0x5d, 0x65, 0x3, 0xa9, 0xcf, 0xe0, 0x86, 0x2c, 0x4a, 0xb8, 0xde, 0x74, 0x12, 0x3d, 0x5b, 0xf1, 0x97, 0xaf, 0xc9, 0x63, 0x5, 0x2a, 0x4c, 0xe6, 0x80, 0x96, 0xf0, 0x5a, 0x3c, 0x13, 0x75, 0xdf, 0xb9, 0x81, 0xe7, 0x4d, 0x2b, 0x4, 0x62, 0xc8, 0xae, 0xe4, 0x82, 0x28, 0x4e, 0x61, 0x7, 0xad, 0xcb, 0xf3, 0x95, 0x3f, 0x59, 0x76, 0x10, 0xba, 0xdc, 0xca, 0xac, 0x6, 0x60, 0x4f, 0x29, 0x83, 0xe5, 0xdd, 0xbb, 0x11, 0x77, 0x58, 0x3e, 0x94, 0xf2, 0x6d, 0xb, 0xa1, 0xc7, 0xe8, 0x8e, 0x24, 0x42, 0x7a, 0x1c, 0xb6, 0xd0, 0xff, 0x99, 0x33, 0x55, 0x43, 0x25, 0x8f, 0xe9, 0xc6, 0xa0, 0xa, 0x6c, 0x54, 0x32, 0x98, 0xfe, 0xd1, 0xb7, 0x1d, 0x7b, 0x31, 0x57, 0xfd, 0x9b, 0xb4, 0xd2, 0x78, 0x1e, 0x26, 0x40, 0xea, 0x8c, 0xa3, 0xc5, 0x6f, 0x9, 0x1f, 0x79, 0xd3, 0xb5, 0x9a, 0xfc, 0x56, 0x30, 0x8, 0x6e, 0xc4, 0xa2, 0x8d, 0xeb, 0x41, 0x27, 0xd5, 0xb3, 0x19, 0x7f, 0x50, 0x36, 0x9c, 0xfa, 0xc2, 0xa4, 0xe, 0x68, 0x47, 0x21, 0x8b, 0xed, 0xfb, 0x9d, 0x37, 0x51, 0x7e, 0x18, 0xb2, 0xd4, 0xec, 0x8a, 0x20, 0x46, 0x69, 0xf, 0xa5, 0xc3, 0x89, 0xef, 0x45, 0x23, 0xc, 0x6a, 0xc0, 0xa6, 0x9e, 0xf8, 0x52, 0x34, 0x1b, 0x7d, 0xd7, 0xb1, 0xa7, 0xc1, 0x6b, 0xd, 0x22, 0x44, 0xee, 0x88, 0xb0, 0xd6, 0x7c, 0x1a, 0x35, 0x53, 0xf9, 0x9f},
- {0x0, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37, 0x3e, 0x59, 0xf0, 0x97, 0xbf, 0xd8, 0x71, 0x16, 0x21, 0x46, 0xef, 0x88, 0xa0, 0xc7, 0x6e, 0x9, 0x7c, 0x1b, 0xb2, 0xd5, 0xfd, 0x9a, 0x33, 0x54, 0x63, 0x4, 0xad, 0xca, 0xe2, 0x85, 0x2c, 0x4b, 0x42, 0x25, 0x8c, 0xeb, 0xc3, 0xa4, 0xd, 0x6a, 0x5d, 0x3a, 0x93, 0xf4, 0xdc, 0xbb, 0x12, 0x75, 0xf8, 0x9f, 0x36, 0x51, 0x79, 0x1e, 0xb7, 0xd0, 0xe7, 0x80, 0x29, 0x4e, 0x66, 0x1, 0xa8, 0xcf, 0xc6, 0xa1, 0x8, 0x6f, 0x47, 0x20, 0x89, 0xee, 0xd9, 0xbe, 0x17, 0x70, 0x58, 0x3f, 0x96, 0xf1, 0x84, 0xe3, 0x4a, 0x2d, 0x5, 0x62, 0xcb, 0xac, 0x9b, 0xfc, 0x55, 0x32, 0x1a, 0x7d, 0xd4, 0xb3, 0xba, 0xdd, 0x74, 0x13, 0x3b, 0x5c, 0xf5, 0x92, 0xa5, 0xc2, 0x6b, 0xc, 0x24, 0x43, 0xea, 0x8d, 0xed, 0x8a, 0x23, 0x44, 0x6c, 0xb, 0xa2, 0xc5, 0xf2, 0x95, 0x3c, 0x5b, 0x73, 0x14, 0xbd, 0xda, 0xd3, 0xb4, 0x1d, 0x7a, 0x52, 0x35, 0x9c, 0xfb, 0xcc, 0xab, 0x2, 0x65, 0x4d, 0x2a, 0x83, 0xe4, 0x91, 0xf6, 0x5f, 0x38, 0x10, 0x77, 0xde, 0xb9, 0x8e, 0xe9, 0x40, 0x27, 0xf, 0x68, 0xc1, 0xa6, 0xaf, 0xc8, 0x61, 0x6, 0x2e, 0x49, 0xe0, 0x87, 0xb0, 0xd7, 0x7e, 0x19, 0x31, 0x56, 0xff, 0x98, 0x15, 0x72, 0xdb, 0xbc, 0x94, 0xf3, 0x5a, 0x3d, 0xa, 0x6d, 0xc4, 0xa3, 0x8b, 0xec, 0x45, 0x22, 0x2b, 0x4c, 0xe5, 0x82, 0xaa, 0xcd, 0x64, 0x3, 0x34, 0x53, 0xfa, 0x9d, 0xb5, 0xd2, 0x7b, 0x1c, 0x69, 0xe, 0xa7, 0xc0, 0xe8, 0x8f, 0x26, 0x41, 0x76, 0x11, 0xb8, 0xdf, 0xf7, 0x90, 0x39, 0x5e, 0x57, 0x30, 0x99, 0xfe, 0xd6, 0xb1, 0x18, 0x7f, 0x48, 0x2f, 0x86, 0xe1, 0xc9, 0xae, 0x7, 0x60},
- {0x0, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x5, 0x67, 0xf, 0xb7, 0xdf, 0xda, 0xb2, 0xa, 0x62, 0xce, 0xa6, 0x1e, 0x76, 0x73, 0x1b, 0xa3, 0xcb, 0xa9, 0xc1, 0x79, 0x11, 0x14, 0x7c, 0xc4, 0xac, 0x81, 0xe9, 0x51, 0x39, 0x3c, 0x54, 0xec, 0x84, 0xe6, 0x8e, 0x36, 0x5e, 0x5b, 0x33, 0x8b, 0xe3, 0x4f, 0x27, 0x9f, 0xf7, 0xf2, 0x9a, 0x22, 0x4a, 0x28, 0x40, 0xf8, 0x90, 0x95, 0xfd, 0x45, 0x2d, 0x1f, 0x77, 0xcf, 0xa7, 0xa2, 0xca, 0x72, 0x1a, 0x78, 0x10, 0xa8, 0xc0, 0xc5, 0xad, 0x15, 0x7d, 0xd1, 0xb9, 0x1, 0x69, 0x6c, 0x4, 0xbc, 0xd4, 0xb6, 0xde, 0x66, 0xe, 0xb, 0x63, 0xdb, 0xb3, 0x9e, 0xf6, 0x4e, 0x26, 0x23, 0x4b, 0xf3, 0x9b, 0xf9, 0x91, 0x29, 0x41, 0x44, 0x2c, 0x94, 0xfc, 0x50, 0x38, 0x80, 0xe8, 0xed, 0x85, 0x3d, 0x55, 0x37, 0x5f, 0xe7, 0x8f, 0x8a, 0xe2, 0x5a, 0x32, 0x3e, 0x56, 0xee, 0x86, 0x83, 0xeb, 0x53, 0x3b, 0x59, 0x31, 0x89, 0xe1, 0xe4, 0x8c, 0x34, 0x5c, 0xf0, 0x98, 0x20, 0x48, 0x4d, 0x25, 0x9d, 0xf5, 0x97, 0xff, 0x47, 0x2f, 0x2a, 0x42, 0xfa, 0x92, 0xbf, 0xd7, 0x6f, 0x7, 0x2, 0x6a, 0xd2, 0xba, 0xd8, 0xb0, 0x8, 0x60, 0x65, 0xd, 0xb5, 0xdd, 0x71, 0x19, 0xa1, 0xc9, 0xcc, 0xa4, 0x1c, 0x74, 0x16, 0x7e, 0xc6, 0xae, 0xab, 0xc3, 0x7b, 0x13, 0x21, 0x49, 0xf1, 0x99, 0x9c, 0xf4, 0x4c, 0x24, 0x46, 0x2e, 0x96, 0xfe, 0xfb, 0x93, 0x2b, 0x43, 0xef, 0x87, 0x3f, 0x57, 0x52, 0x3a, 0x82, 0xea, 0x88, 0xe0, 0x58, 0x30, 0x35, 0x5d, 0xe5, 0x8d, 0xa0, 0xc8, 0x70, 0x18, 0x1d, 0x75, 0xcd, 0xa5, 0xc7, 0xaf, 0x17, 0x7f, 0x7a, 0x12, 0xaa, 0xc2, 0x6e, 0x6, 0xbe, 0xd6, 0xd3, 0xbb, 0x3, 0x6b, 0x9, 0x61, 0xd9, 0xb1, 0xb4, 0xdc, 0x64, 0xc},
- {0x0, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x2, 0x6f, 0x6, 0xbd, 0xd4, 0xd6, 0xbf, 0x4, 0x6d, 0xde, 0xb7, 0xc, 0x65, 0x67, 0xe, 0xb5, 0xdc, 0xb1, 0xd8, 0x63, 0xa, 0x8, 0x61, 0xda, 0xb3, 0xa1, 0xc8, 0x73, 0x1a, 0x18, 0x71, 0xca, 0xa3, 0xce, 0xa7, 0x1c, 0x75, 0x77, 0x1e, 0xa5, 0xcc, 0x7f, 0x16, 0xad, 0xc4, 0xc6, 0xaf, 0x14, 0x7d, 0x10, 0x79, 0xc2, 0xab, 0xa9, 0xc0, 0x7b, 0x12, 0x5f, 0x36, 0x8d, 0xe4, 0xe6, 0x8f, 0x34, 0x5d, 0x30, 0x59, 0xe2, 0x8b, 0x89, 0xe0, 0x5b, 0x32, 0x81, 0xe8, 0x53, 0x3a, 0x38, 0x51, 0xea, 0x83, 0xee, 0x87, 0x3c, 0x55, 0x57, 0x3e, 0x85, 0xec, 0xfe, 0x97, 0x2c, 0x45, 0x47, 0x2e, 0x95, 0xfc, 0x91, 0xf8, 0x43, 0x2a, 0x28, 0x41, 0xfa, 0x93, 0x20, 0x49, 0xf2, 0x9b, 0x99, 0xf0, 0x4b, 0x22, 0x4f, 0x26, 0x9d, 0xf4, 0xf6, 0x9f, 0x24, 0x4d, 0xbe, 0xd7, 0x6c, 0x5, 0x7, 0x6e, 0xd5, 0xbc, 0xd1, 0xb8, 0x3, 0x6a, 0x68, 0x1, 0xba, 0xd3, 0x60, 0x9, 0xb2, 0xdb, 0xd9, 0xb0, 0xb, 0x62, 0xf, 0x66, 0xdd, 0xb4, 0xb6, 0xdf, 0x64, 0xd, 0x1f, 0x76, 0xcd, 0xa4, 0xa6, 0xcf, 0x74, 0x1d, 0x70, 0x19, 0xa2, 0xcb, 0xc9, 0xa0, 0x1b, 0x72, 0xc1, 0xa8, 0x13, 0x7a, 0x78, 0x11, 0xaa, 0xc3, 0xae, 0xc7, 0x7c, 0x15, 0x17, 0x7e, 0xc5, 0xac, 0xe1, 0x88, 0x33, 0x5a, 0x58, 0x31, 0x8a, 0xe3, 0x8e, 0xe7, 0x5c, 0x35, 0x37, 0x5e, 0xe5, 0x8c, 0x3f, 0x56, 0xed, 0x84, 0x86, 0xef, 0x54, 0x3d, 0x50, 0x39, 0x82, 0xeb, 0xe9, 0x80, 0x3b, 0x52, 0x40, 0x29, 0x92, 0xfb, 0xf9, 0x90, 0x2b, 0x42, 0x2f, 0x46, 0xfd, 0x94, 0x96, 0xff, 0x44, 0x2d, 0x9e, 0xf7, 0x4c, 0x25, 0x27, 0x4e, 0xf5, 0x9c, 0xf1, 0x98, 0x23, 0x4a, 0x48, 0x21, 0x9a, 0xf3},
- {0x0, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0xb, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c, 0xee, 0x84, 0x3a, 0x50, 0x5b, 0x31, 0x8f, 0xe5, 0x99, 0xf3, 0x4d, 0x27, 0x2c, 0x46, 0xf8, 0x92, 0xc1, 0xab, 0x15, 0x7f, 0x74, 0x1e, 0xa0, 0xca, 0xb6, 0xdc, 0x62, 0x8, 0x3, 0x69, 0xd7, 0xbd, 0x2f, 0x45, 0xfb, 0x91, 0x9a, 0xf0, 0x4e, 0x24, 0x58, 0x32, 0x8c, 0xe6, 0xed, 0x87, 0x39, 0x53, 0x9f, 0xf5, 0x4b, 0x21, 0x2a, 0x40, 0xfe, 0x94, 0xe8, 0x82, 0x3c, 0x56, 0x5d, 0x37, 0x89, 0xe3, 0x71, 0x1b, 0xa5, 0xcf, 0xc4, 0xae, 0x10, 0x7a, 0x6, 0x6c, 0xd2, 0xb8, 0xb3, 0xd9, 0x67, 0xd, 0x5e, 0x34, 0x8a, 0xe0, 0xeb, 0x81, 0x3f, 0x55, 0x29, 0x43, 0xfd, 0x97, 0x9c, 0xf6, 0x48, 0x22, 0xb0, 0xda, 0x64, 0xe, 0x5, 0x6f, 0xd1, 0xbb, 0xc7, 0xad, 0x13, 0x79, 0x72, 0x18, 0xa6, 0xcc, 0x23, 0x49, 0xf7, 0x9d, 0x96, 0xfc, 0x42, 0x28, 0x54, 0x3e, 0x80, 0xea, 0xe1, 0x8b, 0x35, 0x5f, 0xcd, 0xa7, 0x19, 0x73, 0x78, 0x12, 0xac, 0xc6, 0xba, 0xd0, 0x6e, 0x4, 0xf, 0x65, 0xdb, 0xb1, 0xe2, 0x88, 0x36, 0x5c, 0x57, 0x3d, 0x83, 0xe9, 0x95, 0xff, 0x41, 0x2b, 0x20, 0x4a, 0xf4, 0x9e, 0xc, 0x66, 0xd8, 0xb2, 0xb9, 0xd3, 0x6d, 0x7, 0x7b, 0x11, 0xaf, 0xc5, 0xce, 0xa4, 0x1a, 0x70, 0xbc, 0xd6, 0x68, 0x2, 0x9, 0x63, 0xdd, 0xb7, 0xcb, 0xa1, 0x1f, 0x75, 0x7e, 0x14, 0xaa, 0xc0, 0x52, 0x38, 0x86, 0xec, 0xe7, 0x8d, 0x33, 0x59, 0x25, 0x4f, 0xf1, 0x9b, 0x90, 0xfa, 0x44, 0x2e, 0x7d, 0x17, 0xa9, 0xc3, 0xc8, 0xa2, 0x1c, 0x76, 0xa, 0x60, 0xde, 0xb4, 0xbf, 0xd5, 0x6b, 0x1, 0x93, 0xf9, 0x47, 0x2d, 0x26, 0x4c, 0xf2, 0x98, 0xe4, 0x8e, 0x30, 0x5a, 0x51, 0x3b, 0x85, 0xef},
- {0x0, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0xc, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73, 0xfe, 0x95, 0x28, 0x43, 0x4f, 0x24, 0x99, 0xf2, 0x81, 0xea, 0x57, 0x3c, 0x30, 0x5b, 0xe6, 0x8d, 0xe1, 0x8a, 0x37, 0x5c, 0x50, 0x3b, 0x86, 0xed, 0x9e, 0xf5, 0x48, 0x23, 0x2f, 0x44, 0xf9, 0x92, 0x1f, 0x74, 0xc9, 0xa2, 0xae, 0xc5, 0x78, 0x13, 0x60, 0xb, 0xb6, 0xdd, 0xd1, 0xba, 0x7, 0x6c, 0xdf, 0xb4, 0x9, 0x62, 0x6e, 0x5, 0xb8, 0xd3, 0xa0, 0xcb, 0x76, 0x1d, 0x11, 0x7a, 0xc7, 0xac, 0x21, 0x4a, 0xf7, 0x9c, 0x90, 0xfb, 0x46, 0x2d, 0x5e, 0x35, 0x88, 0xe3, 0xef, 0x84, 0x39, 0x52, 0x3e, 0x55, 0xe8, 0x83, 0x8f, 0xe4, 0x59, 0x32, 0x41, 0x2a, 0x97, 0xfc, 0xf0, 0x9b, 0x26, 0x4d, 0xc0, 0xab, 0x16, 0x7d, 0x71, 0x1a, 0xa7, 0xcc, 0xbf, 0xd4, 0x69, 0x2, 0xe, 0x65, 0xd8, 0xb3, 0xa3, 0xc8, 0x75, 0x1e, 0x12, 0x79, 0xc4, 0xaf, 0xdc, 0xb7, 0xa, 0x61, 0x6d, 0x6, 0xbb, 0xd0, 0x5d, 0x36, 0x8b, 0xe0, 0xec, 0x87, 0x3a, 0x51, 0x22, 0x49, 0xf4, 0x9f, 0x93, 0xf8, 0x45, 0x2e, 0x42, 0x29, 0x94, 0xff, 0xf3, 0x98, 0x25, 0x4e, 0x3d, 0x56, 0xeb, 0x80, 0x8c, 0xe7, 0x5a, 0x31, 0xbc, 0xd7, 0x6a, 0x1, 0xd, 0x66, 0xdb, 0xb0, 0xc3, 0xa8, 0x15, 0x7e, 0x72, 0x19, 0xa4, 0xcf, 0x7c, 0x17, 0xaa, 0xc1, 0xcd, 0xa6, 0x1b, 0x70, 0x3, 0x68, 0xd5, 0xbe, 0xb2, 0xd9, 0x64, 0xf, 0x82, 0xe9, 0x54, 0x3f, 0x33, 0x58, 0xe5, 0x8e, 0xfd, 0x96, 0x2b, 0x40, 0x4c, 0x27, 0x9a, 0xf1, 0x9d, 0xf6, 0x4b, 0x20, 0x2c, 0x47, 0xfa, 0x91, 0xe2, 0x89, 0x34, 0x5f, 0x53, 0x38, 0x85, 0xee, 0x63, 0x8, 0xb5, 0xde, 0xd2, 0xb9, 0x4, 0x6f, 0x1c, 0x77, 0xca, 0xa1, 0xad, 0xc6, 0x7b, 0x10},
- {0x0, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e, 0x8e, 0xe2, 0x56, 0x3a, 0x23, 0x4f, 0xfb, 0x97, 0xc9, 0xa5, 0x11, 0x7d, 0x64, 0x8, 0xbc, 0xd0, 0x1, 0x6d, 0xd9, 0xb5, 0xac, 0xc0, 0x74, 0x18, 0x46, 0x2a, 0x9e, 0xf2, 0xeb, 0x87, 0x33, 0x5f, 0x8f, 0xe3, 0x57, 0x3b, 0x22, 0x4e, 0xfa, 0x96, 0xc8, 0xa4, 0x10, 0x7c, 0x65, 0x9, 0xbd, 0xd1, 0x2, 0x6e, 0xda, 0xb6, 0xaf, 0xc3, 0x77, 0x1b, 0x45, 0x29, 0x9d, 0xf1, 0xe8, 0x84, 0x30, 0x5c, 0x8c, 0xe0, 0x54, 0x38, 0x21, 0x4d, 0xf9, 0x95, 0xcb, 0xa7, 0x13, 0x7f, 0x66, 0xa, 0xbe, 0xd2, 0x3, 0x6f, 0xdb, 0xb7, 0xae, 0xc2, 0x76, 0x1a, 0x44, 0x28, 0x9c, 0xf0, 0xe9, 0x85, 0x31, 0x5d, 0x8d, 0xe1, 0x55, 0x39, 0x20, 0x4c, 0xf8, 0x94, 0xca, 0xa6, 0x12, 0x7e, 0x67, 0xb, 0xbf, 0xd3, 0x4, 0x68, 0xdc, 0xb0, 0xa9, 0xc5, 0x71, 0x1d, 0x43, 0x2f, 0x9b, 0xf7, 0xee, 0x82, 0x36, 0x5a, 0x8a, 0xe6, 0x52, 0x3e, 0x27, 0x4b, 0xff, 0x93, 0xcd, 0xa1, 0x15, 0x79, 0x60, 0xc, 0xb8, 0xd4, 0x5, 0x69, 0xdd, 0xb1, 0xa8, 0xc4, 0x70, 0x1c, 0x42, 0x2e, 0x9a, 0xf6, 0xef, 0x83, 0x37, 0x5b, 0x8b, 0xe7, 0x53, 0x3f, 0x26, 0x4a, 0xfe, 0x92, 0xcc, 0xa0, 0x14, 0x78, 0x61, 0xd, 0xb9, 0xd5, 0x6, 0x6a, 0xde, 0xb2, 0xab, 0xc7, 0x73, 0x1f, 0x41, 0x2d, 0x99, 0xf5, 0xec, 0x80, 0x34, 0x58, 0x88, 0xe4, 0x50, 0x3c, 0x25, 0x49, 0xfd, 0x91, 0xcf, 0xa3, 0x17, 0x7b, 0x62, 0xe, 0xba, 0xd6, 0x7, 0x6b, 0xdf, 0xb3, 0xaa, 0xc6, 0x72, 0x1e, 0x40, 0x2c, 0x98, 0xf4, 0xed, 0x81, 0x35, 0x59, 0x89, 0xe5, 0x51, 0x3d, 0x24, 0x48, 0xfc, 0x90, 0xce, 0xa2, 0x16, 0x7a, 0x63, 0xf, 0xbb, 0xd7},
- {0x0, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51, 0x9e, 0xf3, 0x44, 0x29, 0x37, 0x5a, 0xed, 0x80, 0xd1, 0xbc, 0xb, 0x66, 0x78, 0x15, 0xa2, 0xcf, 0x21, 0x4c, 0xfb, 0x96, 0x88, 0xe5, 0x52, 0x3f, 0x6e, 0x3, 0xb4, 0xd9, 0xc7, 0xaa, 0x1d, 0x70, 0xbf, 0xd2, 0x65, 0x8, 0x16, 0x7b, 0xcc, 0xa1, 0xf0, 0x9d, 0x2a, 0x47, 0x59, 0x34, 0x83, 0xee, 0x42, 0x2f, 0x98, 0xf5, 0xeb, 0x86, 0x31, 0x5c, 0xd, 0x60, 0xd7, 0xba, 0xa4, 0xc9, 0x7e, 0x13, 0xdc, 0xb1, 0x6, 0x6b, 0x75, 0x18, 0xaf, 0xc2, 0x93, 0xfe, 0x49, 0x24, 0x3a, 0x57, 0xe0, 0x8d, 0x63, 0xe, 0xb9, 0xd4, 0xca, 0xa7, 0x10, 0x7d, 0x2c, 0x41, 0xf6, 0x9b, 0x85, 0xe8, 0x5f, 0x32, 0xfd, 0x90, 0x27, 0x4a, 0x54, 0x39, 0x8e, 0xe3, 0xb2, 0xdf, 0x68, 0x5, 0x1b, 0x76, 0xc1, 0xac, 0x84, 0xe9, 0x5e, 0x33, 0x2d, 0x40, 0xf7, 0x9a, 0xcb, 0xa6, 0x11, 0x7c, 0x62, 0xf, 0xb8, 0xd5, 0x1a, 0x77, 0xc0, 0xad, 0xb3, 0xde, 0x69, 0x4, 0x55, 0x38, 0x8f, 0xe2, 0xfc, 0x91, 0x26, 0x4b, 0xa5, 0xc8, 0x7f, 0x12, 0xc, 0x61, 0xd6, 0xbb, 0xea, 0x87, 0x30, 0x5d, 0x43, 0x2e, 0x99, 0xf4, 0x3b, 0x56, 0xe1, 0x8c, 0x92, 0xff, 0x48, 0x25, 0x74, 0x19, 0xae, 0xc3, 0xdd, 0xb0, 0x7, 0x6a, 0xc6, 0xab, 0x1c, 0x71, 0x6f, 0x2, 0xb5, 0xd8, 0x89, 0xe4, 0x53, 0x3e, 0x20, 0x4d, 0xfa, 0x97, 0x58, 0x35, 0x82, 0xef, 0xf1, 0x9c, 0x2b, 0x46, 0x17, 0x7a, 0xcd, 0xa0, 0xbe, 0xd3, 0x64, 0x9, 0xe7, 0x8a, 0x3d, 0x50, 0x4e, 0x23, 0x94, 0xf9, 0xa8, 0xc5, 0x72, 0x1f, 0x1, 0x6c, 0xdb, 0xb6, 0x79, 0x14, 0xa3, 0xce, 0xd0, 0xbd, 0xa, 0x67, 0x36, 0x5b, 0xec, 0x81, 0x9f, 0xf2, 0x45, 0x28},
- {0x0, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40, 0xae, 0xc0, 0x72, 0x1c, 0xb, 0x65, 0xd7, 0xb9, 0xf9, 0x97, 0x25, 0x4b, 0x5c, 0x32, 0x80, 0xee, 0x41, 0x2f, 0x9d, 0xf3, 0xe4, 0x8a, 0x38, 0x56, 0x16, 0x78, 0xca, 0xa4, 0xb3, 0xdd, 0x6f, 0x1, 0xef, 0x81, 0x33, 0x5d, 0x4a, 0x24, 0x96, 0xf8, 0xb8, 0xd6, 0x64, 0xa, 0x1d, 0x73, 0xc1, 0xaf, 0x82, 0xec, 0x5e, 0x30, 0x27, 0x49, 0xfb, 0x95, 0xd5, 0xbb, 0x9, 0x67, 0x70, 0x1e, 0xac, 0xc2, 0x2c, 0x42, 0xf0, 0x9e, 0x89, 0xe7, 0x55, 0x3b, 0x7b, 0x15, 0xa7, 0xc9, 0xde, 0xb0, 0x2, 0x6c, 0xc3, 0xad, 0x1f, 0x71, 0x66, 0x8, 0xba, 0xd4, 0x94, 0xfa, 0x48, 0x26, 0x31, 0x5f, 0xed, 0x83, 0x6d, 0x3, 0xb1, 0xdf, 0xc8, 0xa6, 0x14, 0x7a, 0x3a, 0x54, 0xe6, 0x88, 0x9f, 0xf1, 0x43, 0x2d, 0x19, 0x77, 0xc5, 0xab, 0xbc, 0xd2, 0x60, 0xe, 0x4e, 0x20, 0x92, 0xfc, 0xeb, 0x85, 0x37, 0x59, 0xb7, 0xd9, 0x6b, 0x5, 0x12, 0x7c, 0xce, 0xa0, 0xe0, 0x8e, 0x3c, 0x52, 0x45, 0x2b, 0x99, 0xf7, 0x58, 0x36, 0x84, 0xea, 0xfd, 0x93, 0x21, 0x4f, 0xf, 0x61, 0xd3, 0xbd, 0xaa, 0xc4, 0x76, 0x18, 0xf6, 0x98, 0x2a, 0x44, 0x53, 0x3d, 0x8f, 0xe1, 0xa1, 0xcf, 0x7d, 0x13, 0x4, 0x6a, 0xd8, 0xb6, 0x9b, 0xf5, 0x47, 0x29, 0x3e, 0x50, 0xe2, 0x8c, 0xcc, 0xa2, 0x10, 0x7e, 0x69, 0x7, 0xb5, 0xdb, 0x35, 0x5b, 0xe9, 0x87, 0x90, 0xfe, 0x4c, 0x22, 0x62, 0xc, 0xbe, 0xd0, 0xc7, 0xa9, 0x1b, 0x75, 0xda, 0xb4, 0x6, 0x68, 0x7f, 0x11, 0xa3, 0xcd, 0x8d, 0xe3, 0x51, 0x3f, 0x28, 0x46, 0xf4, 0x9a, 0x74, 0x1a, 0xa8, 0xc6, 0xd1, 0xbf, 0xd, 0x63, 0x23, 0x4d, 0xff, 0x91, 0x86, 0xe8, 0x5a, 0x34},
- {0x0, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f, 0xbe, 0xd1, 0x60, 0xf, 0x1f, 0x70, 0xc1, 0xae, 0xe1, 0x8e, 0x3f, 0x50, 0x40, 0x2f, 0x9e, 0xf1, 0x61, 0xe, 0xbf, 0xd0, 0xc0, 0xaf, 0x1e, 0x71, 0x3e, 0x51, 0xe0, 0x8f, 0x9f, 0xf0, 0x41, 0x2e, 0xdf, 0xb0, 0x1, 0x6e, 0x7e, 0x11, 0xa0, 0xcf, 0x80, 0xef, 0x5e, 0x31, 0x21, 0x4e, 0xff, 0x90, 0xc2, 0xad, 0x1c, 0x73, 0x63, 0xc, 0xbd, 0xd2, 0x9d, 0xf2, 0x43, 0x2c, 0x3c, 0x53, 0xe2, 0x8d, 0x7c, 0x13, 0xa2, 0xcd, 0xdd, 0xb2, 0x3, 0x6c, 0x23, 0x4c, 0xfd, 0x92, 0x82, 0xed, 0x5c, 0x33, 0xa3, 0xcc, 0x7d, 0x12, 0x2, 0x6d, 0xdc, 0xb3, 0xfc, 0x93, 0x22, 0x4d, 0x5d, 0x32, 0x83, 0xec, 0x1d, 0x72, 0xc3, 0xac, 0xbc, 0xd3, 0x62, 0xd, 0x42, 0x2d, 0x9c, 0xf3, 0xe3, 0x8c, 0x3d, 0x52, 0x99, 0xf6, 0x47, 0x28, 0x38, 0x57, 0xe6, 0x89, 0xc6, 0xa9, 0x18, 0x77, 0x67, 0x8, 0xb9, 0xd6, 0x27, 0x48, 0xf9, 0x96, 0x86, 0xe9, 0x58, 0x37, 0x78, 0x17, 0xa6, 0xc9, 0xd9, 0xb6, 0x7, 0x68, 0xf8, 0x97, 0x26, 0x49, 0x59, 0x36, 0x87, 0xe8, 0xa7, 0xc8, 0x79, 0x16, 0x6, 0x69, 0xd8, 0xb7, 0x46, 0x29, 0x98, 0xf7, 0xe7, 0x88, 0x39, 0x56, 0x19, 0x76, 0xc7, 0xa8, 0xb8, 0xd7, 0x66, 0x9, 0x5b, 0x34, 0x85, 0xea, 0xfa, 0x95, 0x24, 0x4b, 0x4, 0x6b, 0xda, 0xb5, 0xa5, 0xca, 0x7b, 0x14, 0xe5, 0x8a, 0x3b, 0x54, 0x44, 0x2b, 0x9a, 0xf5, 0xba, 0xd5, 0x64, 0xb, 0x1b, 0x74, 0xc5, 0xaa, 0x3a, 0x55, 0xe4, 0x8b, 0x9b, 0xf4, 0x45, 0x2a, 0x65, 0xa, 0xbb, 0xd4, 0xc4, 0xab, 0x1a, 0x75, 0x84, 0xeb, 0x5a, 0x35, 0x25, 0x4a, 0xfb, 0x94, 0xdb, 0xb4, 0x5, 0x6a, 0x7a, 0x15, 0xa4, 0xcb},
- {0x0, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0xa, 0x9a, 0xea, 0x53, 0x23, 0xb3, 0xc3, 0x8e, 0xfe, 0x6e, 0x1e, 0xf4, 0x84, 0x14, 0x64, 0x29, 0x59, 0xc9, 0xb9, 0xa6, 0xd6, 0x46, 0x36, 0x7b, 0xb, 0x9b, 0xeb, 0x1, 0x71, 0xe1, 0x91, 0xdc, 0xac, 0x3c, 0x4c, 0xf5, 0x85, 0x15, 0x65, 0x28, 0x58, 0xc8, 0xb8, 0x52, 0x22, 0xb2, 0xc2, 0x8f, 0xff, 0x6f, 0x1f, 0x51, 0x21, 0xb1, 0xc1, 0x8c, 0xfc, 0x6c, 0x1c, 0xf6, 0x86, 0x16, 0x66, 0x2b, 0x5b, 0xcb, 0xbb, 0x2, 0x72, 0xe2, 0x92, 0xdf, 0xaf, 0x3f, 0x4f, 0xa5, 0xd5, 0x45, 0x35, 0x78, 0x8, 0x98, 0xe8, 0xf7, 0x87, 0x17, 0x67, 0x2a, 0x5a, 0xca, 0xba, 0x50, 0x20, 0xb0, 0xc0, 0x8d, 0xfd, 0x6d, 0x1d, 0xa4, 0xd4, 0x44, 0x34, 0x79, 0x9, 0x99, 0xe9, 0x3, 0x73, 0xe3, 0x93, 0xde, 0xae, 0x3e, 0x4e, 0xa2, 0xd2, 0x42, 0x32, 0x7f, 0xf, 0x9f, 0xef, 0x5, 0x75, 0xe5, 0x95, 0xd8, 0xa8, 0x38, 0x48, 0xf1, 0x81, 0x11, 0x61, 0x2c, 0x5c, 0xcc, 0xbc, 0x56, 0x26, 0xb6, 0xc6, 0x8b, 0xfb, 0x6b, 0x1b, 0x4, 0x74, 0xe4, 0x94, 0xd9, 0xa9, 0x39, 0x49, 0xa3, 0xd3, 0x43, 0x33, 0x7e, 0xe, 0x9e, 0xee, 0x57, 0x27, 0xb7, 0xc7, 0x8a, 0xfa, 0x6a, 0x1a, 0xf0, 0x80, 0x10, 0x60, 0x2d, 0x5d, 0xcd, 0xbd, 0xf3, 0x83, 0x13, 0x63, 0x2e, 0x5e, 0xce, 0xbe, 0x54, 0x24, 0xb4, 0xc4, 0x89, 0xf9, 0x69, 0x19, 0xa0, 0xd0, 0x40, 0x30, 0x7d, 0xd, 0x9d, 0xed, 0x7, 0x77, 0xe7, 0x97, 0xda, 0xaa, 0x3a, 0x4a, 0x55, 0x25, 0xb5, 0xc5, 0x88, 0xf8, 0x68, 0x18, 0xf2, 0x82, 0x12, 0x62, 0x2f, 0x5f, 0xcf, 0xbf, 0x6, 0x76, 0xe6, 0x96, 0xdb, 0xab, 0x3b, 0x4b, 0xa1, 0xd1, 0x41, 0x31, 0x7c, 0xc, 0x9c, 0xec},
- {0x0, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x7, 0x94, 0xe5, 0x43, 0x32, 0xa1, 0xd0, 0x9a, 0xeb, 0x78, 0x9, 0xec, 0x9d, 0xe, 0x7f, 0x35, 0x44, 0xd7, 0xa6, 0x86, 0xf7, 0x64, 0x15, 0x5f, 0x2e, 0xbd, 0xcc, 0x29, 0x58, 0xcb, 0xba, 0xf0, 0x81, 0x12, 0x63, 0xc5, 0xb4, 0x27, 0x56, 0x1c, 0x6d, 0xfe, 0x8f, 0x6a, 0x1b, 0x88, 0xf9, 0xb3, 0xc2, 0x51, 0x20, 0x11, 0x60, 0xf3, 0x82, 0xc8, 0xb9, 0x2a, 0x5b, 0xbe, 0xcf, 0x5c, 0x2d, 0x67, 0x16, 0x85, 0xf4, 0x52, 0x23, 0xb0, 0xc1, 0x8b, 0xfa, 0x69, 0x18, 0xfd, 0x8c, 0x1f, 0x6e, 0x24, 0x55, 0xc6, 0xb7, 0x97, 0xe6, 0x75, 0x4, 0x4e, 0x3f, 0xac, 0xdd, 0x38, 0x49, 0xda, 0xab, 0xe1, 0x90, 0x3, 0x72, 0xd4, 0xa5, 0x36, 0x47, 0xd, 0x7c, 0xef, 0x9e, 0x7b, 0xa, 0x99, 0xe8, 0xa2, 0xd3, 0x40, 0x31, 0x22, 0x53, 0xc0, 0xb1, 0xfb, 0x8a, 0x19, 0x68, 0x8d, 0xfc, 0x6f, 0x1e, 0x54, 0x25, 0xb6, 0xc7, 0x61, 0x10, 0x83, 0xf2, 0xb8, 0xc9, 0x5a, 0x2b, 0xce, 0xbf, 0x2c, 0x5d, 0x17, 0x66, 0xf5, 0x84, 0xa4, 0xd5, 0x46, 0x37, 0x7d, 0xc, 0x9f, 0xee, 0xb, 0x7a, 0xe9, 0x98, 0xd2, 0xa3, 0x30, 0x41, 0xe7, 0x96, 0x5, 0x74, 0x3e, 0x4f, 0xdc, 0xad, 0x48, 0x39, 0xaa, 0xdb, 0x91, 0xe0, 0x73, 0x2, 0x33, 0x42, 0xd1, 0xa0, 0xea, 0x9b, 0x8, 0x79, 0x9c, 0xed, 0x7e, 0xf, 0x45, 0x34, 0xa7, 0xd6, 0x70, 0x1, 0x92, 0xe3, 0xa9, 0xd8, 0x4b, 0x3a, 0xdf, 0xae, 0x3d, 0x4c, 0x6, 0x77, 0xe4, 0x95, 0xb5, 0xc4, 0x57, 0x26, 0x6c, 0x1d, 0x8e, 0xff, 0x1a, 0x6b, 0xf8, 0x89, 0xc3, 0xb2, 0x21, 0x50, 0xf6, 0x87, 0x14, 0x65, 0x2f, 0x5e, 0xcd, 0xbc, 0x59, 0x28, 0xbb, 0xca, 0x80, 0xf1, 0x62, 0x13},
- {0x0, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4, 0x73, 0x1, 0x97, 0xe5, 0xa6, 0xd4, 0x42, 0x30, 0xc4, 0xb6, 0x20, 0x52, 0x11, 0x63, 0xf5, 0x87, 0xe6, 0x94, 0x2, 0x70, 0x33, 0x41, 0xd7, 0xa5, 0x51, 0x23, 0xb5, 0xc7, 0x84, 0xf6, 0x60, 0x12, 0x95, 0xe7, 0x71, 0x3, 0x40, 0x32, 0xa4, 0xd6, 0x22, 0x50, 0xc6, 0xb4, 0xf7, 0x85, 0x13, 0x61, 0xd1, 0xa3, 0x35, 0x47, 0x4, 0x76, 0xe0, 0x92, 0x66, 0x14, 0x82, 0xf0, 0xb3, 0xc1, 0x57, 0x25, 0xa2, 0xd0, 0x46, 0x34, 0x77, 0x5, 0x93, 0xe1, 0x15, 0x67, 0xf1, 0x83, 0xc0, 0xb2, 0x24, 0x56, 0x37, 0x45, 0xd3, 0xa1, 0xe2, 0x90, 0x6, 0x74, 0x80, 0xf2, 0x64, 0x16, 0x55, 0x27, 0xb1, 0xc3, 0x44, 0x36, 0xa0, 0xd2, 0x91, 0xe3, 0x75, 0x7, 0xf3, 0x81, 0x17, 0x65, 0x26, 0x54, 0xc2, 0xb0, 0xbf, 0xcd, 0x5b, 0x29, 0x6a, 0x18, 0x8e, 0xfc, 0x8, 0x7a, 0xec, 0x9e, 0xdd, 0xaf, 0x39, 0x4b, 0xcc, 0xbe, 0x28, 0x5a, 0x19, 0x6b, 0xfd, 0x8f, 0x7b, 0x9, 0x9f, 0xed, 0xae, 0xdc, 0x4a, 0x38, 0x59, 0x2b, 0xbd, 0xcf, 0x8c, 0xfe, 0x68, 0x1a, 0xee, 0x9c, 0xa, 0x78, 0x3b, 0x49, 0xdf, 0xad, 0x2a, 0x58, 0xce, 0xbc, 0xff, 0x8d, 0x1b, 0x69, 0x9d, 0xef, 0x79, 0xb, 0x48, 0x3a, 0xac, 0xde, 0x6e, 0x1c, 0x8a, 0xf8, 0xbb, 0xc9, 0x5f, 0x2d, 0xd9, 0xab, 0x3d, 0x4f, 0xc, 0x7e, 0xe8, 0x9a, 0x1d, 0x6f, 0xf9, 0x8b, 0xc8, 0xba, 0x2c, 0x5e, 0xaa, 0xd8, 0x4e, 0x3c, 0x7f, 0xd, 0x9b, 0xe9, 0x88, 0xfa, 0x6c, 0x1e, 0x5d, 0x2f, 0xb9, 0xcb, 0x3f, 0x4d, 0xdb, 0xa9, 0xea, 0x98, 0xe, 0x7c, 0xfb, 0x89, 0x1f, 0x6d, 0x2e, 0x5c, 0xca, 0xb8, 0x4c, 0x3e, 0xa8, 0xda, 0x99, 0xeb, 0x7d, 0xf},
- {0x0, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb, 0x63, 0x10, 0x85, 0xf6, 0xb2, 0xc1, 0x54, 0x27, 0xdc, 0xaf, 0x3a, 0x49, 0xd, 0x7e, 0xeb, 0x98, 0xc6, 0xb5, 0x20, 0x53, 0x17, 0x64, 0xf1, 0x82, 0x79, 0xa, 0x9f, 0xec, 0xa8, 0xdb, 0x4e, 0x3d, 0xa5, 0xd6, 0x43, 0x30, 0x74, 0x7, 0x92, 0xe1, 0x1a, 0x69, 0xfc, 0x8f, 0xcb, 0xb8, 0x2d, 0x5e, 0x91, 0xe2, 0x77, 0x4, 0x40, 0x33, 0xa6, 0xd5, 0x2e, 0x5d, 0xc8, 0xbb, 0xff, 0x8c, 0x19, 0x6a, 0xf2, 0x81, 0x14, 0x67, 0x23, 0x50, 0xc5, 0xb6, 0x4d, 0x3e, 0xab, 0xd8, 0x9c, 0xef, 0x7a, 0x9, 0x57, 0x24, 0xb1, 0xc2, 0x86, 0xf5, 0x60, 0x13, 0xe8, 0x9b, 0xe, 0x7d, 0x39, 0x4a, 0xdf, 0xac, 0x34, 0x47, 0xd2, 0xa1, 0xe5, 0x96, 0x3, 0x70, 0x8b, 0xf8, 0x6d, 0x1e, 0x5a, 0x29, 0xbc, 0xcf, 0x3f, 0x4c, 0xd9, 0xaa, 0xee, 0x9d, 0x8, 0x7b, 0x80, 0xf3, 0x66, 0x15, 0x51, 0x22, 0xb7, 0xc4, 0x5c, 0x2f, 0xba, 0xc9, 0x8d, 0xfe, 0x6b, 0x18, 0xe3, 0x90, 0x5, 0x76, 0x32, 0x41, 0xd4, 0xa7, 0xf9, 0x8a, 0x1f, 0x6c, 0x28, 0x5b, 0xce, 0xbd, 0x46, 0x35, 0xa0, 0xd3, 0x97, 0xe4, 0x71, 0x2, 0x9a, 0xe9, 0x7c, 0xf, 0x4b, 0x38, 0xad, 0xde, 0x25, 0x56, 0xc3, 0xb0, 0xf4, 0x87, 0x12, 0x61, 0xae, 0xdd, 0x48, 0x3b, 0x7f, 0xc, 0x99, 0xea, 0x11, 0x62, 0xf7, 0x84, 0xc0, 0xb3, 0x26, 0x55, 0xcd, 0xbe, 0x2b, 0x58, 0x1c, 0x6f, 0xfa, 0x89, 0x72, 0x1, 0x94, 0xe7, 0xa3, 0xd0, 0x45, 0x36, 0x68, 0x1b, 0x8e, 0xfd, 0xb9, 0xca, 0x5f, 0x2c, 0xd7, 0xa4, 0x31, 0x42, 0x6, 0x75, 0xe0, 0x93, 0xb, 0x78, 0xed, 0x9e, 0xda, 0xa9, 0x3c, 0x4f, 0xb4, 0xc7, 0x52, 0x21, 0x65, 0x16, 0x83, 0xf0},
- {0x0, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6, 0x13, 0x67, 0xfb, 0x8f, 0xde, 0xaa, 0x36, 0x42, 0x94, 0xe0, 0x7c, 0x8, 0x59, 0x2d, 0xb1, 0xc5, 0x26, 0x52, 0xce, 0xba, 0xeb, 0x9f, 0x3, 0x77, 0xa1, 0xd5, 0x49, 0x3d, 0x6c, 0x18, 0x84, 0xf0, 0x35, 0x41, 0xdd, 0xa9, 0xf8, 0x8c, 0x10, 0x64, 0xb2, 0xc6, 0x5a, 0x2e, 0x7f, 0xb, 0x97, 0xe3, 0x4c, 0x38, 0xa4, 0xd0, 0x81, 0xf5, 0x69, 0x1d, 0xcb, 0xbf, 0x23, 0x57, 0x6, 0x72, 0xee, 0x9a, 0x5f, 0x2b, 0xb7, 0xc3, 0x92, 0xe6, 0x7a, 0xe, 0xd8, 0xac, 0x30, 0x44, 0x15, 0x61, 0xfd, 0x89, 0x6a, 0x1e, 0x82, 0xf6, 0xa7, 0xd3, 0x4f, 0x3b, 0xed, 0x99, 0x5, 0x71, 0x20, 0x54, 0xc8, 0xbc, 0x79, 0xd, 0x91, 0xe5, 0xb4, 0xc0, 0x5c, 0x28, 0xfe, 0x8a, 0x16, 0x62, 0x33, 0x47, 0xdb, 0xaf, 0x98, 0xec, 0x70, 0x4, 0x55, 0x21, 0xbd, 0xc9, 0x1f, 0x6b, 0xf7, 0x83, 0xd2, 0xa6, 0x3a, 0x4e, 0x8b, 0xff, 0x63, 0x17, 0x46, 0x32, 0xae, 0xda, 0xc, 0x78, 0xe4, 0x90, 0xc1, 0xb5, 0x29, 0x5d, 0xbe, 0xca, 0x56, 0x22, 0x73, 0x7, 0x9b, 0xef, 0x39, 0x4d, 0xd1, 0xa5, 0xf4, 0x80, 0x1c, 0x68, 0xad, 0xd9, 0x45, 0x31, 0x60, 0x14, 0x88, 0xfc, 0x2a, 0x5e, 0xc2, 0xb6, 0xe7, 0x93, 0xf, 0x7b, 0xd4, 0xa0, 0x3c, 0x48, 0x19, 0x6d, 0xf1, 0x85, 0x53, 0x27, 0xbb, 0xcf, 0x9e, 0xea, 0x76, 0x2, 0xc7, 0xb3, 0x2f, 0x5b, 0xa, 0x7e, 0xe2, 0x96, 0x40, 0x34, 0xa8, 0xdc, 0x8d, 0xf9, 0x65, 0x11, 0xf2, 0x86, 0x1a, 0x6e, 0x3f, 0x4b, 0xd7, 0xa3, 0x75, 0x1, 0x9d, 0xe9, 0xb8, 0xcc, 0x50, 0x24, 0xe1, 0x95, 0x9, 0x7d, 0x2c, 0x58, 0xc4, 0xb0, 0x66, 0x12, 0x8e, 0xfa, 0xab, 0xdf, 0x43, 0x37},
- {0x0, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9, 0x3, 0x76, 0xe9, 0x9c, 0xca, 0xbf, 0x20, 0x55, 0x8c, 0xf9, 0x66, 0x13, 0x45, 0x30, 0xaf, 0xda, 0x6, 0x73, 0xec, 0x99, 0xcf, 0xba, 0x25, 0x50, 0x89, 0xfc, 0x63, 0x16, 0x40, 0x35, 0xaa, 0xdf, 0x5, 0x70, 0xef, 0x9a, 0xcc, 0xb9, 0x26, 0x53, 0x8a, 0xff, 0x60, 0x15, 0x43, 0x36, 0xa9, 0xdc, 0xc, 0x79, 0xe6, 0x93, 0xc5, 0xb0, 0x2f, 0x5a, 0x83, 0xf6, 0x69, 0x1c, 0x4a, 0x3f, 0xa0, 0xd5, 0xf, 0x7a, 0xe5, 0x90, 0xc6, 0xb3, 0x2c, 0x59, 0x80, 0xf5, 0x6a, 0x1f, 0x49, 0x3c, 0xa3, 0xd6, 0xa, 0x7f, 0xe0, 0x95, 0xc3, 0xb6, 0x29, 0x5c, 0x85, 0xf0, 0x6f, 0x1a, 0x4c, 0x39, 0xa6, 0xd3, 0x9, 0x7c, 0xe3, 0x96, 0xc0, 0xb5, 0x2a, 0x5f, 0x86, 0xf3, 0x6c, 0x19, 0x4f, 0x3a, 0xa5, 0xd0, 0x18, 0x6d, 0xf2, 0x87, 0xd1, 0xa4, 0x3b, 0x4e, 0x97, 0xe2, 0x7d, 0x8, 0x5e, 0x2b, 0xb4, 0xc1, 0x1b, 0x6e, 0xf1, 0x84, 0xd2, 0xa7, 0x38, 0x4d, 0x94, 0xe1, 0x7e, 0xb, 0x5d, 0x28, 0xb7, 0xc2, 0x1e, 0x6b, 0xf4, 0x81, 0xd7, 0xa2, 0x3d, 0x48, 0x91, 0xe4, 0x7b, 0xe, 0x58, 0x2d, 0xb2, 0xc7, 0x1d, 0x68, 0xf7, 0x82, 0xd4, 0xa1, 0x3e, 0x4b, 0x92, 0xe7, 0x78, 0xd, 0x5b, 0x2e, 0xb1, 0xc4, 0x14, 0x61, 0xfe, 0x8b, 0xdd, 0xa8, 0x37, 0x42, 0x9b, 0xee, 0x71, 0x4, 0x52, 0x27, 0xb8, 0xcd, 0x17, 0x62, 0xfd, 0x88, 0xde, 0xab, 0x34, 0x41, 0x98, 0xed, 0x72, 0x7, 0x51, 0x24, 0xbb, 0xce, 0x12, 0x67, 0xf8, 0x8d, 0xdb, 0xae, 0x31, 0x44, 0x9d, 0xe8, 0x77, 0x2, 0x54, 0x21, 0xbe, 0xcb, 0x11, 0x64, 0xfb, 0x8e, 0xd8, 0xad, 0x32, 0x47, 0x9e, 0xeb, 0x74, 0x1, 0x57, 0x22, 0xbd, 0xc8},
- {0x0, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0xd, 0x52, 0x24, 0xbe, 0xc8, 0x33, 0x45, 0xdf, 0xa9, 0xf6, 0x80, 0x1a, 0x6c, 0xa4, 0xd2, 0x48, 0x3e, 0x61, 0x17, 0x8d, 0xfb, 0x66, 0x10, 0x8a, 0xfc, 0xa3, 0xd5, 0x4f, 0x39, 0xf1, 0x87, 0x1d, 0x6b, 0x34, 0x42, 0xd8, 0xae, 0x55, 0x23, 0xb9, 0xcf, 0x90, 0xe6, 0x7c, 0xa, 0xc2, 0xb4, 0x2e, 0x58, 0x7, 0x71, 0xeb, 0x9d, 0xcc, 0xba, 0x20, 0x56, 0x9, 0x7f, 0xe5, 0x93, 0x5b, 0x2d, 0xb7, 0xc1, 0x9e, 0xe8, 0x72, 0x4, 0xff, 0x89, 0x13, 0x65, 0x3a, 0x4c, 0xd6, 0xa0, 0x68, 0x1e, 0x84, 0xf2, 0xad, 0xdb, 0x41, 0x37, 0xaa, 0xdc, 0x46, 0x30, 0x6f, 0x19, 0x83, 0xf5, 0x3d, 0x4b, 0xd1, 0xa7, 0xf8, 0x8e, 0x14, 0x62, 0x99, 0xef, 0x75, 0x3, 0x5c, 0x2a, 0xb0, 0xc6, 0xe, 0x78, 0xe2, 0x94, 0xcb, 0xbd, 0x27, 0x51, 0x85, 0xf3, 0x69, 0x1f, 0x40, 0x36, 0xac, 0xda, 0x12, 0x64, 0xfe, 0x88, 0xd7, 0xa1, 0x3b, 0x4d, 0xb6, 0xc0, 0x5a, 0x2c, 0x73, 0x5, 0x9f, 0xe9, 0x21, 0x57, 0xcd, 0xbb, 0xe4, 0x92, 0x8, 0x7e, 0xe3, 0x95, 0xf, 0x79, 0x26, 0x50, 0xca, 0xbc, 0x74, 0x2, 0x98, 0xee, 0xb1, 0xc7, 0x5d, 0x2b, 0xd0, 0xa6, 0x3c, 0x4a, 0x15, 0x63, 0xf9, 0x8f, 0x47, 0x31, 0xab, 0xdd, 0x82, 0xf4, 0x6e, 0x18, 0x49, 0x3f, 0xa5, 0xd3, 0x8c, 0xfa, 0x60, 0x16, 0xde, 0xa8, 0x32, 0x44, 0x1b, 0x6d, 0xf7, 0x81, 0x7a, 0xc, 0x96, 0xe0, 0xbf, 0xc9, 0x53, 0x25, 0xed, 0x9b, 0x1, 0x77, 0x28, 0x5e, 0xc4, 0xb2, 0x2f, 0x59, 0xc3, 0xb5, 0xea, 0x9c, 0x6, 0x70, 0xb8, 0xce, 0x54, 0x22, 0x7d, 0xb, 0x91, 0xe7, 0x1c, 0x6a, 0xf0, 0x86, 0xd9, 0xaf, 0x35, 0x43, 0x8b, 0xfd, 0x67, 0x11, 0x4e, 0x38, 0xa2, 0xd4},
- {0x0, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x6, 0x5e, 0x29, 0xb0, 0xc7, 0x23, 0x54, 0xcd, 0xba, 0xe2, 0x95, 0xc, 0x7b, 0xbc, 0xcb, 0x52, 0x25, 0x7d, 0xa, 0x93, 0xe4, 0x46, 0x31, 0xa8, 0xdf, 0x87, 0xf0, 0x69, 0x1e, 0xd9, 0xae, 0x37, 0x40, 0x18, 0x6f, 0xf6, 0x81, 0x65, 0x12, 0x8b, 0xfc, 0xa4, 0xd3, 0x4a, 0x3d, 0xfa, 0x8d, 0x14, 0x63, 0x3b, 0x4c, 0xd5, 0xa2, 0x8c, 0xfb, 0x62, 0x15, 0x4d, 0x3a, 0xa3, 0xd4, 0x13, 0x64, 0xfd, 0x8a, 0xd2, 0xa5, 0x3c, 0x4b, 0xaf, 0xd8, 0x41, 0x36, 0x6e, 0x19, 0x80, 0xf7, 0x30, 0x47, 0xde, 0xa9, 0xf1, 0x86, 0x1f, 0x68, 0xca, 0xbd, 0x24, 0x53, 0xb, 0x7c, 0xe5, 0x92, 0x55, 0x22, 0xbb, 0xcc, 0x94, 0xe3, 0x7a, 0xd, 0xe9, 0x9e, 0x7, 0x70, 0x28, 0x5f, 0xc6, 0xb1, 0x76, 0x1, 0x98, 0xef, 0xb7, 0xc0, 0x59, 0x2e, 0x5, 0x72, 0xeb, 0x9c, 0xc4, 0xb3, 0x2a, 0x5d, 0x9a, 0xed, 0x74, 0x3, 0x5b, 0x2c, 0xb5, 0xc2, 0x26, 0x51, 0xc8, 0xbf, 0xe7, 0x90, 0x9, 0x7e, 0xb9, 0xce, 0x57, 0x20, 0x78, 0xf, 0x96, 0xe1, 0x43, 0x34, 0xad, 0xda, 0x82, 0xf5, 0x6c, 0x1b, 0xdc, 0xab, 0x32, 0x45, 0x1d, 0x6a, 0xf3, 0x84, 0x60, 0x17, 0x8e, 0xf9, 0xa1, 0xd6, 0x4f, 0x38, 0xff, 0x88, 0x11, 0x66, 0x3e, 0x49, 0xd0, 0xa7, 0x89, 0xfe, 0x67, 0x10, 0x48, 0x3f, 0xa6, 0xd1, 0x16, 0x61, 0xf8, 0x8f, 0xd7, 0xa0, 0x39, 0x4e, 0xaa, 0xdd, 0x44, 0x33, 0x6b, 0x1c, 0x85, 0xf2, 0x35, 0x42, 0xdb, 0xac, 0xf4, 0x83, 0x1a, 0x6d, 0xcf, 0xb8, 0x21, 0x56, 0xe, 0x79, 0xe0, 0x97, 0x50, 0x27, 0xbe, 0xc9, 0x91, 0xe6, 0x7f, 0x8, 0xec, 0x9b, 0x2, 0x75, 0x2d, 0x5a, 0xc3, 0xb4, 0x73, 0x4, 0x9d, 0xea, 0xb2, 0xc5, 0x5c, 0x2b},
- {0x0, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0xd, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92, 0xd3, 0xab, 0x23, 0x5b, 0x2e, 0x56, 0xde, 0xa6, 0x34, 0x4c, 0xc4, 0xbc, 0xc9, 0xb1, 0x39, 0x41, 0xbb, 0xc3, 0x4b, 0x33, 0x46, 0x3e, 0xb6, 0xce, 0x5c, 0x24, 0xac, 0xd4, 0xa1, 0xd9, 0x51, 0x29, 0x68, 0x10, 0x98, 0xe0, 0x95, 0xed, 0x65, 0x1d, 0x8f, 0xf7, 0x7f, 0x7, 0x72, 0xa, 0x82, 0xfa, 0x6b, 0x13, 0x9b, 0xe3, 0x96, 0xee, 0x66, 0x1e, 0x8c, 0xf4, 0x7c, 0x4, 0x71, 0x9, 0x81, 0xf9, 0xb8, 0xc0, 0x48, 0x30, 0x45, 0x3d, 0xb5, 0xcd, 0x5f, 0x27, 0xaf, 0xd7, 0xa2, 0xda, 0x52, 0x2a, 0xd0, 0xa8, 0x20, 0x58, 0x2d, 0x55, 0xdd, 0xa5, 0x37, 0x4f, 0xc7, 0xbf, 0xca, 0xb2, 0x3a, 0x42, 0x3, 0x7b, 0xf3, 0x8b, 0xfe, 0x86, 0xe, 0x76, 0xe4, 0x9c, 0x14, 0x6c, 0x19, 0x61, 0xe9, 0x91, 0xd6, 0xae, 0x26, 0x5e, 0x2b, 0x53, 0xdb, 0xa3, 0x31, 0x49, 0xc1, 0xb9, 0xcc, 0xb4, 0x3c, 0x44, 0x5, 0x7d, 0xf5, 0x8d, 0xf8, 0x80, 0x8, 0x70, 0xe2, 0x9a, 0x12, 0x6a, 0x1f, 0x67, 0xef, 0x97, 0x6d, 0x15, 0x9d, 0xe5, 0x90, 0xe8, 0x60, 0x18, 0x8a, 0xf2, 0x7a, 0x2, 0x77, 0xf, 0x87, 0xff, 0xbe, 0xc6, 0x4e, 0x36, 0x43, 0x3b, 0xb3, 0xcb, 0x59, 0x21, 0xa9, 0xd1, 0xa4, 0xdc, 0x54, 0x2c, 0xbd, 0xc5, 0x4d, 0x35, 0x40, 0x38, 0xb0, 0xc8, 0x5a, 0x22, 0xaa, 0xd2, 0xa7, 0xdf, 0x57, 0x2f, 0x6e, 0x16, 0x9e, 0xe6, 0x93, 0xeb, 0x63, 0x1b, 0x89, 0xf1, 0x79, 0x1, 0x74, 0xc, 0x84, 0xfc, 0x6, 0x7e, 0xf6, 0x8e, 0xfb, 0x83, 0xb, 0x73, 0xe1, 0x99, 0x11, 0x69, 0x1c, 0x64, 0xec, 0x94, 0xd5, 0xad, 0x25, 0x5d, 0x28, 0x50, 0xd8, 0xa0, 0x32, 0x4a, 0xc2, 0xba, 0xcf, 0xb7, 0x3f, 0x47},
- {0x0, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0xb, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d, 0xc3, 0xba, 0x31, 0x48, 0x3a, 0x43, 0xc8, 0xb1, 0x2c, 0x55, 0xde, 0xa7, 0xd5, 0xac, 0x27, 0x5e, 0x9b, 0xe2, 0x69, 0x10, 0x62, 0x1b, 0x90, 0xe9, 0x74, 0xd, 0x86, 0xff, 0x8d, 0xf4, 0x7f, 0x6, 0x58, 0x21, 0xaa, 0xd3, 0xa1, 0xd8, 0x53, 0x2a, 0xb7, 0xce, 0x45, 0x3c, 0x4e, 0x37, 0xbc, 0xc5, 0x2b, 0x52, 0xd9, 0xa0, 0xd2, 0xab, 0x20, 0x59, 0xc4, 0xbd, 0x36, 0x4f, 0x3d, 0x44, 0xcf, 0xb6, 0xe8, 0x91, 0x1a, 0x63, 0x11, 0x68, 0xe3, 0x9a, 0x7, 0x7e, 0xf5, 0x8c, 0xfe, 0x87, 0xc, 0x75, 0xb0, 0xc9, 0x42, 0x3b, 0x49, 0x30, 0xbb, 0xc2, 0x5f, 0x26, 0xad, 0xd4, 0xa6, 0xdf, 0x54, 0x2d, 0x73, 0xa, 0x81, 0xf8, 0x8a, 0xf3, 0x78, 0x1, 0x9c, 0xe5, 0x6e, 0x17, 0x65, 0x1c, 0x97, 0xee, 0x56, 0x2f, 0xa4, 0xdd, 0xaf, 0xd6, 0x5d, 0x24, 0xb9, 0xc0, 0x4b, 0x32, 0x40, 0x39, 0xb2, 0xcb, 0x95, 0xec, 0x67, 0x1e, 0x6c, 0x15, 0x9e, 0xe7, 0x7a, 0x3, 0x88, 0xf1, 0x83, 0xfa, 0x71, 0x8, 0xcd, 0xb4, 0x3f, 0x46, 0x34, 0x4d, 0xc6, 0xbf, 0x22, 0x5b, 0xd0, 0xa9, 0xdb, 0xa2, 0x29, 0x50, 0xe, 0x77, 0xfc, 0x85, 0xf7, 0x8e, 0x5, 0x7c, 0xe1, 0x98, 0x13, 0x6a, 0x18, 0x61, 0xea, 0x93, 0x7d, 0x4, 0x8f, 0xf6, 0x84, 0xfd, 0x76, 0xf, 0x92, 0xeb, 0x60, 0x19, 0x6b, 0x12, 0x99, 0xe0, 0xbe, 0xc7, 0x4c, 0x35, 0x47, 0x3e, 0xb5, 0xcc, 0x51, 0x28, 0xa3, 0xda, 0xa8, 0xd1, 0x5a, 0x23, 0xe6, 0x9f, 0x14, 0x6d, 0x1f, 0x66, 0xed, 0x94, 0x9, 0x70, 0xfb, 0x82, 0xf0, 0x89, 0x2, 0x7b, 0x25, 0x5c, 0xd7, 0xae, 0xdc, 0xa5, 0x2e, 0x57, 0xca, 0xb3, 0x38, 0x41, 0x33, 0x4a, 0xc1, 0xb8},
- {0x0, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x1, 0x7b, 0xf7, 0x8d, 0x3, 0x79, 0x2, 0x78, 0xf6, 0x8c, 0xf3, 0x89, 0x7, 0x7d, 0x6, 0x7c, 0xf2, 0x88, 0x4, 0x7e, 0xf0, 0x8a, 0xf1, 0x8b, 0x5, 0x7f, 0xfb, 0x81, 0xf, 0x75, 0xe, 0x74, 0xfa, 0x80, 0xc, 0x76, 0xf8, 0x82, 0xf9, 0x83, 0xd, 0x77, 0x8, 0x72, 0xfc, 0x86, 0xfd, 0x87, 0x9, 0x73, 0xff, 0x85, 0xb, 0x71, 0xa, 0x70, 0xfe, 0x84, 0xeb, 0x91, 0x1f, 0x65, 0x1e, 0x64, 0xea, 0x90, 0x1c, 0x66, 0xe8, 0x92, 0xe9, 0x93, 0x1d, 0x67, 0x18, 0x62, 0xec, 0x96, 0xed, 0x97, 0x19, 0x63, 0xef, 0x95, 0x1b, 0x61, 0x1a, 0x60, 0xee, 0x94, 0x10, 0x6a, 0xe4, 0x9e, 0xe5, 0x9f, 0x11, 0x6b, 0xe7, 0x9d, 0x13, 0x69, 0x12, 0x68, 0xe6, 0x9c, 0xe3, 0x99, 0x17, 0x6d, 0x16, 0x6c, 0xe2, 0x98, 0x14, 0x6e, 0xe0, 0x9a, 0xe1, 0x9b, 0x15, 0x6f, 0xcb, 0xb1, 0x3f, 0x45, 0x3e, 0x44, 0xca, 0xb0, 0x3c, 0x46, 0xc8, 0xb2, 0xc9, 0xb3, 0x3d, 0x47, 0x38, 0x42, 0xcc, 0xb6, 0xcd, 0xb7, 0x39, 0x43, 0xcf, 0xb5, 0x3b, 0x41, 0x3a, 0x40, 0xce, 0xb4, 0x30, 0x4a, 0xc4, 0xbe, 0xc5, 0xbf, 0x31, 0x4b, 0xc7, 0xbd, 0x33, 0x49, 0x32, 0x48, 0xc6, 0xbc, 0xc3, 0xb9, 0x37, 0x4d, 0x36, 0x4c, 0xc2, 0xb8, 0x34, 0x4e, 0xc0, 0xba, 0xc1, 0xbb, 0x35, 0x4f, 0x20, 0x5a, 0xd4, 0xae, 0xd5, 0xaf, 0x21, 0x5b, 0xd7, 0xad, 0x23, 0x59, 0x22, 0x58, 0xd6, 0xac, 0xd3, 0xa9, 0x27, 0x5d, 0x26, 0x5c, 0xd2, 0xa8, 0x24, 0x5e, 0xd0, 0xaa, 0xd1, 0xab, 0x25, 0x5f, 0xdb, 0xa1, 0x2f, 0x55, 0x2e, 0x54, 0xda, 0xa0, 0x2c, 0x56, 0xd8, 0xa2, 0xd9, 0xa3, 0x2d, 0x57, 0x28, 0x52, 0xdc, 0xa6, 0xdd, 0xa7, 0x29, 0x53, 0xdf, 0xa5, 0x2b, 0x51, 0x2a, 0x50, 0xde, 0xa4},
- {0x0, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x7, 0x7c, 0xff, 0x84, 0x9, 0x72, 0xe, 0x75, 0xf8, 0x83, 0xe3, 0x98, 0x15, 0x6e, 0x12, 0x69, 0xe4, 0x9f, 0x1c, 0x67, 0xea, 0x91, 0xed, 0x96, 0x1b, 0x60, 0xdb, 0xa0, 0x2d, 0x56, 0x2a, 0x51, 0xdc, 0xa7, 0x24, 0x5f, 0xd2, 0xa9, 0xd5, 0xae, 0x23, 0x58, 0x38, 0x43, 0xce, 0xb5, 0xc9, 0xb2, 0x3f, 0x44, 0xc7, 0xbc, 0x31, 0x4a, 0x36, 0x4d, 0xc0, 0xbb, 0xab, 0xd0, 0x5d, 0x26, 0x5a, 0x21, 0xac, 0xd7, 0x54, 0x2f, 0xa2, 0xd9, 0xa5, 0xde, 0x53, 0x28, 0x48, 0x33, 0xbe, 0xc5, 0xb9, 0xc2, 0x4f, 0x34, 0xb7, 0xcc, 0x41, 0x3a, 0x46, 0x3d, 0xb0, 0xcb, 0x70, 0xb, 0x86, 0xfd, 0x81, 0xfa, 0x77, 0xc, 0x8f, 0xf4, 0x79, 0x2, 0x7e, 0x5, 0x88, 0xf3, 0x93, 0xe8, 0x65, 0x1e, 0x62, 0x19, 0x94, 0xef, 0x6c, 0x17, 0x9a, 0xe1, 0x9d, 0xe6, 0x6b, 0x10, 0x4b, 0x30, 0xbd, 0xc6, 0xba, 0xc1, 0x4c, 0x37, 0xb4, 0xcf, 0x42, 0x39, 0x45, 0x3e, 0xb3, 0xc8, 0xa8, 0xd3, 0x5e, 0x25, 0x59, 0x22, 0xaf, 0xd4, 0x57, 0x2c, 0xa1, 0xda, 0xa6, 0xdd, 0x50, 0x2b, 0x90, 0xeb, 0x66, 0x1d, 0x61, 0x1a, 0x97, 0xec, 0x6f, 0x14, 0x99, 0xe2, 0x9e, 0xe5, 0x68, 0x13, 0x73, 0x8, 0x85, 0xfe, 0x82, 0xf9, 0x74, 0xf, 0x8c, 0xf7, 0x7a, 0x1, 0x7d, 0x6, 0x8b, 0xf0, 0xe0, 0x9b, 0x16, 0x6d, 0x11, 0x6a, 0xe7, 0x9c, 0x1f, 0x64, 0xe9, 0x92, 0xee, 0x95, 0x18, 0x63, 0x3, 0x78, 0xf5, 0x8e, 0xf2, 0x89, 0x4, 0x7f, 0xfc, 0x87, 0xa, 0x71, 0xd, 0x76, 0xfb, 0x80, 0x3b, 0x40, 0xcd, 0xb6, 0xca, 0xb1, 0x3c, 0x47, 0xc4, 0xbf, 0x32, 0x49, 0x35, 0x4e, 0xc3, 0xb8, 0xd8, 0xa3, 0x2e, 0x55, 0x29, 0x52, 0xdf, 0xa4, 0x27, 0x5c, 0xd1, 0xaa, 0xd6, 0xad, 0x20, 0x5b},
- {0x0, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae, 0x93, 0xef, 0x6b, 0x17, 0x7e, 0x2, 0x86, 0xfa, 0x54, 0x28, 0xac, 0xd0, 0xb9, 0xc5, 0x41, 0x3d, 0x3b, 0x47, 0xc3, 0xbf, 0xd6, 0xaa, 0x2e, 0x52, 0xfc, 0x80, 0x4, 0x78, 0x11, 0x6d, 0xe9, 0x95, 0xa8, 0xd4, 0x50, 0x2c, 0x45, 0x39, 0xbd, 0xc1, 0x6f, 0x13, 0x97, 0xeb, 0x82, 0xfe, 0x7a, 0x6, 0x76, 0xa, 0x8e, 0xf2, 0x9b, 0xe7, 0x63, 0x1f, 0xb1, 0xcd, 0x49, 0x35, 0x5c, 0x20, 0xa4, 0xd8, 0xe5, 0x99, 0x1d, 0x61, 0x8, 0x74, 0xf0, 0x8c, 0x22, 0x5e, 0xda, 0xa6, 0xcf, 0xb3, 0x37, 0x4b, 0x4d, 0x31, 0xb5, 0xc9, 0xa0, 0xdc, 0x58, 0x24, 0x8a, 0xf6, 0x72, 0xe, 0x67, 0x1b, 0x9f, 0xe3, 0xde, 0xa2, 0x26, 0x5a, 0x33, 0x4f, 0xcb, 0xb7, 0x19, 0x65, 0xe1, 0x9d, 0xf4, 0x88, 0xc, 0x70, 0xec, 0x90, 0x14, 0x68, 0x1, 0x7d, 0xf9, 0x85, 0x2b, 0x57, 0xd3, 0xaf, 0xc6, 0xba, 0x3e, 0x42, 0x7f, 0x3, 0x87, 0xfb, 0x92, 0xee, 0x6a, 0x16, 0xb8, 0xc4, 0x40, 0x3c, 0x55, 0x29, 0xad, 0xd1, 0xd7, 0xab, 0x2f, 0x53, 0x3a, 0x46, 0xc2, 0xbe, 0x10, 0x6c, 0xe8, 0x94, 0xfd, 0x81, 0x5, 0x79, 0x44, 0x38, 0xbc, 0xc0, 0xa9, 0xd5, 0x51, 0x2d, 0x83, 0xff, 0x7b, 0x7, 0x6e, 0x12, 0x96, 0xea, 0x9a, 0xe6, 0x62, 0x1e, 0x77, 0xb, 0x8f, 0xf3, 0x5d, 0x21, 0xa5, 0xd9, 0xb0, 0xcc, 0x48, 0x34, 0x9, 0x75, 0xf1, 0x8d, 0xe4, 0x98, 0x1c, 0x60, 0xce, 0xb2, 0x36, 0x4a, 0x23, 0x5f, 0xdb, 0xa7, 0xa1, 0xdd, 0x59, 0x25, 0x4c, 0x30, 0xb4, 0xc8, 0x66, 0x1a, 0x9e, 0xe2, 0x8b, 0xf7, 0x73, 0xf, 0x32, 0x4e, 0xca, 0xb6, 0xdf, 0xa3, 0x27, 0x5b, 0xf5, 0x89, 0xd, 0x71, 0x18, 0x64, 0xe0, 0x9c},
- {0x0, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1, 0x83, 0xfe, 0x79, 0x4, 0x6a, 0x17, 0x90, 0xed, 0x4c, 0x31, 0xb6, 0xcb, 0xa5, 0xd8, 0x5f, 0x22, 0x1b, 0x66, 0xe1, 0x9c, 0xf2, 0x8f, 0x8, 0x75, 0xd4, 0xa9, 0x2e, 0x53, 0x3d, 0x40, 0xc7, 0xba, 0x98, 0xe5, 0x62, 0x1f, 0x71, 0xc, 0x8b, 0xf6, 0x57, 0x2a, 0xad, 0xd0, 0xbe, 0xc3, 0x44, 0x39, 0x36, 0x4b, 0xcc, 0xb1, 0xdf, 0xa2, 0x25, 0x58, 0xf9, 0x84, 0x3, 0x7e, 0x10, 0x6d, 0xea, 0x97, 0xb5, 0xc8, 0x4f, 0x32, 0x5c, 0x21, 0xa6, 0xdb, 0x7a, 0x7, 0x80, 0xfd, 0x93, 0xee, 0x69, 0x14, 0x2d, 0x50, 0xd7, 0xaa, 0xc4, 0xb9, 0x3e, 0x43, 0xe2, 0x9f, 0x18, 0x65, 0xb, 0x76, 0xf1, 0x8c, 0xae, 0xd3, 0x54, 0x29, 0x47, 0x3a, 0xbd, 0xc0, 0x61, 0x1c, 0x9b, 0xe6, 0x88, 0xf5, 0x72, 0xf, 0x6c, 0x11, 0x96, 0xeb, 0x85, 0xf8, 0x7f, 0x2, 0xa3, 0xde, 0x59, 0x24, 0x4a, 0x37, 0xb0, 0xcd, 0xef, 0x92, 0x15, 0x68, 0x6, 0x7b, 0xfc, 0x81, 0x20, 0x5d, 0xda, 0xa7, 0xc9, 0xb4, 0x33, 0x4e, 0x77, 0xa, 0x8d, 0xf0, 0x9e, 0xe3, 0x64, 0x19, 0xb8, 0xc5, 0x42, 0x3f, 0x51, 0x2c, 0xab, 0xd6, 0xf4, 0x89, 0xe, 0x73, 0x1d, 0x60, 0xe7, 0x9a, 0x3b, 0x46, 0xc1, 0xbc, 0xd2, 0xaf, 0x28, 0x55, 0x5a, 0x27, 0xa0, 0xdd, 0xb3, 0xce, 0x49, 0x34, 0x95, 0xe8, 0x6f, 0x12, 0x7c, 0x1, 0x86, 0xfb, 0xd9, 0xa4, 0x23, 0x5e, 0x30, 0x4d, 0xca, 0xb7, 0x16, 0x6b, 0xec, 0x91, 0xff, 0x82, 0x5, 0x78, 0x41, 0x3c, 0xbb, 0xc6, 0xa8, 0xd5, 0x52, 0x2f, 0x8e, 0xf3, 0x74, 0x9, 0x67, 0x1a, 0x9d, 0xe0, 0xc2, 0xbf, 0x38, 0x45, 0x2b, 0x56, 0xd1, 0xac, 0xd, 0x70, 0xf7, 0x8a, 0xe4, 0x99, 0x1e, 0x63},
- {0x0, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0, 0xb3, 0xcd, 0x4f, 0x31, 0x56, 0x28, 0xaa, 0xd4, 0x64, 0x1a, 0x98, 0xe6, 0x81, 0xff, 0x7d, 0x3, 0x7b, 0x5, 0x87, 0xf9, 0x9e, 0xe0, 0x62, 0x1c, 0xac, 0xd2, 0x50, 0x2e, 0x49, 0x37, 0xb5, 0xcb, 0xc8, 0xb6, 0x34, 0x4a, 0x2d, 0x53, 0xd1, 0xaf, 0x1f, 0x61, 0xe3, 0x9d, 0xfa, 0x84, 0x6, 0x78, 0xf6, 0x88, 0xa, 0x74, 0x13, 0x6d, 0xef, 0x91, 0x21, 0x5f, 0xdd, 0xa3, 0xc4, 0xba, 0x38, 0x46, 0x45, 0x3b, 0xb9, 0xc7, 0xa0, 0xde, 0x5c, 0x22, 0x92, 0xec, 0x6e, 0x10, 0x77, 0x9, 0x8b, 0xf5, 0x8d, 0xf3, 0x71, 0xf, 0x68, 0x16, 0x94, 0xea, 0x5a, 0x24, 0xa6, 0xd8, 0xbf, 0xc1, 0x43, 0x3d, 0x3e, 0x40, 0xc2, 0xbc, 0xdb, 0xa5, 0x27, 0x59, 0xe9, 0x97, 0x15, 0x6b, 0xc, 0x72, 0xf0, 0x8e, 0xf1, 0x8f, 0xd, 0x73, 0x14, 0x6a, 0xe8, 0x96, 0x26, 0x58, 0xda, 0xa4, 0xc3, 0xbd, 0x3f, 0x41, 0x42, 0x3c, 0xbe, 0xc0, 0xa7, 0xd9, 0x5b, 0x25, 0x95, 0xeb, 0x69, 0x17, 0x70, 0xe, 0x8c, 0xf2, 0x8a, 0xf4, 0x76, 0x8, 0x6f, 0x11, 0x93, 0xed, 0x5d, 0x23, 0xa1, 0xdf, 0xb8, 0xc6, 0x44, 0x3a, 0x39, 0x47, 0xc5, 0xbb, 0xdc, 0xa2, 0x20, 0x5e, 0xee, 0x90, 0x12, 0x6c, 0xb, 0x75, 0xf7, 0x89, 0x7, 0x79, 0xfb, 0x85, 0xe2, 0x9c, 0x1e, 0x60, 0xd0, 0xae, 0x2c, 0x52, 0x35, 0x4b, 0xc9, 0xb7, 0xb4, 0xca, 0x48, 0x36, 0x51, 0x2f, 0xad, 0xd3, 0x63, 0x1d, 0x9f, 0xe1, 0x86, 0xf8, 0x7a, 0x4, 0x7c, 0x2, 0x80, 0xfe, 0x99, 0xe7, 0x65, 0x1b, 0xab, 0xd5, 0x57, 0x29, 0x4e, 0x30, 0xb2, 0xcc, 0xcf, 0xb1, 0x33, 0x4d, 0x2a, 0x54, 0xd6, 0xa8, 0x18, 0x66, 0xe4, 0x9a, 0xfd, 0x83, 0x1, 0x7f},
- {0x0, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf, 0xa3, 0xdc, 0x5d, 0x22, 0x42, 0x3d, 0xbc, 0xc3, 0x7c, 0x3, 0x82, 0xfd, 0x9d, 0xe2, 0x63, 0x1c, 0x5b, 0x24, 0xa5, 0xda, 0xba, 0xc5, 0x44, 0x3b, 0x84, 0xfb, 0x7a, 0x5, 0x65, 0x1a, 0x9b, 0xe4, 0xf8, 0x87, 0x6, 0x79, 0x19, 0x66, 0xe7, 0x98, 0x27, 0x58, 0xd9, 0xa6, 0xc6, 0xb9, 0x38, 0x47, 0xb6, 0xc9, 0x48, 0x37, 0x57, 0x28, 0xa9, 0xd6, 0x69, 0x16, 0x97, 0xe8, 0x88, 0xf7, 0x76, 0x9, 0x15, 0x6a, 0xeb, 0x94, 0xf4, 0x8b, 0xa, 0x75, 0xca, 0xb5, 0x34, 0x4b, 0x2b, 0x54, 0xd5, 0xaa, 0xed, 0x92, 0x13, 0x6c, 0xc, 0x73, 0xf2, 0x8d, 0x32, 0x4d, 0xcc, 0xb3, 0xd3, 0xac, 0x2d, 0x52, 0x4e, 0x31, 0xb0, 0xcf, 0xaf, 0xd0, 0x51, 0x2e, 0x91, 0xee, 0x6f, 0x10, 0x70, 0xf, 0x8e, 0xf1, 0x71, 0xe, 0x8f, 0xf0, 0x90, 0xef, 0x6e, 0x11, 0xae, 0xd1, 0x50, 0x2f, 0x4f, 0x30, 0xb1, 0xce, 0xd2, 0xad, 0x2c, 0x53, 0x33, 0x4c, 0xcd, 0xb2, 0xd, 0x72, 0xf3, 0x8c, 0xec, 0x93, 0x12, 0x6d, 0x2a, 0x55, 0xd4, 0xab, 0xcb, 0xb4, 0x35, 0x4a, 0xf5, 0x8a, 0xb, 0x74, 0x14, 0x6b, 0xea, 0x95, 0x89, 0xf6, 0x77, 0x8, 0x68, 0x17, 0x96, 0xe9, 0x56, 0x29, 0xa8, 0xd7, 0xb7, 0xc8, 0x49, 0x36, 0xc7, 0xb8, 0x39, 0x46, 0x26, 0x59, 0xd8, 0xa7, 0x18, 0x67, 0xe6, 0x99, 0xf9, 0x86, 0x7, 0x78, 0x64, 0x1b, 0x9a, 0xe5, 0x85, 0xfa, 0x7b, 0x4, 0xbb, 0xc4, 0x45, 0x3a, 0x5a, 0x25, 0xa4, 0xdb, 0x9c, 0xe3, 0x62, 0x1d, 0x7d, 0x2, 0x83, 0xfc, 0x43, 0x3c, 0xbd, 0xc2, 0xa2, 0xdd, 0x5c, 0x23, 0x3f, 0x40, 0xc1, 0xbe, 0xde, 0xa1, 0x20, 0x5f, 0xe0, 0x9f, 0x1e, 0x61, 0x1, 0x7e, 0xff, 0x80},
- {0x0, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3, 0xe8, 0x68, 0xf5, 0x75, 0xd2, 0x52, 0xcf, 0x4f, 0x9c, 0x1c, 0x81, 0x1, 0xa6, 0x26, 0xbb, 0x3b, 0xcd, 0x4d, 0xd0, 0x50, 0xf7, 0x77, 0xea, 0x6a, 0xb9, 0x39, 0xa4, 0x24, 0x83, 0x3, 0x9e, 0x1e, 0x25, 0xa5, 0x38, 0xb8, 0x1f, 0x9f, 0x2, 0x82, 0x51, 0xd1, 0x4c, 0xcc, 0x6b, 0xeb, 0x76, 0xf6, 0x87, 0x7, 0x9a, 0x1a, 0xbd, 0x3d, 0xa0, 0x20, 0xf3, 0x73, 0xee, 0x6e, 0xc9, 0x49, 0xd4, 0x54, 0x6f, 0xef, 0x72, 0xf2, 0x55, 0xd5, 0x48, 0xc8, 0x1b, 0x9b, 0x6, 0x86, 0x21, 0xa1, 0x3c, 0xbc, 0x4a, 0xca, 0x57, 0xd7, 0x70, 0xf0, 0x6d, 0xed, 0x3e, 0xbe, 0x23, 0xa3, 0x4, 0x84, 0x19, 0x99, 0xa2, 0x22, 0xbf, 0x3f, 0x98, 0x18, 0x85, 0x5, 0xd6, 0x56, 0xcb, 0x4b, 0xec, 0x6c, 0xf1, 0x71, 0x13, 0x93, 0xe, 0x8e, 0x29, 0xa9, 0x34, 0xb4, 0x67, 0xe7, 0x7a, 0xfa, 0x5d, 0xdd, 0x40, 0xc0, 0xfb, 0x7b, 0xe6, 0x66, 0xc1, 0x41, 0xdc, 0x5c, 0x8f, 0xf, 0x92, 0x12, 0xb5, 0x35, 0xa8, 0x28, 0xde, 0x5e, 0xc3, 0x43, 0xe4, 0x64, 0xf9, 0x79, 0xaa, 0x2a, 0xb7, 0x37, 0x90, 0x10, 0x8d, 0xd, 0x36, 0xb6, 0x2b, 0xab, 0xc, 0x8c, 0x11, 0x91, 0x42, 0xc2, 0x5f, 0xdf, 0x78, 0xf8, 0x65, 0xe5, 0x94, 0x14, 0x89, 0x9, 0xae, 0x2e, 0xb3, 0x33, 0xe0, 0x60, 0xfd, 0x7d, 0xda, 0x5a, 0xc7, 0x47, 0x7c, 0xfc, 0x61, 0xe1, 0x46, 0xc6, 0x5b, 0xdb, 0x8, 0x88, 0x15, 0x95, 0x32, 0xb2, 0x2f, 0xaf, 0x59, 0xd9, 0x44, 0xc4, 0x63, 0xe3, 0x7e, 0xfe, 0x2d, 0xad, 0x30, 0xb0, 0x17, 0x97, 0xa, 0x8a, 0xb1, 0x31, 0xac, 0x2c, 0x8b, 0xb, 0x96, 0x16, 0xc5, 0x45, 0xd8, 0x58, 0xff, 0x7f, 0xe2, 0x62},
- {0x0, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc, 0xf8, 0x79, 0xe7, 0x66, 0xc6, 0x47, 0xd9, 0x58, 0x84, 0x5, 0x9b, 0x1a, 0xba, 0x3b, 0xa5, 0x24, 0xed, 0x6c, 0xf2, 0x73, 0xd3, 0x52, 0xcc, 0x4d, 0x91, 0x10, 0x8e, 0xf, 0xaf, 0x2e, 0xb0, 0x31, 0x15, 0x94, 0xa, 0x8b, 0x2b, 0xaa, 0x34, 0xb5, 0x69, 0xe8, 0x76, 0xf7, 0x57, 0xd6, 0x48, 0xc9, 0xc7, 0x46, 0xd8, 0x59, 0xf9, 0x78, 0xe6, 0x67, 0xbb, 0x3a, 0xa4, 0x25, 0x85, 0x4, 0x9a, 0x1b, 0x3f, 0xbe, 0x20, 0xa1, 0x1, 0x80, 0x1e, 0x9f, 0x43, 0xc2, 0x5c, 0xdd, 0x7d, 0xfc, 0x62, 0xe3, 0x2a, 0xab, 0x35, 0xb4, 0x14, 0x95, 0xb, 0x8a, 0x56, 0xd7, 0x49, 0xc8, 0x68, 0xe9, 0x77, 0xf6, 0xd2, 0x53, 0xcd, 0x4c, 0xec, 0x6d, 0xf3, 0x72, 0xae, 0x2f, 0xb1, 0x30, 0x90, 0x11, 0x8f, 0xe, 0x93, 0x12, 0x8c, 0xd, 0xad, 0x2c, 0xb2, 0x33, 0xef, 0x6e, 0xf0, 0x71, 0xd1, 0x50, 0xce, 0x4f, 0x6b, 0xea, 0x74, 0xf5, 0x55, 0xd4, 0x4a, 0xcb, 0x17, 0x96, 0x8, 0x89, 0x29, 0xa8, 0x36, 0xb7, 0x7e, 0xff, 0x61, 0xe0, 0x40, 0xc1, 0x5f, 0xde, 0x2, 0x83, 0x1d, 0x9c, 0x3c, 0xbd, 0x23, 0xa2, 0x86, 0x7, 0x99, 0x18, 0xb8, 0x39, 0xa7, 0x26, 0xfa, 0x7b, 0xe5, 0x64, 0xc4, 0x45, 0xdb, 0x5a, 0x54, 0xd5, 0x4b, 0xca, 0x6a, 0xeb, 0x75, 0xf4, 0x28, 0xa9, 0x37, 0xb6, 0x16, 0x97, 0x9, 0x88, 0xac, 0x2d, 0xb3, 0x32, 0x92, 0x13, 0x8d, 0xc, 0xd0, 0x51, 0xcf, 0x4e, 0xee, 0x6f, 0xf1, 0x70, 0xb9, 0x38, 0xa6, 0x27, 0x87, 0x6, 0x98, 0x19, 0xc5, 0x44, 0xda, 0x5b, 0xfb, 0x7a, 0xe4, 0x65, 0x41, 0xc0, 0x5e, 0xdf, 0x7f, 0xfe, 0x60, 0xe1, 0x3d, 0xbc, 0x22, 0xa3, 0x3, 0x82, 0x1c, 0x9d},
- {0x0, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd, 0xc8, 0x4a, 0xd1, 0x53, 0xfa, 0x78, 0xe3, 0x61, 0xac, 0x2e, 0xb5, 0x37, 0x9e, 0x1c, 0x87, 0x5, 0x8d, 0xf, 0x94, 0x16, 0xbf, 0x3d, 0xa6, 0x24, 0xe9, 0x6b, 0xf0, 0x72, 0xdb, 0x59, 0xc2, 0x40, 0x45, 0xc7, 0x5c, 0xde, 0x77, 0xf5, 0x6e, 0xec, 0x21, 0xa3, 0x38, 0xba, 0x13, 0x91, 0xa, 0x88, 0x7, 0x85, 0x1e, 0x9c, 0x35, 0xb7, 0x2c, 0xae, 0x63, 0xe1, 0x7a, 0xf8, 0x51, 0xd3, 0x48, 0xca, 0xcf, 0x4d, 0xd6, 0x54, 0xfd, 0x7f, 0xe4, 0x66, 0xab, 0x29, 0xb2, 0x30, 0x99, 0x1b, 0x80, 0x2, 0x8a, 0x8, 0x93, 0x11, 0xb8, 0x3a, 0xa1, 0x23, 0xee, 0x6c, 0xf7, 0x75, 0xdc, 0x5e, 0xc5, 0x47, 0x42, 0xc0, 0x5b, 0xd9, 0x70, 0xf2, 0x69, 0xeb, 0x26, 0xa4, 0x3f, 0xbd, 0x14, 0x96, 0xd, 0x8f, 0xe, 0x8c, 0x17, 0x95, 0x3c, 0xbe, 0x25, 0xa7, 0x6a, 0xe8, 0x73, 0xf1, 0x58, 0xda, 0x41, 0xc3, 0xc6, 0x44, 0xdf, 0x5d, 0xf4, 0x76, 0xed, 0x6f, 0xa2, 0x20, 0xbb, 0x39, 0x90, 0x12, 0x89, 0xb, 0x83, 0x1, 0x9a, 0x18, 0xb1, 0x33, 0xa8, 0x2a, 0xe7, 0x65, 0xfe, 0x7c, 0xd5, 0x57, 0xcc, 0x4e, 0x4b, 0xc9, 0x52, 0xd0, 0x79, 0xfb, 0x60, 0xe2, 0x2f, 0xad, 0x36, 0xb4, 0x1d, 0x9f, 0x4, 0x86, 0x9, 0x8b, 0x10, 0x92, 0x3b, 0xb9, 0x22, 0xa0, 0x6d, 0xef, 0x74, 0xf6, 0x5f, 0xdd, 0x46, 0xc4, 0xc1, 0x43, 0xd8, 0x5a, 0xf3, 0x71, 0xea, 0x68, 0xa5, 0x27, 0xbc, 0x3e, 0x97, 0x15, 0x8e, 0xc, 0x84, 0x6, 0x9d, 0x1f, 0xb6, 0x34, 0xaf, 0x2d, 0xe0, 0x62, 0xf9, 0x7b, 0xd2, 0x50, 0xcb, 0x49, 0x4c, 0xce, 0x55, 0xd7, 0x7e, 0xfc, 0x67, 0xe5, 0x28, 0xaa, 0x31, 0xb3, 0x1a, 0x98, 0x3, 0x81},
- {0x0, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2, 0xd8, 0x5b, 0xc3, 0x40, 0xee, 0x6d, 0xf5, 0x76, 0xb4, 0x37, 0xaf, 0x2c, 0x82, 0x1, 0x99, 0x1a, 0xad, 0x2e, 0xb6, 0x35, 0x9b, 0x18, 0x80, 0x3, 0xc1, 0x42, 0xda, 0x59, 0xf7, 0x74, 0xec, 0x6f, 0x75, 0xf6, 0x6e, 0xed, 0x43, 0xc0, 0x58, 0xdb, 0x19, 0x9a, 0x2, 0x81, 0x2f, 0xac, 0x34, 0xb7, 0x47, 0xc4, 0x5c, 0xdf, 0x71, 0xf2, 0x6a, 0xe9, 0x2b, 0xa8, 0x30, 0xb3, 0x1d, 0x9e, 0x6, 0x85, 0x9f, 0x1c, 0x84, 0x7, 0xa9, 0x2a, 0xb2, 0x31, 0xf3, 0x70, 0xe8, 0x6b, 0xc5, 0x46, 0xde, 0x5d, 0xea, 0x69, 0xf1, 0x72, 0xdc, 0x5f, 0xc7, 0x44, 0x86, 0x5, 0x9d, 0x1e, 0xb0, 0x33, 0xab, 0x28, 0x32, 0xb1, 0x29, 0xaa, 0x4, 0x87, 0x1f, 0x9c, 0x5e, 0xdd, 0x45, 0xc6, 0x68, 0xeb, 0x73, 0xf0, 0x8e, 0xd, 0x95, 0x16, 0xb8, 0x3b, 0xa3, 0x20, 0xe2, 0x61, 0xf9, 0x7a, 0xd4, 0x57, 0xcf, 0x4c, 0x56, 0xd5, 0x4d, 0xce, 0x60, 0xe3, 0x7b, 0xf8, 0x3a, 0xb9, 0x21, 0xa2, 0xc, 0x8f, 0x17, 0x94, 0x23, 0xa0, 0x38, 0xbb, 0x15, 0x96, 0xe, 0x8d, 0x4f, 0xcc, 0x54, 0xd7, 0x79, 0xfa, 0x62, 0xe1, 0xfb, 0x78, 0xe0, 0x63, 0xcd, 0x4e, 0xd6, 0x55, 0x97, 0x14, 0x8c, 0xf, 0xa1, 0x22, 0xba, 0x39, 0xc9, 0x4a, 0xd2, 0x51, 0xff, 0x7c, 0xe4, 0x67, 0xa5, 0x26, 0xbe, 0x3d, 0x93, 0x10, 0x88, 0xb, 0x11, 0x92, 0xa, 0x89, 0x27, 0xa4, 0x3c, 0xbf, 0x7d, 0xfe, 0x66, 0xe5, 0x4b, 0xc8, 0x50, 0xd3, 0x64, 0xe7, 0x7f, 0xfc, 0x52, 0xd1, 0x49, 0xca, 0x8, 0x8b, 0x13, 0x90, 0x3e, 0xbd, 0x25, 0xa6, 0xbc, 0x3f, 0xa7, 0x24, 0x8a, 0x9, 0x91, 0x12, 0xd0, 0x53, 0xcb, 0x48, 0xe6, 0x65, 0xfd, 0x7e},
- {0x0, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef, 0xa8, 0x2c, 0xbd, 0x39, 0x82, 0x6, 0x97, 0x13, 0xfc, 0x78, 0xe9, 0x6d, 0xd6, 0x52, 0xc3, 0x47, 0x4d, 0xc9, 0x58, 0xdc, 0x67, 0xe3, 0x72, 0xf6, 0x19, 0x9d, 0xc, 0x88, 0x33, 0xb7, 0x26, 0xa2, 0xe5, 0x61, 0xf0, 0x74, 0xcf, 0x4b, 0xda, 0x5e, 0xb1, 0x35, 0xa4, 0x20, 0x9b, 0x1f, 0x8e, 0xa, 0x9a, 0x1e, 0x8f, 0xb, 0xb0, 0x34, 0xa5, 0x21, 0xce, 0x4a, 0xdb, 0x5f, 0xe4, 0x60, 0xf1, 0x75, 0x32, 0xb6, 0x27, 0xa3, 0x18, 0x9c, 0xd, 0x89, 0x66, 0xe2, 0x73, 0xf7, 0x4c, 0xc8, 0x59, 0xdd, 0xd7, 0x53, 0xc2, 0x46, 0xfd, 0x79, 0xe8, 0x6c, 0x83, 0x7, 0x96, 0x12, 0xa9, 0x2d, 0xbc, 0x38, 0x7f, 0xfb, 0x6a, 0xee, 0x55, 0xd1, 0x40, 0xc4, 0x2b, 0xaf, 0x3e, 0xba, 0x1, 0x85, 0x14, 0x90, 0x29, 0xad, 0x3c, 0xb8, 0x3, 0x87, 0x16, 0x92, 0x7d, 0xf9, 0x68, 0xec, 0x57, 0xd3, 0x42, 0xc6, 0x81, 0x5, 0x94, 0x10, 0xab, 0x2f, 0xbe, 0x3a, 0xd5, 0x51, 0xc0, 0x44, 0xff, 0x7b, 0xea, 0x6e, 0x64, 0xe0, 0x71, 0xf5, 0x4e, 0xca, 0x5b, 0xdf, 0x30, 0xb4, 0x25, 0xa1, 0x1a, 0x9e, 0xf, 0x8b, 0xcc, 0x48, 0xd9, 0x5d, 0xe6, 0x62, 0xf3, 0x77, 0x98, 0x1c, 0x8d, 0x9, 0xb2, 0x36, 0xa7, 0x23, 0xb3, 0x37, 0xa6, 0x22, 0x99, 0x1d, 0x8c, 0x8, 0xe7, 0x63, 0xf2, 0x76, 0xcd, 0x49, 0xd8, 0x5c, 0x1b, 0x9f, 0xe, 0x8a, 0x31, 0xb5, 0x24, 0xa0, 0x4f, 0xcb, 0x5a, 0xde, 0x65, 0xe1, 0x70, 0xf4, 0xfe, 0x7a, 0xeb, 0x6f, 0xd4, 0x50, 0xc1, 0x45, 0xaa, 0x2e, 0xbf, 0x3b, 0x80, 0x4, 0x95, 0x11, 0x56, 0xd2, 0x43, 0xc7, 0x7c, 0xf8, 0x69, 0xed, 0x2, 0x86, 0x17, 0x93, 0x28, 0xac, 0x3d, 0xb9},
- {0x0, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0, 0xb8, 0x3d, 0xaf, 0x2a, 0x96, 0x13, 0x81, 0x4, 0xe4, 0x61, 0xf3, 0x76, 0xca, 0x4f, 0xdd, 0x58, 0x6d, 0xe8, 0x7a, 0xff, 0x43, 0xc6, 0x54, 0xd1, 0x31, 0xb4, 0x26, 0xa3, 0x1f, 0x9a, 0x8, 0x8d, 0xd5, 0x50, 0xc2, 0x47, 0xfb, 0x7e, 0xec, 0x69, 0x89, 0xc, 0x9e, 0x1b, 0xa7, 0x22, 0xb0, 0x35, 0xda, 0x5f, 0xcd, 0x48, 0xf4, 0x71, 0xe3, 0x66, 0x86, 0x3, 0x91, 0x14, 0xa8, 0x2d, 0xbf, 0x3a, 0x62, 0xe7, 0x75, 0xf0, 0x4c, 0xc9, 0x5b, 0xde, 0x3e, 0xbb, 0x29, 0xac, 0x10, 0x95, 0x7, 0x82, 0xb7, 0x32, 0xa0, 0x25, 0x99, 0x1c, 0x8e, 0xb, 0xeb, 0x6e, 0xfc, 0x79, 0xc5, 0x40, 0xd2, 0x57, 0xf, 0x8a, 0x18, 0x9d, 0x21, 0xa4, 0x36, 0xb3, 0x53, 0xd6, 0x44, 0xc1, 0x7d, 0xf8, 0x6a, 0xef, 0xa9, 0x2c, 0xbe, 0x3b, 0x87, 0x2, 0x90, 0x15, 0xf5, 0x70, 0xe2, 0x67, 0xdb, 0x5e, 0xcc, 0x49, 0x11, 0x94, 0x6, 0x83, 0x3f, 0xba, 0x28, 0xad, 0x4d, 0xc8, 0x5a, 0xdf, 0x63, 0xe6, 0x74, 0xf1, 0xc4, 0x41, 0xd3, 0x56, 0xea, 0x6f, 0xfd, 0x78, 0x98, 0x1d, 0x8f, 0xa, 0xb6, 0x33, 0xa1, 0x24, 0x7c, 0xf9, 0x6b, 0xee, 0x52, 0xd7, 0x45, 0xc0, 0x20, 0xa5, 0x37, 0xb2, 0xe, 0x8b, 0x19, 0x9c, 0x73, 0xf6, 0x64, 0xe1, 0x5d, 0xd8, 0x4a, 0xcf, 0x2f, 0xaa, 0x38, 0xbd, 0x1, 0x84, 0x16, 0x93, 0xcb, 0x4e, 0xdc, 0x59, 0xe5, 0x60, 0xf2, 0x77, 0x97, 0x12, 0x80, 0x5, 0xb9, 0x3c, 0xae, 0x2b, 0x1e, 0x9b, 0x9, 0x8c, 0x30, 0xb5, 0x27, 0xa2, 0x42, 0xc7, 0x55, 0xd0, 0x6c, 0xe9, 0x7b, 0xfe, 0xa6, 0x23, 0xb1, 0x34, 0x88, 0xd, 0x9f, 0x1a, 0xfa, 0x7f, 0xed, 0x68, 0xd4, 0x51, 0xc3, 0x46},
- {0x0, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1, 0x88, 0xe, 0x99, 0x1f, 0xaa, 0x2c, 0xbb, 0x3d, 0xcc, 0x4a, 0xdd, 0x5b, 0xee, 0x68, 0xff, 0x79, 0xd, 0x8b, 0x1c, 0x9a, 0x2f, 0xa9, 0x3e, 0xb8, 0x49, 0xcf, 0x58, 0xde, 0x6b, 0xed, 0x7a, 0xfc, 0x85, 0x3, 0x94, 0x12, 0xa7, 0x21, 0xb6, 0x30, 0xc1, 0x47, 0xd0, 0x56, 0xe3, 0x65, 0xf2, 0x74, 0x1a, 0x9c, 0xb, 0x8d, 0x38, 0xbe, 0x29, 0xaf, 0x5e, 0xd8, 0x4f, 0xc9, 0x7c, 0xfa, 0x6d, 0xeb, 0x92, 0x14, 0x83, 0x5, 0xb0, 0x36, 0xa1, 0x27, 0xd6, 0x50, 0xc7, 0x41, 0xf4, 0x72, 0xe5, 0x63, 0x17, 0x91, 0x6, 0x80, 0x35, 0xb3, 0x24, 0xa2, 0x53, 0xd5, 0x42, 0xc4, 0x71, 0xf7, 0x60, 0xe6, 0x9f, 0x19, 0x8e, 0x8, 0xbd, 0x3b, 0xac, 0x2a, 0xdb, 0x5d, 0xca, 0x4c, 0xf9, 0x7f, 0xe8, 0x6e, 0x34, 0xb2, 0x25, 0xa3, 0x16, 0x90, 0x7, 0x81, 0x70, 0xf6, 0x61, 0xe7, 0x52, 0xd4, 0x43, 0xc5, 0xbc, 0x3a, 0xad, 0x2b, 0x9e, 0x18, 0x8f, 0x9, 0xf8, 0x7e, 0xe9, 0x6f, 0xda, 0x5c, 0xcb, 0x4d, 0x39, 0xbf, 0x28, 0xae, 0x1b, 0x9d, 0xa, 0x8c, 0x7d, 0xfb, 0x6c, 0xea, 0x5f, 0xd9, 0x4e, 0xc8, 0xb1, 0x37, 0xa0, 0x26, 0x93, 0x15, 0x82, 0x4, 0xf5, 0x73, 0xe4, 0x62, 0xd7, 0x51, 0xc6, 0x40, 0x2e, 0xa8, 0x3f, 0xb9, 0xc, 0x8a, 0x1d, 0x9b, 0x6a, 0xec, 0x7b, 0xfd, 0x48, 0xce, 0x59, 0xdf, 0xa6, 0x20, 0xb7, 0x31, 0x84, 0x2, 0x95, 0x13, 0xe2, 0x64, 0xf3, 0x75, 0xc0, 0x46, 0xd1, 0x57, 0x23, 0xa5, 0x32, 0xb4, 0x1, 0x87, 0x10, 0x96, 0x67, 0xe1, 0x76, 0xf0, 0x45, 0xc3, 0x54, 0xd2, 0xab, 0x2d, 0xba, 0x3c, 0x89, 0xf, 0x98, 0x1e, 0xef, 0x69, 0xfe, 0x78, 0xcd, 0x4b, 0xdc, 0x5a},
- {0x0, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe, 0x98, 0x1f, 0x8b, 0xc, 0xbe, 0x39, 0xad, 0x2a, 0xd4, 0x53, 0xc7, 0x40, 0xf2, 0x75, 0xe1, 0x66, 0x2d, 0xaa, 0x3e, 0xb9, 0xb, 0x8c, 0x18, 0x9f, 0x61, 0xe6, 0x72, 0xf5, 0x47, 0xc0, 0x54, 0xd3, 0xb5, 0x32, 0xa6, 0x21, 0x93, 0x14, 0x80, 0x7, 0xf9, 0x7e, 0xea, 0x6d, 0xdf, 0x58, 0xcc, 0x4b, 0x5a, 0xdd, 0x49, 0xce, 0x7c, 0xfb, 0x6f, 0xe8, 0x16, 0x91, 0x5, 0x82, 0x30, 0xb7, 0x23, 0xa4, 0xc2, 0x45, 0xd1, 0x56, 0xe4, 0x63, 0xf7, 0x70, 0x8e, 0x9, 0x9d, 0x1a, 0xa8, 0x2f, 0xbb, 0x3c, 0x77, 0xf0, 0x64, 0xe3, 0x51, 0xd6, 0x42, 0xc5, 0x3b, 0xbc, 0x28, 0xaf, 0x1d, 0x9a, 0xe, 0x89, 0xef, 0x68, 0xfc, 0x7b, 0xc9, 0x4e, 0xda, 0x5d, 0xa3, 0x24, 0xb0, 0x37, 0x85, 0x2, 0x96, 0x11, 0xb4, 0x33, 0xa7, 0x20, 0x92, 0x15, 0x81, 0x6, 0xf8, 0x7f, 0xeb, 0x6c, 0xde, 0x59, 0xcd, 0x4a, 0x2c, 0xab, 0x3f, 0xb8, 0xa, 0x8d, 0x19, 0x9e, 0x60, 0xe7, 0x73, 0xf4, 0x46, 0xc1, 0x55, 0xd2, 0x99, 0x1e, 0x8a, 0xd, 0xbf, 0x38, 0xac, 0x2b, 0xd5, 0x52, 0xc6, 0x41, 0xf3, 0x74, 0xe0, 0x67, 0x1, 0x86, 0x12, 0x95, 0x27, 0xa0, 0x34, 0xb3, 0x4d, 0xca, 0x5e, 0xd9, 0x6b, 0xec, 0x78, 0xff, 0xee, 0x69, 0xfd, 0x7a, 0xc8, 0x4f, 0xdb, 0x5c, 0xa2, 0x25, 0xb1, 0x36, 0x84, 0x3, 0x97, 0x10, 0x76, 0xf1, 0x65, 0xe2, 0x50, 0xd7, 0x43, 0xc4, 0x3a, 0xbd, 0x29, 0xae, 0x1c, 0x9b, 0xf, 0x88, 0xc3, 0x44, 0xd0, 0x57, 0xe5, 0x62, 0xf6, 0x71, 0x8f, 0x8, 0x9c, 0x1b, 0xa9, 0x2e, 0xba, 0x3d, 0x5b, 0xdc, 0x48, 0xcf, 0x7d, 0xfa, 0x6e, 0xe9, 0x17, 0x90, 0x4, 0x83, 0x31, 0xb6, 0x22, 0xa5},
- {0x0, 0x88, 0xd, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab, 0x68, 0xe0, 0x65, 0xed, 0x72, 0xfa, 0x7f, 0xf7, 0x5c, 0xd4, 0x51, 0xd9, 0x46, 0xce, 0x4b, 0xc3, 0xd0, 0x58, 0xdd, 0x55, 0xca, 0x42, 0xc7, 0x4f, 0xe4, 0x6c, 0xe9, 0x61, 0xfe, 0x76, 0xf3, 0x7b, 0xb8, 0x30, 0xb5, 0x3d, 0xa2, 0x2a, 0xaf, 0x27, 0x8c, 0x4, 0x81, 0x9, 0x96, 0x1e, 0x9b, 0x13, 0xbd, 0x35, 0xb0, 0x38, 0xa7, 0x2f, 0xaa, 0x22, 0x89, 0x1, 0x84, 0xc, 0x93, 0x1b, 0x9e, 0x16, 0xd5, 0x5d, 0xd8, 0x50, 0xcf, 0x47, 0xc2, 0x4a, 0xe1, 0x69, 0xec, 0x64, 0xfb, 0x73, 0xf6, 0x7e, 0x6d, 0xe5, 0x60, 0xe8, 0x77, 0xff, 0x7a, 0xf2, 0x59, 0xd1, 0x54, 0xdc, 0x43, 0xcb, 0x4e, 0xc6, 0x5, 0x8d, 0x8, 0x80, 0x1f, 0x97, 0x12, 0x9a, 0x31, 0xb9, 0x3c, 0xb4, 0x2b, 0xa3, 0x26, 0xae, 0x67, 0xef, 0x6a, 0xe2, 0x7d, 0xf5, 0x70, 0xf8, 0x53, 0xdb, 0x5e, 0xd6, 0x49, 0xc1, 0x44, 0xcc, 0xf, 0x87, 0x2, 0x8a, 0x15, 0x9d, 0x18, 0x90, 0x3b, 0xb3, 0x36, 0xbe, 0x21, 0xa9, 0x2c, 0xa4, 0xb7, 0x3f, 0xba, 0x32, 0xad, 0x25, 0xa0, 0x28, 0x83, 0xb, 0x8e, 0x6, 0x99, 0x11, 0x94, 0x1c, 0xdf, 0x57, 0xd2, 0x5a, 0xc5, 0x4d, 0xc8, 0x40, 0xeb, 0x63, 0xe6, 0x6e, 0xf1, 0x79, 0xfc, 0x74, 0xda, 0x52, 0xd7, 0x5f, 0xc0, 0x48, 0xcd, 0x45, 0xee, 0x66, 0xe3, 0x6b, 0xf4, 0x7c, 0xf9, 0x71, 0xb2, 0x3a, 0xbf, 0x37, 0xa8, 0x20, 0xa5, 0x2d, 0x86, 0xe, 0x8b, 0x3, 0x9c, 0x14, 0x91, 0x19, 0xa, 0x82, 0x7, 0x8f, 0x10, 0x98, 0x1d, 0x95, 0x3e, 0xb6, 0x33, 0xbb, 0x24, 0xac, 0x29, 0xa1, 0x62, 0xea, 0x6f, 0xe7, 0x78, 0xf0, 0x75, 0xfd, 0x56, 0xde, 0x5b, 0xd3, 0x4c, 0xc4, 0x41, 0xc9},
- {0x0, 0x89, 0xf, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4, 0x78, 0xf1, 0x77, 0xfe, 0x66, 0xef, 0x69, 0xe0, 0x44, 0xcd, 0x4b, 0xc2, 0x5a, 0xd3, 0x55, 0xdc, 0xf0, 0x79, 0xff, 0x76, 0xee, 0x67, 0xe1, 0x68, 0xcc, 0x45, 0xc3, 0x4a, 0xd2, 0x5b, 0xdd, 0x54, 0x88, 0x1, 0x87, 0xe, 0x96, 0x1f, 0x99, 0x10, 0xb4, 0x3d, 0xbb, 0x32, 0xaa, 0x23, 0xa5, 0x2c, 0xfd, 0x74, 0xf2, 0x7b, 0xe3, 0x6a, 0xec, 0x65, 0xc1, 0x48, 0xce, 0x47, 0xdf, 0x56, 0xd0, 0x59, 0x85, 0xc, 0x8a, 0x3, 0x9b, 0x12, 0x94, 0x1d, 0xb9, 0x30, 0xb6, 0x3f, 0xa7, 0x2e, 0xa8, 0x21, 0xd, 0x84, 0x2, 0x8b, 0x13, 0x9a, 0x1c, 0x95, 0x31, 0xb8, 0x3e, 0xb7, 0x2f, 0xa6, 0x20, 0xa9, 0x75, 0xfc, 0x7a, 0xf3, 0x6b, 0xe2, 0x64, 0xed, 0x49, 0xc0, 0x46, 0xcf, 0x57, 0xde, 0x58, 0xd1, 0xe7, 0x6e, 0xe8, 0x61, 0xf9, 0x70, 0xf6, 0x7f, 0xdb, 0x52, 0xd4, 0x5d, 0xc5, 0x4c, 0xca, 0x43, 0x9f, 0x16, 0x90, 0x19, 0x81, 0x8, 0x8e, 0x7, 0xa3, 0x2a, 0xac, 0x25, 0xbd, 0x34, 0xb2, 0x3b, 0x17, 0x9e, 0x18, 0x91, 0x9, 0x80, 0x6, 0x8f, 0x2b, 0xa2, 0x24, 0xad, 0x35, 0xbc, 0x3a, 0xb3, 0x6f, 0xe6, 0x60, 0xe9, 0x71, 0xf8, 0x7e, 0xf7, 0x53, 0xda, 0x5c, 0xd5, 0x4d, 0xc4, 0x42, 0xcb, 0x1a, 0x93, 0x15, 0x9c, 0x4, 0x8d, 0xb, 0x82, 0x26, 0xaf, 0x29, 0xa0, 0x38, 0xb1, 0x37, 0xbe, 0x62, 0xeb, 0x6d, 0xe4, 0x7c, 0xf5, 0x73, 0xfa, 0x5e, 0xd7, 0x51, 0xd8, 0x40, 0xc9, 0x4f, 0xc6, 0xea, 0x63, 0xe5, 0x6c, 0xf4, 0x7d, 0xfb, 0x72, 0xd6, 0x5f, 0xd9, 0x50, 0xc8, 0x41, 0xc7, 0x4e, 0x92, 0x1b, 0x9d, 0x14, 0x8c, 0x5, 0x83, 0xa, 0xae, 0x27, 0xa1, 0x28, 0xb0, 0x39, 0xbf, 0x36},
- {0x0, 0x8a, 0x9, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5, 0x48, 0xc2, 0x41, 0xcb, 0x5a, 0xd0, 0x53, 0xd9, 0x6c, 0xe6, 0x65, 0xef, 0x7e, 0xf4, 0x77, 0xfd, 0x90, 0x1a, 0x99, 0x13, 0x82, 0x8, 0x8b, 0x1, 0xb4, 0x3e, 0xbd, 0x37, 0xa6, 0x2c, 0xaf, 0x25, 0xd8, 0x52, 0xd1, 0x5b, 0xca, 0x40, 0xc3, 0x49, 0xfc, 0x76, 0xf5, 0x7f, 0xee, 0x64, 0xe7, 0x6d, 0x3d, 0xb7, 0x34, 0xbe, 0x2f, 0xa5, 0x26, 0xac, 0x19, 0x93, 0x10, 0x9a, 0xb, 0x81, 0x2, 0x88, 0x75, 0xff, 0x7c, 0xf6, 0x67, 0xed, 0x6e, 0xe4, 0x51, 0xdb, 0x58, 0xd2, 0x43, 0xc9, 0x4a, 0xc0, 0xad, 0x27, 0xa4, 0x2e, 0xbf, 0x35, 0xb6, 0x3c, 0x89, 0x3, 0x80, 0xa, 0x9b, 0x11, 0x92, 0x18, 0xe5, 0x6f, 0xec, 0x66, 0xf7, 0x7d, 0xfe, 0x74, 0xc1, 0x4b, 0xc8, 0x42, 0xd3, 0x59, 0xda, 0x50, 0x7a, 0xf0, 0x73, 0xf9, 0x68, 0xe2, 0x61, 0xeb, 0x5e, 0xd4, 0x57, 0xdd, 0x4c, 0xc6, 0x45, 0xcf, 0x32, 0xb8, 0x3b, 0xb1, 0x20, 0xaa, 0x29, 0xa3, 0x16, 0x9c, 0x1f, 0x95, 0x4, 0x8e, 0xd, 0x87, 0xea, 0x60, 0xe3, 0x69, 0xf8, 0x72, 0xf1, 0x7b, 0xce, 0x44, 0xc7, 0x4d, 0xdc, 0x56, 0xd5, 0x5f, 0xa2, 0x28, 0xab, 0x21, 0xb0, 0x3a, 0xb9, 0x33, 0x86, 0xc, 0x8f, 0x5, 0x94, 0x1e, 0x9d, 0x17, 0x47, 0xcd, 0x4e, 0xc4, 0x55, 0xdf, 0x5c, 0xd6, 0x63, 0xe9, 0x6a, 0xe0, 0x71, 0xfb, 0x78, 0xf2, 0xf, 0x85, 0x6, 0x8c, 0x1d, 0x97, 0x14, 0x9e, 0x2b, 0xa1, 0x22, 0xa8, 0x39, 0xb3, 0x30, 0xba, 0xd7, 0x5d, 0xde, 0x54, 0xc5, 0x4f, 0xcc, 0x46, 0xf3, 0x79, 0xfa, 0x70, 0xe1, 0x6b, 0xe8, 0x62, 0x9f, 0x15, 0x96, 0x1c, 0x8d, 0x7, 0x84, 0xe, 0xbb, 0x31, 0xb2, 0x38, 0xa9, 0x23, 0xa0, 0x2a},
- {0x0, 0x8b, 0xb, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba, 0x58, 0xd3, 0x53, 0xd8, 0x4e, 0xc5, 0x45, 0xce, 0x74, 0xff, 0x7f, 0xf4, 0x62, 0xe9, 0x69, 0xe2, 0xb0, 0x3b, 0xbb, 0x30, 0xa6, 0x2d, 0xad, 0x26, 0x9c, 0x17, 0x97, 0x1c, 0x8a, 0x1, 0x81, 0xa, 0xe8, 0x63, 0xe3, 0x68, 0xfe, 0x75, 0xf5, 0x7e, 0xc4, 0x4f, 0xcf, 0x44, 0xd2, 0x59, 0xd9, 0x52, 0x7d, 0xf6, 0x76, 0xfd, 0x6b, 0xe0, 0x60, 0xeb, 0x51, 0xda, 0x5a, 0xd1, 0x47, 0xcc, 0x4c, 0xc7, 0x25, 0xae, 0x2e, 0xa5, 0x33, 0xb8, 0x38, 0xb3, 0x9, 0x82, 0x2, 0x89, 0x1f, 0x94, 0x14, 0x9f, 0xcd, 0x46, 0xc6, 0x4d, 0xdb, 0x50, 0xd0, 0x5b, 0xe1, 0x6a, 0xea, 0x61, 0xf7, 0x7c, 0xfc, 0x77, 0x95, 0x1e, 0x9e, 0x15, 0x83, 0x8, 0x88, 0x3, 0xb9, 0x32, 0xb2, 0x39, 0xaf, 0x24, 0xa4, 0x2f, 0xfa, 0x71, 0xf1, 0x7a, 0xec, 0x67, 0xe7, 0x6c, 0xd6, 0x5d, 0xdd, 0x56, 0xc0, 0x4b, 0xcb, 0x40, 0xa2, 0x29, 0xa9, 0x22, 0xb4, 0x3f, 0xbf, 0x34, 0x8e, 0x5, 0x85, 0xe, 0x98, 0x13, 0x93, 0x18, 0x4a, 0xc1, 0x41, 0xca, 0x5c, 0xd7, 0x57, 0xdc, 0x66, 0xed, 0x6d, 0xe6, 0x70, 0xfb, 0x7b, 0xf0, 0x12, 0x99, 0x19, 0x92, 0x4, 0x8f, 0xf, 0x84, 0x3e, 0xb5, 0x35, 0xbe, 0x28, 0xa3, 0x23, 0xa8, 0x87, 0xc, 0x8c, 0x7, 0x91, 0x1a, 0x9a, 0x11, 0xab, 0x20, 0xa0, 0x2b, 0xbd, 0x36, 0xb6, 0x3d, 0xdf, 0x54, 0xd4, 0x5f, 0xc9, 0x42, 0xc2, 0x49, 0xf3, 0x78, 0xf8, 0x73, 0xe5, 0x6e, 0xee, 0x65, 0x37, 0xbc, 0x3c, 0xb7, 0x21, 0xaa, 0x2a, 0xa1, 0x1b, 0x90, 0x10, 0x9b, 0xd, 0x86, 0x6, 0x8d, 0x6f, 0xe4, 0x64, 0xef, 0x79, 0xf2, 0x72, 0xf9, 0x43, 0xc8, 0x48, 0xc3, 0x55, 0xde, 0x5e, 0xd5},
- {0x0, 0x8c, 0x5, 0x89, 0xa, 0x86, 0xf, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97, 0x28, 0xa4, 0x2d, 0xa1, 0x22, 0xae, 0x27, 0xab, 0x3c, 0xb0, 0x39, 0xb5, 0x36, 0xba, 0x33, 0xbf, 0x50, 0xdc, 0x55, 0xd9, 0x5a, 0xd6, 0x5f, 0xd3, 0x44, 0xc8, 0x41, 0xcd, 0x4e, 0xc2, 0x4b, 0xc7, 0x78, 0xf4, 0x7d, 0xf1, 0x72, 0xfe, 0x77, 0xfb, 0x6c, 0xe0, 0x69, 0xe5, 0x66, 0xea, 0x63, 0xef, 0xa0, 0x2c, 0xa5, 0x29, 0xaa, 0x26, 0xaf, 0x23, 0xb4, 0x38, 0xb1, 0x3d, 0xbe, 0x32, 0xbb, 0x37, 0x88, 0x4, 0x8d, 0x1, 0x82, 0xe, 0x87, 0xb, 0x9c, 0x10, 0x99, 0x15, 0x96, 0x1a, 0x93, 0x1f, 0xf0, 0x7c, 0xf5, 0x79, 0xfa, 0x76, 0xff, 0x73, 0xe4, 0x68, 0xe1, 0x6d, 0xee, 0x62, 0xeb, 0x67, 0xd8, 0x54, 0xdd, 0x51, 0xd2, 0x5e, 0xd7, 0x5b, 0xcc, 0x40, 0xc9, 0x45, 0xc6, 0x4a, 0xc3, 0x4f, 0x5d, 0xd1, 0x58, 0xd4, 0x57, 0xdb, 0x52, 0xde, 0x49, 0xc5, 0x4c, 0xc0, 0x43, 0xcf, 0x46, 0xca, 0x75, 0xf9, 0x70, 0xfc, 0x7f, 0xf3, 0x7a, 0xf6, 0x61, 0xed, 0x64, 0xe8, 0x6b, 0xe7, 0x6e, 0xe2, 0xd, 0x81, 0x8, 0x84, 0x7, 0x8b, 0x2, 0x8e, 0x19, 0x95, 0x1c, 0x90, 0x13, 0x9f, 0x16, 0x9a, 0x25, 0xa9, 0x20, 0xac, 0x2f, 0xa3, 0x2a, 0xa6, 0x31, 0xbd, 0x34, 0xb8, 0x3b, 0xb7, 0x3e, 0xb2, 0xfd, 0x71, 0xf8, 0x74, 0xf7, 0x7b, 0xf2, 0x7e, 0xe9, 0x65, 0xec, 0x60, 0xe3, 0x6f, 0xe6, 0x6a, 0xd5, 0x59, 0xd0, 0x5c, 0xdf, 0x53, 0xda, 0x56, 0xc1, 0x4d, 0xc4, 0x48, 0xcb, 0x47, 0xce, 0x42, 0xad, 0x21, 0xa8, 0x24, 0xa7, 0x2b, 0xa2, 0x2e, 0xb9, 0x35, 0xbc, 0x30, 0xb3, 0x3f, 0xb6, 0x3a, 0x85, 0x9, 0x80, 0xc, 0x8f, 0x3, 0x8a, 0x6, 0x91, 0x1d, 0x94, 0x18, 0x9b, 0x17, 0x9e, 0x12},
- {0x0, 0x8d, 0x7, 0x8a, 0xe, 0x83, 0x9, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98, 0x38, 0xb5, 0x3f, 0xb2, 0x36, 0xbb, 0x31, 0xbc, 0x24, 0xa9, 0x23, 0xae, 0x2a, 0xa7, 0x2d, 0xa0, 0x70, 0xfd, 0x77, 0xfa, 0x7e, 0xf3, 0x79, 0xf4, 0x6c, 0xe1, 0x6b, 0xe6, 0x62, 0xef, 0x65, 0xe8, 0x48, 0xc5, 0x4f, 0xc2, 0x46, 0xcb, 0x41, 0xcc, 0x54, 0xd9, 0x53, 0xde, 0x5a, 0xd7, 0x5d, 0xd0, 0xe0, 0x6d, 0xe7, 0x6a, 0xee, 0x63, 0xe9, 0x64, 0xfc, 0x71, 0xfb, 0x76, 0xf2, 0x7f, 0xf5, 0x78, 0xd8, 0x55, 0xdf, 0x52, 0xd6, 0x5b, 0xd1, 0x5c, 0xc4, 0x49, 0xc3, 0x4e, 0xca, 0x47, 0xcd, 0x40, 0x90, 0x1d, 0x97, 0x1a, 0x9e, 0x13, 0x99, 0x14, 0x8c, 0x1, 0x8b, 0x6, 0x82, 0xf, 0x85, 0x8, 0xa8, 0x25, 0xaf, 0x22, 0xa6, 0x2b, 0xa1, 0x2c, 0xb4, 0x39, 0xb3, 0x3e, 0xba, 0x37, 0xbd, 0x30, 0xdd, 0x50, 0xda, 0x57, 0xd3, 0x5e, 0xd4, 0x59, 0xc1, 0x4c, 0xc6, 0x4b, 0xcf, 0x42, 0xc8, 0x45, 0xe5, 0x68, 0xe2, 0x6f, 0xeb, 0x66, 0xec, 0x61, 0xf9, 0x74, 0xfe, 0x73, 0xf7, 0x7a, 0xf0, 0x7d, 0xad, 0x20, 0xaa, 0x27, 0xa3, 0x2e, 0xa4, 0x29, 0xb1, 0x3c, 0xb6, 0x3b, 0xbf, 0x32, 0xb8, 0x35, 0x95, 0x18, 0x92, 0x1f, 0x9b, 0x16, 0x9c, 0x11, 0x89, 0x4, 0x8e, 0x3, 0x87, 0xa, 0x80, 0xd, 0x3d, 0xb0, 0x3a, 0xb7, 0x33, 0xbe, 0x34, 0xb9, 0x21, 0xac, 0x26, 0xab, 0x2f, 0xa2, 0x28, 0xa5, 0x5, 0x88, 0x2, 0x8f, 0xb, 0x86, 0xc, 0x81, 0x19, 0x94, 0x1e, 0x93, 0x17, 0x9a, 0x10, 0x9d, 0x4d, 0xc0, 0x4a, 0xc7, 0x43, 0xce, 0x44, 0xc9, 0x51, 0xdc, 0x56, 0xdb, 0x5f, 0xd2, 0x58, 0xd5, 0x75, 0xf8, 0x72, 0xff, 0x7b, 0xf6, 0x7c, 0xf1, 0x69, 0xe4, 0x6e, 0xe3, 0x67, 0xea, 0x60, 0xed},
- {0x0, 0x8e, 0x1, 0x8f, 0x2, 0x8c, 0x3, 0x8d, 0x4, 0x8a, 0x5, 0x8b, 0x6, 0x88, 0x7, 0x89, 0x8, 0x86, 0x9, 0x87, 0xa, 0x84, 0xb, 0x85, 0xc, 0x82, 0xd, 0x83, 0xe, 0x80, 0xf, 0x81, 0x10, 0x9e, 0x11, 0x9f, 0x12, 0x9c, 0x13, 0x9d, 0x14, 0x9a, 0x15, 0x9b, 0x16, 0x98, 0x17, 0x99, 0x18, 0x96, 0x19, 0x97, 0x1a, 0x94, 0x1b, 0x95, 0x1c, 0x92, 0x1d, 0x93, 0x1e, 0x90, 0x1f, 0x91, 0x20, 0xae, 0x21, 0xaf, 0x22, 0xac, 0x23, 0xad, 0x24, 0xaa, 0x25, 0xab, 0x26, 0xa8, 0x27, 0xa9, 0x28, 0xa6, 0x29, 0xa7, 0x2a, 0xa4, 0x2b, 0xa5, 0x2c, 0xa2, 0x2d, 0xa3, 0x2e, 0xa0, 0x2f, 0xa1, 0x30, 0xbe, 0x31, 0xbf, 0x32, 0xbc, 0x33, 0xbd, 0x34, 0xba, 0x35, 0xbb, 0x36, 0xb8, 0x37, 0xb9, 0x38, 0xb6, 0x39, 0xb7, 0x3a, 0xb4, 0x3b, 0xb5, 0x3c, 0xb2, 0x3d, 0xb3, 0x3e, 0xb0, 0x3f, 0xb1, 0x40, 0xce, 0x41, 0xcf, 0x42, 0xcc, 0x43, 0xcd, 0x44, 0xca, 0x45, 0xcb, 0x46, 0xc8, 0x47, 0xc9, 0x48, 0xc6, 0x49, 0xc7, 0x4a, 0xc4, 0x4b, 0xc5, 0x4c, 0xc2, 0x4d, 0xc3, 0x4e, 0xc0, 0x4f, 0xc1, 0x50, 0xde, 0x51, 0xdf, 0x52, 0xdc, 0x53, 0xdd, 0x54, 0xda, 0x55, 0xdb, 0x56, 0xd8, 0x57, 0xd9, 0x58, 0xd6, 0x59, 0xd7, 0x5a, 0xd4, 0x5b, 0xd5, 0x5c, 0xd2, 0x5d, 0xd3, 0x5e, 0xd0, 0x5f, 0xd1, 0x60, 0xee, 0x61, 0xef, 0x62, 0xec, 0x63, 0xed, 0x64, 0xea, 0x65, 0xeb, 0x66, 0xe8, 0x67, 0xe9, 0x68, 0xe6, 0x69, 0xe7, 0x6a, 0xe4, 0x6b, 0xe5, 0x6c, 0xe2, 0x6d, 0xe3, 0x6e, 0xe0, 0x6f, 0xe1, 0x70, 0xfe, 0x71, 0xff, 0x72, 0xfc, 0x73, 0xfd, 0x74, 0xfa, 0x75, 0xfb, 0x76, 0xf8, 0x77, 0xf9, 0x78, 0xf6, 0x79, 0xf7, 0x7a, 0xf4, 0x7b, 0xf5, 0x7c, 0xf2, 0x7d, 0xf3, 0x7e, 0xf0, 0x7f, 0xf1},
- {0x0, 0x8f, 0x3, 0x8c, 0x6, 0x89, 0x5, 0x8a, 0xc, 0x83, 0xf, 0x80, 0xa, 0x85, 0x9, 0x86, 0x18, 0x97, 0x1b, 0x94, 0x1e, 0x91, 0x1d, 0x92, 0x14, 0x9b, 0x17, 0x98, 0x12, 0x9d, 0x11, 0x9e, 0x30, 0xbf, 0x33, 0xbc, 0x36, 0xb9, 0x35, 0xba, 0x3c, 0xb3, 0x3f, 0xb0, 0x3a, 0xb5, 0x39, 0xb6, 0x28, 0xa7, 0x2b, 0xa4, 0x2e, 0xa1, 0x2d, 0xa2, 0x24, 0xab, 0x27, 0xa8, 0x22, 0xad, 0x21, 0xae, 0x60, 0xef, 0x63, 0xec, 0x66, 0xe9, 0x65, 0xea, 0x6c, 0xe3, 0x6f, 0xe0, 0x6a, 0xe5, 0x69, 0xe6, 0x78, 0xf7, 0x7b, 0xf4, 0x7e, 0xf1, 0x7d, 0xf2, 0x74, 0xfb, 0x77, 0xf8, 0x72, 0xfd, 0x71, 0xfe, 0x50, 0xdf, 0x53, 0xdc, 0x56, 0xd9, 0x55, 0xda, 0x5c, 0xd3, 0x5f, 0xd0, 0x5a, 0xd5, 0x59, 0xd6, 0x48, 0xc7, 0x4b, 0xc4, 0x4e, 0xc1, 0x4d, 0xc2, 0x44, 0xcb, 0x47, 0xc8, 0x42, 0xcd, 0x41, 0xce, 0xc0, 0x4f, 0xc3, 0x4c, 0xc6, 0x49, 0xc5, 0x4a, 0xcc, 0x43, 0xcf, 0x40, 0xca, 0x45, 0xc9, 0x46, 0xd8, 0x57, 0xdb, 0x54, 0xde, 0x51, 0xdd, 0x52, 0xd4, 0x5b, 0xd7, 0x58, 0xd2, 0x5d, 0xd1, 0x5e, 0xf0, 0x7f, 0xf3, 0x7c, 0xf6, 0x79, 0xf5, 0x7a, 0xfc, 0x73, 0xff, 0x70, 0xfa, 0x75, 0xf9, 0x76, 0xe8, 0x67, 0xeb, 0x64, 0xee, 0x61, 0xed, 0x62, 0xe4, 0x6b, 0xe7, 0x68, 0xe2, 0x6d, 0xe1, 0x6e, 0xa0, 0x2f, 0xa3, 0x2c, 0xa6, 0x29, 0xa5, 0x2a, 0xac, 0x23, 0xaf, 0x20, 0xaa, 0x25, 0xa9, 0x26, 0xb8, 0x37, 0xbb, 0x34, 0xbe, 0x31, 0xbd, 0x32, 0xb4, 0x3b, 0xb7, 0x38, 0xb2, 0x3d, 0xb1, 0x3e, 0x90, 0x1f, 0x93, 0x1c, 0x96, 0x19, 0x95, 0x1a, 0x9c, 0x13, 0x9f, 0x10, 0x9a, 0x15, 0x99, 0x16, 0x88, 0x7, 0x8b, 0x4, 0x8e, 0x1, 0x8d, 0x2, 0x84, 0xb, 0x87, 0x8, 0x82, 0xd, 0x81, 0xe},
- {0x0, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23, 0xf5, 0x65, 0xc8, 0x58, 0x8f, 0x1f, 0xb2, 0x22, 0x1, 0x91, 0x3c, 0xac, 0x7b, 0xeb, 0x46, 0xd6, 0xf7, 0x67, 0xca, 0x5a, 0x8d, 0x1d, 0xb0, 0x20, 0x3, 0x93, 0x3e, 0xae, 0x79, 0xe9, 0x44, 0xd4, 0x2, 0x92, 0x3f, 0xaf, 0x78, 0xe8, 0x45, 0xd5, 0xf6, 0x66, 0xcb, 0x5b, 0x8c, 0x1c, 0xb1, 0x21, 0xf3, 0x63, 0xce, 0x5e, 0x89, 0x19, 0xb4, 0x24, 0x7, 0x97, 0x3a, 0xaa, 0x7d, 0xed, 0x40, 0xd0, 0x6, 0x96, 0x3b, 0xab, 0x7c, 0xec, 0x41, 0xd1, 0xf2, 0x62, 0xcf, 0x5f, 0x88, 0x18, 0xb5, 0x25, 0x4, 0x94, 0x39, 0xa9, 0x7e, 0xee, 0x43, 0xd3, 0xf0, 0x60, 0xcd, 0x5d, 0x8a, 0x1a, 0xb7, 0x27, 0xf1, 0x61, 0xcc, 0x5c, 0x8b, 0x1b, 0xb6, 0x26, 0x5, 0x95, 0x38, 0xa8, 0x7f, 0xef, 0x42, 0xd2, 0xfb, 0x6b, 0xc6, 0x56, 0x81, 0x11, 0xbc, 0x2c, 0xf, 0x9f, 0x32, 0xa2, 0x75, 0xe5, 0x48, 0xd8, 0xe, 0x9e, 0x33, 0xa3, 0x74, 0xe4, 0x49, 0xd9, 0xfa, 0x6a, 0xc7, 0x57, 0x80, 0x10, 0xbd, 0x2d, 0xc, 0x9c, 0x31, 0xa1, 0x76, 0xe6, 0x4b, 0xdb, 0xf8, 0x68, 0xc5, 0x55, 0x82, 0x12, 0xbf, 0x2f, 0xf9, 0x69, 0xc4, 0x54, 0x83, 0x13, 0xbe, 0x2e, 0xd, 0x9d, 0x30, 0xa0, 0x77, 0xe7, 0x4a, 0xda, 0x8, 0x98, 0x35, 0xa5, 0x72, 0xe2, 0x4f, 0xdf, 0xfc, 0x6c, 0xc1, 0x51, 0x86, 0x16, 0xbb, 0x2b, 0xfd, 0x6d, 0xc0, 0x50, 0x87, 0x17, 0xba, 0x2a, 0x9, 0x99, 0x34, 0xa4, 0x73, 0xe3, 0x4e, 0xde, 0xff, 0x6f, 0xc2, 0x52, 0x85, 0x15, 0xb8, 0x28, 0xb, 0x9b, 0x36, 0xa6, 0x71, 0xe1, 0x4c, 0xdc, 0xa, 0x9a, 0x37, 0xa7, 0x70, 0xe0, 0x4d, 0xdd, 0xfe, 0x6e, 0xc3, 0x53, 0x84, 0x14, 0xb9, 0x29},
- {0x0, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c, 0xe5, 0x74, 0xda, 0x4b, 0x9b, 0xa, 0xa4, 0x35, 0x19, 0x88, 0x26, 0xb7, 0x67, 0xf6, 0x58, 0xc9, 0xd7, 0x46, 0xe8, 0x79, 0xa9, 0x38, 0x96, 0x7, 0x2b, 0xba, 0x14, 0x85, 0x55, 0xc4, 0x6a, 0xfb, 0x32, 0xa3, 0xd, 0x9c, 0x4c, 0xdd, 0x73, 0xe2, 0xce, 0x5f, 0xf1, 0x60, 0xb0, 0x21, 0x8f, 0x1e, 0xb3, 0x22, 0x8c, 0x1d, 0xcd, 0x5c, 0xf2, 0x63, 0x4f, 0xde, 0x70, 0xe1, 0x31, 0xa0, 0xe, 0x9f, 0x56, 0xc7, 0x69, 0xf8, 0x28, 0xb9, 0x17, 0x86, 0xaa, 0x3b, 0x95, 0x4, 0xd4, 0x45, 0xeb, 0x7a, 0x64, 0xf5, 0x5b, 0xca, 0x1a, 0x8b, 0x25, 0xb4, 0x98, 0x9, 0xa7, 0x36, 0xe6, 0x77, 0xd9, 0x48, 0x81, 0x10, 0xbe, 0x2f, 0xff, 0x6e, 0xc0, 0x51, 0x7d, 0xec, 0x42, 0xd3, 0x3, 0x92, 0x3c, 0xad, 0x7b, 0xea, 0x44, 0xd5, 0x5, 0x94, 0x3a, 0xab, 0x87, 0x16, 0xb8, 0x29, 0xf9, 0x68, 0xc6, 0x57, 0x9e, 0xf, 0xa1, 0x30, 0xe0, 0x71, 0xdf, 0x4e, 0x62, 0xf3, 0x5d, 0xcc, 0x1c, 0x8d, 0x23, 0xb2, 0xac, 0x3d, 0x93, 0x2, 0xd2, 0x43, 0xed, 0x7c, 0x50, 0xc1, 0x6f, 0xfe, 0x2e, 0xbf, 0x11, 0x80, 0x49, 0xd8, 0x76, 0xe7, 0x37, 0xa6, 0x8, 0x99, 0xb5, 0x24, 0x8a, 0x1b, 0xcb, 0x5a, 0xf4, 0x65, 0xc8, 0x59, 0xf7, 0x66, 0xb6, 0x27, 0x89, 0x18, 0x34, 0xa5, 0xb, 0x9a, 0x4a, 0xdb, 0x75, 0xe4, 0x2d, 0xbc, 0x12, 0x83, 0x53, 0xc2, 0x6c, 0xfd, 0xd1, 0x40, 0xee, 0x7f, 0xaf, 0x3e, 0x90, 0x1, 0x1f, 0x8e, 0x20, 0xb1, 0x61, 0xf0, 0x5e, 0xcf, 0xe3, 0x72, 0xdc, 0x4d, 0x9d, 0xc, 0xa2, 0x33, 0xfa, 0x6b, 0xc5, 0x54, 0x84, 0x15, 0xbb, 0x2a, 0x6, 0x97, 0x39, 0xa8, 0x78, 0xe9, 0x47, 0xd6},
- {0x0, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x4, 0xaf, 0x3d, 0xd5, 0x47, 0xec, 0x7e, 0xa7, 0x35, 0x9e, 0xc, 0x31, 0xa3, 0x8, 0x9a, 0x43, 0xd1, 0x7a, 0xe8, 0xb7, 0x25, 0x8e, 0x1c, 0xc5, 0x57, 0xfc, 0x6e, 0x53, 0xc1, 0x6a, 0xf8, 0x21, 0xb3, 0x18, 0x8a, 0x62, 0xf0, 0x5b, 0xc9, 0x10, 0x82, 0x29, 0xbb, 0x86, 0x14, 0xbf, 0x2d, 0xf4, 0x66, 0xcd, 0x5f, 0x73, 0xe1, 0x4a, 0xd8, 0x1, 0x93, 0x38, 0xaa, 0x97, 0x5, 0xae, 0x3c, 0xe5, 0x77, 0xdc, 0x4e, 0xa6, 0x34, 0x9f, 0xd, 0xd4, 0x46, 0xed, 0x7f, 0x42, 0xd0, 0x7b, 0xe9, 0x30, 0xa2, 0x9, 0x9b, 0xc4, 0x56, 0xfd, 0x6f, 0xb6, 0x24, 0x8f, 0x1d, 0x20, 0xb2, 0x19, 0x8b, 0x52, 0xc0, 0x6b, 0xf9, 0x11, 0x83, 0x28, 0xba, 0x63, 0xf1, 0x5a, 0xc8, 0xf5, 0x67, 0xcc, 0x5e, 0x87, 0x15, 0xbe, 0x2c, 0xe6, 0x74, 0xdf, 0x4d, 0x94, 0x6, 0xad, 0x3f, 0x2, 0x90, 0x3b, 0xa9, 0x70, 0xe2, 0x49, 0xdb, 0x33, 0xa1, 0xa, 0x98, 0x41, 0xd3, 0x78, 0xea, 0xd7, 0x45, 0xee, 0x7c, 0xa5, 0x37, 0x9c, 0xe, 0x51, 0xc3, 0x68, 0xfa, 0x23, 0xb1, 0x1a, 0x88, 0xb5, 0x27, 0x8c, 0x1e, 0xc7, 0x55, 0xfe, 0x6c, 0x84, 0x16, 0xbd, 0x2f, 0xf6, 0x64, 0xcf, 0x5d, 0x60, 0xf2, 0x59, 0xcb, 0x12, 0x80, 0x2b, 0xb9, 0x95, 0x7, 0xac, 0x3e, 0xe7, 0x75, 0xde, 0x4c, 0x71, 0xe3, 0x48, 0xda, 0x3, 0x91, 0x3a, 0xa8, 0x40, 0xd2, 0x79, 0xeb, 0x32, 0xa0, 0xb, 0x99, 0xa4, 0x36, 0x9d, 0xf, 0xd6, 0x44, 0xef, 0x7d, 0x22, 0xb0, 0x1b, 0x89, 0x50, 0xc2, 0x69, 0xfb, 0xc6, 0x54, 0xff, 0x6d, 0xb4, 0x26, 0x8d, 0x1f, 0xf7, 0x65, 0xce, 0x5c, 0x85, 0x17, 0xbc, 0x2e, 0x13, 0x81, 0x2a, 0xb8, 0x61, 0xf3, 0x58, 0xca},
- {0x0, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x9, 0xa1, 0x32, 0xc5, 0x56, 0xfe, 0x6d, 0xb3, 0x20, 0x88, 0x1b, 0x29, 0xba, 0x12, 0x81, 0x5f, 0xcc, 0x64, 0xf7, 0x97, 0x4, 0xac, 0x3f, 0xe1, 0x72, 0xda, 0x49, 0x7b, 0xe8, 0x40, 0xd3, 0xd, 0x9e, 0x36, 0xa5, 0x52, 0xc1, 0x69, 0xfa, 0x24, 0xb7, 0x1f, 0x8c, 0xbe, 0x2d, 0x85, 0x16, 0xc8, 0x5b, 0xf3, 0x60, 0x33, 0xa0, 0x8, 0x9b, 0x45, 0xd6, 0x7e, 0xed, 0xdf, 0x4c, 0xe4, 0x77, 0xa9, 0x3a, 0x92, 0x1, 0xf6, 0x65, 0xcd, 0x5e, 0x80, 0x13, 0xbb, 0x28, 0x1a, 0x89, 0x21, 0xb2, 0x6c, 0xff, 0x57, 0xc4, 0xa4, 0x37, 0x9f, 0xc, 0xd2, 0x41, 0xe9, 0x7a, 0x48, 0xdb, 0x73, 0xe0, 0x3e, 0xad, 0x5, 0x96, 0x61, 0xf2, 0x5a, 0xc9, 0x17, 0x84, 0x2c, 0xbf, 0x8d, 0x1e, 0xb6, 0x25, 0xfb, 0x68, 0xc0, 0x53, 0x66, 0xf5, 0x5d, 0xce, 0x10, 0x83, 0x2b, 0xb8, 0x8a, 0x19, 0xb1, 0x22, 0xfc, 0x6f, 0xc7, 0x54, 0xa3, 0x30, 0x98, 0xb, 0xd5, 0x46, 0xee, 0x7d, 0x4f, 0xdc, 0x74, 0xe7, 0x39, 0xaa, 0x2, 0x91, 0xf1, 0x62, 0xca, 0x59, 0x87, 0x14, 0xbc, 0x2f, 0x1d, 0x8e, 0x26, 0xb5, 0x6b, 0xf8, 0x50, 0xc3, 0x34, 0xa7, 0xf, 0x9c, 0x42, 0xd1, 0x79, 0xea, 0xd8, 0x4b, 0xe3, 0x70, 0xae, 0x3d, 0x95, 0x6, 0x55, 0xc6, 0x6e, 0xfd, 0x23, 0xb0, 0x18, 0x8b, 0xb9, 0x2a, 0x82, 0x11, 0xcf, 0x5c, 0xf4, 0x67, 0x90, 0x3, 0xab, 0x38, 0xe6, 0x75, 0xdd, 0x4e, 0x7c, 0xef, 0x47, 0xd4, 0xa, 0x99, 0x31, 0xa2, 0xc2, 0x51, 0xf9, 0x6a, 0xb4, 0x27, 0x8f, 0x1c, 0x2e, 0xbd, 0x15, 0x86, 0x58, 0xcb, 0x63, 0xf0, 0x7, 0x94, 0x3c, 0xaf, 0x71, 0xe2, 0x4a, 0xd9, 0xeb, 0x78, 0xd0, 0x43, 0x9d, 0xe, 0xa6, 0x35},
- {0x0, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f, 0xb5, 0x21, 0x80, 0x14, 0xdf, 0x4b, 0xea, 0x7e, 0x61, 0xf5, 0x54, 0xc0, 0xb, 0x9f, 0x3e, 0xaa, 0x77, 0xe3, 0x42, 0xd6, 0x1d, 0x89, 0x28, 0xbc, 0xa3, 0x37, 0x96, 0x2, 0xc9, 0x5d, 0xfc, 0x68, 0xc2, 0x56, 0xf7, 0x63, 0xa8, 0x3c, 0x9d, 0x9, 0x16, 0x82, 0x23, 0xb7, 0x7c, 0xe8, 0x49, 0xdd, 0xee, 0x7a, 0xdb, 0x4f, 0x84, 0x10, 0xb1, 0x25, 0x3a, 0xae, 0xf, 0x9b, 0x50, 0xc4, 0x65, 0xf1, 0x5b, 0xcf, 0x6e, 0xfa, 0x31, 0xa5, 0x4, 0x90, 0x8f, 0x1b, 0xba, 0x2e, 0xe5, 0x71, 0xd0, 0x44, 0x99, 0xd, 0xac, 0x38, 0xf3, 0x67, 0xc6, 0x52, 0x4d, 0xd9, 0x78, 0xec, 0x27, 0xb3, 0x12, 0x86, 0x2c, 0xb8, 0x19, 0x8d, 0x46, 0xd2, 0x73, 0xe7, 0xf8, 0x6c, 0xcd, 0x59, 0x92, 0x6, 0xa7, 0x33, 0xc1, 0x55, 0xf4, 0x60, 0xab, 0x3f, 0x9e, 0xa, 0x15, 0x81, 0x20, 0xb4, 0x7f, 0xeb, 0x4a, 0xde, 0x74, 0xe0, 0x41, 0xd5, 0x1e, 0x8a, 0x2b, 0xbf, 0xa0, 0x34, 0x95, 0x1, 0xca, 0x5e, 0xff, 0x6b, 0xb6, 0x22, 0x83, 0x17, 0xdc, 0x48, 0xe9, 0x7d, 0x62, 0xf6, 0x57, 0xc3, 0x8, 0x9c, 0x3d, 0xa9, 0x3, 0x97, 0x36, 0xa2, 0x69, 0xfd, 0x5c, 0xc8, 0xd7, 0x43, 0xe2, 0x76, 0xbd, 0x29, 0x88, 0x1c, 0x2f, 0xbb, 0x1a, 0x8e, 0x45, 0xd1, 0x70, 0xe4, 0xfb, 0x6f, 0xce, 0x5a, 0x91, 0x5, 0xa4, 0x30, 0x9a, 0xe, 0xaf, 0x3b, 0xf0, 0x64, 0xc5, 0x51, 0x4e, 0xda, 0x7b, 0xef, 0x24, 0xb0, 0x11, 0x85, 0x58, 0xcc, 0x6d, 0xf9, 0x32, 0xa6, 0x7, 0x93, 0x8c, 0x18, 0xb9, 0x2d, 0xe6, 0x72, 0xd3, 0x47, 0xed, 0x79, 0xd8, 0x4c, 0x87, 0x13, 0xb2, 0x26, 0x39, 0xad, 0xc, 0x98, 0x53, 0xc7, 0x66, 0xf2},
- {0x0, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10, 0xa5, 0x30, 0x92, 0x7, 0xcb, 0x5e, 0xfc, 0x69, 0x79, 0xec, 0x4e, 0xdb, 0x17, 0x82, 0x20, 0xb5, 0x57, 0xc2, 0x60, 0xf5, 0x39, 0xac, 0xe, 0x9b, 0x8b, 0x1e, 0xbc, 0x29, 0xe5, 0x70, 0xd2, 0x47, 0xf2, 0x67, 0xc5, 0x50, 0x9c, 0x9, 0xab, 0x3e, 0x2e, 0xbb, 0x19, 0x8c, 0x40, 0xd5, 0x77, 0xe2, 0xae, 0x3b, 0x99, 0xc, 0xc0, 0x55, 0xf7, 0x62, 0x72, 0xe7, 0x45, 0xd0, 0x1c, 0x89, 0x2b, 0xbe, 0xb, 0x9e, 0x3c, 0xa9, 0x65, 0xf0, 0x52, 0xc7, 0xd7, 0x42, 0xe0, 0x75, 0xb9, 0x2c, 0x8e, 0x1b, 0xf9, 0x6c, 0xce, 0x5b, 0x97, 0x2, 0xa0, 0x35, 0x25, 0xb0, 0x12, 0x87, 0x4b, 0xde, 0x7c, 0xe9, 0x5c, 0xc9, 0x6b, 0xfe, 0x32, 0xa7, 0x5, 0x90, 0x80, 0x15, 0xb7, 0x22, 0xee, 0x7b, 0xd9, 0x4c, 0x41, 0xd4, 0x76, 0xe3, 0x2f, 0xba, 0x18, 0x8d, 0x9d, 0x8, 0xaa, 0x3f, 0xf3, 0x66, 0xc4, 0x51, 0xe4, 0x71, 0xd3, 0x46, 0x8a, 0x1f, 0xbd, 0x28, 0x38, 0xad, 0xf, 0x9a, 0x56, 0xc3, 0x61, 0xf4, 0x16, 0x83, 0x21, 0xb4, 0x78, 0xed, 0x4f, 0xda, 0xca, 0x5f, 0xfd, 0x68, 0xa4, 0x31, 0x93, 0x6, 0xb3, 0x26, 0x84, 0x11, 0xdd, 0x48, 0xea, 0x7f, 0x6f, 0xfa, 0x58, 0xcd, 0x1, 0x94, 0x36, 0xa3, 0xef, 0x7a, 0xd8, 0x4d, 0x81, 0x14, 0xb6, 0x23, 0x33, 0xa6, 0x4, 0x91, 0x5d, 0xc8, 0x6a, 0xff, 0x4a, 0xdf, 0x7d, 0xe8, 0x24, 0xb1, 0x13, 0x86, 0x96, 0x3, 0xa1, 0x34, 0xf8, 0x6d, 0xcf, 0x5a, 0xb8, 0x2d, 0x8f, 0x1a, 0xd6, 0x43, 0xe1, 0x74, 0x64, 0xf1, 0x53, 0xc6, 0xa, 0x9f, 0x3d, 0xa8, 0x1d, 0x88, 0x2a, 0xbf, 0x73, 0xe6, 0x44, 0xd1, 0xc1, 0x54, 0xf6, 0x63, 0xaf, 0x3a, 0x98, 0xd},
- {0x0, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x1, 0x95, 0x3, 0xa4, 0x32, 0xf7, 0x61, 0xc6, 0x50, 0x51, 0xc7, 0x60, 0xf6, 0x33, 0xa5, 0x2, 0x94, 0x37, 0xa1, 0x6, 0x90, 0x55, 0xc3, 0x64, 0xf2, 0xf3, 0x65, 0xc2, 0x54, 0x91, 0x7, 0xa0, 0x36, 0xa2, 0x34, 0x93, 0x5, 0xc0, 0x56, 0xf1, 0x67, 0x66, 0xf0, 0x57, 0xc1, 0x4, 0x92, 0x35, 0xa3, 0x6e, 0xf8, 0x5f, 0xc9, 0xc, 0x9a, 0x3d, 0xab, 0xaa, 0x3c, 0x9b, 0xd, 0xc8, 0x5e, 0xf9, 0x6f, 0xfb, 0x6d, 0xca, 0x5c, 0x99, 0xf, 0xa8, 0x3e, 0x3f, 0xa9, 0xe, 0x98, 0x5d, 0xcb, 0x6c, 0xfa, 0x59, 0xcf, 0x68, 0xfe, 0x3b, 0xad, 0xa, 0x9c, 0x9d, 0xb, 0xac, 0x3a, 0xff, 0x69, 0xce, 0x58, 0xcc, 0x5a, 0xfd, 0x6b, 0xae, 0x38, 0x9f, 0x9, 0x8, 0x9e, 0x39, 0xaf, 0x6a, 0xfc, 0x5b, 0xcd, 0xdc, 0x4a, 0xed, 0x7b, 0xbe, 0x28, 0x8f, 0x19, 0x18, 0x8e, 0x29, 0xbf, 0x7a, 0xec, 0x4b, 0xdd, 0x49, 0xdf, 0x78, 0xee, 0x2b, 0xbd, 0x1a, 0x8c, 0x8d, 0x1b, 0xbc, 0x2a, 0xef, 0x79, 0xde, 0x48, 0xeb, 0x7d, 0xda, 0x4c, 0x89, 0x1f, 0xb8, 0x2e, 0x2f, 0xb9, 0x1e, 0x88, 0x4d, 0xdb, 0x7c, 0xea, 0x7e, 0xe8, 0x4f, 0xd9, 0x1c, 0x8a, 0x2d, 0xbb, 0xba, 0x2c, 0x8b, 0x1d, 0xd8, 0x4e, 0xe9, 0x7f, 0xb2, 0x24, 0x83, 0x15, 0xd0, 0x46, 0xe1, 0x77, 0x76, 0xe0, 0x47, 0xd1, 0x14, 0x82, 0x25, 0xb3, 0x27, 0xb1, 0x16, 0x80, 0x45, 0xd3, 0x74, 0xe2, 0xe3, 0x75, 0xd2, 0x44, 0x81, 0x17, 0xb0, 0x26, 0x85, 0x13, 0xb4, 0x22, 0xe7, 0x71, 0xd6, 0x40, 0x41, 0xd7, 0x70, 0xe6, 0x23, 0xb5, 0x12, 0x84, 0x10, 0x86, 0x21, 0xb7, 0x72, 0xe4, 0x43, 0xd5, 0xd4, 0x42, 0xe5, 0x73, 0xb6, 0x20, 0x87, 0x11},
- {0x0, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0xe, 0x85, 0x12, 0xb6, 0x21, 0xe3, 0x74, 0xd0, 0x47, 0x49, 0xde, 0x7a, 0xed, 0x2f, 0xb8, 0x1c, 0x8b, 0x17, 0x80, 0x24, 0xb3, 0x71, 0xe6, 0x42, 0xd5, 0xdb, 0x4c, 0xe8, 0x7f, 0xbd, 0x2a, 0x8e, 0x19, 0x92, 0x5, 0xa1, 0x36, 0xf4, 0x63, 0xc7, 0x50, 0x5e, 0xc9, 0x6d, 0xfa, 0x38, 0xaf, 0xb, 0x9c, 0x2e, 0xb9, 0x1d, 0x8a, 0x48, 0xdf, 0x7b, 0xec, 0xe2, 0x75, 0xd1, 0x46, 0x84, 0x13, 0xb7, 0x20, 0xab, 0x3c, 0x98, 0xf, 0xcd, 0x5a, 0xfe, 0x69, 0x67, 0xf0, 0x54, 0xc3, 0x1, 0x96, 0x32, 0xa5, 0x39, 0xae, 0xa, 0x9d, 0x5f, 0xc8, 0x6c, 0xfb, 0xf5, 0x62, 0xc6, 0x51, 0x93, 0x4, 0xa0, 0x37, 0xbc, 0x2b, 0x8f, 0x18, 0xda, 0x4d, 0xe9, 0x7e, 0x70, 0xe7, 0x43, 0xd4, 0x16, 0x81, 0x25, 0xb2, 0x5c, 0xcb, 0x6f, 0xf8, 0x3a, 0xad, 0x9, 0x9e, 0x90, 0x7, 0xa3, 0x34, 0xf6, 0x61, 0xc5, 0x52, 0xd9, 0x4e, 0xea, 0x7d, 0xbf, 0x28, 0x8c, 0x1b, 0x15, 0x82, 0x26, 0xb1, 0x73, 0xe4, 0x40, 0xd7, 0x4b, 0xdc, 0x78, 0xef, 0x2d, 0xba, 0x1e, 0x89, 0x87, 0x10, 0xb4, 0x23, 0xe1, 0x76, 0xd2, 0x45, 0xce, 0x59, 0xfd, 0x6a, 0xa8, 0x3f, 0x9b, 0xc, 0x2, 0x95, 0x31, 0xa6, 0x64, 0xf3, 0x57, 0xc0, 0x72, 0xe5, 0x41, 0xd6, 0x14, 0x83, 0x27, 0xb0, 0xbe, 0x29, 0x8d, 0x1a, 0xd8, 0x4f, 0xeb, 0x7c, 0xf7, 0x60, 0xc4, 0x53, 0x91, 0x6, 0xa2, 0x35, 0x3b, 0xac, 0x8, 0x9f, 0x5d, 0xca, 0x6e, 0xf9, 0x65, 0xf2, 0x56, 0xc1, 0x3, 0x94, 0x30, 0xa7, 0xa9, 0x3e, 0x9a, 0xd, 0xcf, 0x58, 0xfc, 0x6b, 0xe0, 0x77, 0xd3, 0x44, 0x86, 0x11, 0xb5, 0x22, 0x2c, 0xbb, 0x1f, 0x88, 0x4a, 0xdd, 0x79, 0xee},
- {0x0, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x1, 0xee, 0x76, 0xc3, 0x5b, 0x75, 0xed, 0x58, 0xc0, 0x2f, 0xb7, 0x2, 0x9a, 0xc1, 0x59, 0xec, 0x74, 0x9b, 0x3, 0xb6, 0x2e, 0xea, 0x72, 0xc7, 0x5f, 0xb0, 0x28, 0x9d, 0x5, 0x5e, 0xc6, 0x73, 0xeb, 0x4, 0x9c, 0x29, 0xb1, 0x9f, 0x7, 0xb2, 0x2a, 0xc5, 0x5d, 0xe8, 0x70, 0x2b, 0xb3, 0x6, 0x9e, 0x71, 0xe9, 0x5c, 0xc4, 0xc9, 0x51, 0xe4, 0x7c, 0x93, 0xb, 0xbe, 0x26, 0x7d, 0xe5, 0x50, 0xc8, 0x27, 0xbf, 0xa, 0x92, 0xbc, 0x24, 0x91, 0x9, 0xe6, 0x7e, 0xcb, 0x53, 0x8, 0x90, 0x25, 0xbd, 0x52, 0xca, 0x7f, 0xe7, 0x23, 0xbb, 0xe, 0x96, 0x79, 0xe1, 0x54, 0xcc, 0x97, 0xf, 0xba, 0x22, 0xcd, 0x55, 0xe0, 0x78, 0x56, 0xce, 0x7b, 0xe3, 0xc, 0x94, 0x21, 0xb9, 0xe2, 0x7a, 0xcf, 0x57, 0xb8, 0x20, 0x95, 0xd, 0x8f, 0x17, 0xa2, 0x3a, 0xd5, 0x4d, 0xf8, 0x60, 0x3b, 0xa3, 0x16, 0x8e, 0x61, 0xf9, 0x4c, 0xd4, 0xfa, 0x62, 0xd7, 0x4f, 0xa0, 0x38, 0x8d, 0x15, 0x4e, 0xd6, 0x63, 0xfb, 0x14, 0x8c, 0x39, 0xa1, 0x65, 0xfd, 0x48, 0xd0, 0x3f, 0xa7, 0x12, 0x8a, 0xd1, 0x49, 0xfc, 0x64, 0x8b, 0x13, 0xa6, 0x3e, 0x10, 0x88, 0x3d, 0xa5, 0x4a, 0xd2, 0x67, 0xff, 0xa4, 0x3c, 0x89, 0x11, 0xfe, 0x66, 0xd3, 0x4b, 0x46, 0xde, 0x6b, 0xf3, 0x1c, 0x84, 0x31, 0xa9, 0xf2, 0x6a, 0xdf, 0x47, 0xa8, 0x30, 0x85, 0x1d, 0x33, 0xab, 0x1e, 0x86, 0x69, 0xf1, 0x44, 0xdc, 0x87, 0x1f, 0xaa, 0x32, 0xdd, 0x45, 0xf0, 0x68, 0xac, 0x34, 0x81, 0x19, 0xf6, 0x6e, 0xdb, 0x43, 0x18, 0x80, 0x35, 0xad, 0x42, 0xda, 0x6f, 0xf7, 0xd9, 0x41, 0xf4, 0x6c, 0x83, 0x1b, 0xae, 0x36, 0x6d, 0xf5, 0x40, 0xd8, 0x37, 0xaf, 0x1a, 0x82},
- {0x0, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0xa, 0xe2, 0x7b, 0xcd, 0x54, 0x65, 0xfc, 0x4a, 0xd3, 0x3b, 0xa2, 0x14, 0x8d, 0xd9, 0x40, 0xf6, 0x6f, 0x87, 0x1e, 0xa8, 0x31, 0xca, 0x53, 0xe5, 0x7c, 0x94, 0xd, 0xbb, 0x22, 0x76, 0xef, 0x59, 0xc0, 0x28, 0xb1, 0x7, 0x9e, 0xaf, 0x36, 0x80, 0x19, 0xf1, 0x68, 0xde, 0x47, 0x13, 0x8a, 0x3c, 0xa5, 0x4d, 0xd4, 0x62, 0xfb, 0x89, 0x10, 0xa6, 0x3f, 0xd7, 0x4e, 0xf8, 0x61, 0x35, 0xac, 0x1a, 0x83, 0x6b, 0xf2, 0x44, 0xdd, 0xec, 0x75, 0xc3, 0x5a, 0xb2, 0x2b, 0x9d, 0x4, 0x50, 0xc9, 0x7f, 0xe6, 0xe, 0x97, 0x21, 0xb8, 0x43, 0xda, 0x6c, 0xf5, 0x1d, 0x84, 0x32, 0xab, 0xff, 0x66, 0xd0, 0x49, 0xa1, 0x38, 0x8e, 0x17, 0x26, 0xbf, 0x9, 0x90, 0x78, 0xe1, 0x57, 0xce, 0x9a, 0x3, 0xb5, 0x2c, 0xc4, 0x5d, 0xeb, 0x72, 0xf, 0x96, 0x20, 0xb9, 0x51, 0xc8, 0x7e, 0xe7, 0xb3, 0x2a, 0x9c, 0x5, 0xed, 0x74, 0xc2, 0x5b, 0x6a, 0xf3, 0x45, 0xdc, 0x34, 0xad, 0x1b, 0x82, 0xd6, 0x4f, 0xf9, 0x60, 0x88, 0x11, 0xa7, 0x3e, 0xc5, 0x5c, 0xea, 0x73, 0x9b, 0x2, 0xb4, 0x2d, 0x79, 0xe0, 0x56, 0xcf, 0x27, 0xbe, 0x8, 0x91, 0xa0, 0x39, 0x8f, 0x16, 0xfe, 0x67, 0xd1, 0x48, 0x1c, 0x85, 0x33, 0xaa, 0x42, 0xdb, 0x6d, 0xf4, 0x86, 0x1f, 0xa9, 0x30, 0xd8, 0x41, 0xf7, 0x6e, 0x3a, 0xa3, 0x15, 0x8c, 0x64, 0xfd, 0x4b, 0xd2, 0xe3, 0x7a, 0xcc, 0x55, 0xbd, 0x24, 0x92, 0xb, 0x5f, 0xc6, 0x70, 0xe9, 0x1, 0x98, 0x2e, 0xb7, 0x4c, 0xd5, 0x63, 0xfa, 0x12, 0x8b, 0x3d, 0xa4, 0xf0, 0x69, 0xdf, 0x46, 0xae, 0x37, 0x81, 0x18, 0x29, 0xb0, 0x6, 0x9f, 0x77, 0xee, 0x58, 0xc1, 0x95, 0xc, 0xba, 0x23, 0xcb, 0x52, 0xe4, 0x7d},
- {0x0, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45, 0x55, 0xcf, 0x7c, 0xe6, 0x7, 0x9d, 0x2e, 0xb4, 0xf1, 0x6b, 0xd8, 0x42, 0xa3, 0x39, 0x8a, 0x10, 0xaa, 0x30, 0x83, 0x19, 0xf8, 0x62, 0xd1, 0x4b, 0xe, 0x94, 0x27, 0xbd, 0x5c, 0xc6, 0x75, 0xef, 0xff, 0x65, 0xd6, 0x4c, 0xad, 0x37, 0x84, 0x1e, 0x5b, 0xc1, 0x72, 0xe8, 0x9, 0x93, 0x20, 0xba, 0x49, 0xd3, 0x60, 0xfa, 0x1b, 0x81, 0x32, 0xa8, 0xed, 0x77, 0xc4, 0x5e, 0xbf, 0x25, 0x96, 0xc, 0x1c, 0x86, 0x35, 0xaf, 0x4e, 0xd4, 0x67, 0xfd, 0xb8, 0x22, 0x91, 0xb, 0xea, 0x70, 0xc3, 0x59, 0xe3, 0x79, 0xca, 0x50, 0xb1, 0x2b, 0x98, 0x2, 0x47, 0xdd, 0x6e, 0xf4, 0x15, 0x8f, 0x3c, 0xa6, 0xb6, 0x2c, 0x9f, 0x5, 0xe4, 0x7e, 0xcd, 0x57, 0x12, 0x88, 0x3b, 0xa1, 0x40, 0xda, 0x69, 0xf3, 0x92, 0x8, 0xbb, 0x21, 0xc0, 0x5a, 0xe9, 0x73, 0x36, 0xac, 0x1f, 0x85, 0x64, 0xfe, 0x4d, 0xd7, 0xc7, 0x5d, 0xee, 0x74, 0x95, 0xf, 0xbc, 0x26, 0x63, 0xf9, 0x4a, 0xd0, 0x31, 0xab, 0x18, 0x82, 0x38, 0xa2, 0x11, 0x8b, 0x6a, 0xf0, 0x43, 0xd9, 0x9c, 0x6, 0xb5, 0x2f, 0xce, 0x54, 0xe7, 0x7d, 0x6d, 0xf7, 0x44, 0xde, 0x3f, 0xa5, 0x16, 0x8c, 0xc9, 0x53, 0xe0, 0x7a, 0x9b, 0x1, 0xb2, 0x28, 0xdb, 0x41, 0xf2, 0x68, 0x89, 0x13, 0xa0, 0x3a, 0x7f, 0xe5, 0x56, 0xcc, 0x2d, 0xb7, 0x4, 0x9e, 0x8e, 0x14, 0xa7, 0x3d, 0xdc, 0x46, 0xf5, 0x6f, 0x2a, 0xb0, 0x3, 0x99, 0x78, 0xe2, 0x51, 0xcb, 0x71, 0xeb, 0x58, 0xc2, 0x23, 0xb9, 0xa, 0x90, 0xd5, 0x4f, 0xfc, 0x66, 0x87, 0x1d, 0xae, 0x34, 0x24, 0xbe, 0xd, 0x97, 0x76, 0xec, 0x5f, 0xc5, 0x80, 0x1a, 0xa9, 0x33, 0xd2, 0x48, 0xfb, 0x61},
- {0x0, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a, 0x45, 0xde, 0x6e, 0xf5, 0x13, 0x88, 0x38, 0xa3, 0xe9, 0x72, 0xc2, 0x59, 0xbf, 0x24, 0x94, 0xf, 0x8a, 0x11, 0xa1, 0x3a, 0xdc, 0x47, 0xf7, 0x6c, 0x26, 0xbd, 0xd, 0x96, 0x70, 0xeb, 0x5b, 0xc0, 0xcf, 0x54, 0xe4, 0x7f, 0x99, 0x2, 0xb2, 0x29, 0x63, 0xf8, 0x48, 0xd3, 0x35, 0xae, 0x1e, 0x85, 0x9, 0x92, 0x22, 0xb9, 0x5f, 0xc4, 0x74, 0xef, 0xa5, 0x3e, 0x8e, 0x15, 0xf3, 0x68, 0xd8, 0x43, 0x4c, 0xd7, 0x67, 0xfc, 0x1a, 0x81, 0x31, 0xaa, 0xe0, 0x7b, 0xcb, 0x50, 0xb6, 0x2d, 0x9d, 0x6, 0x83, 0x18, 0xa8, 0x33, 0xd5, 0x4e, 0xfe, 0x65, 0x2f, 0xb4, 0x4, 0x9f, 0x79, 0xe2, 0x52, 0xc9, 0xc6, 0x5d, 0xed, 0x76, 0x90, 0xb, 0xbb, 0x20, 0x6a, 0xf1, 0x41, 0xda, 0x3c, 0xa7, 0x17, 0x8c, 0x12, 0x89, 0x39, 0xa2, 0x44, 0xdf, 0x6f, 0xf4, 0xbe, 0x25, 0x95, 0xe, 0xe8, 0x73, 0xc3, 0x58, 0x57, 0xcc, 0x7c, 0xe7, 0x1, 0x9a, 0x2a, 0xb1, 0xfb, 0x60, 0xd0, 0x4b, 0xad, 0x36, 0x86, 0x1d, 0x98, 0x3, 0xb3, 0x28, 0xce, 0x55, 0xe5, 0x7e, 0x34, 0xaf, 0x1f, 0x84, 0x62, 0xf9, 0x49, 0xd2, 0xdd, 0x46, 0xf6, 0x6d, 0x8b, 0x10, 0xa0, 0x3b, 0x71, 0xea, 0x5a, 0xc1, 0x27, 0xbc, 0xc, 0x97, 0x1b, 0x80, 0x30, 0xab, 0x4d, 0xd6, 0x66, 0xfd, 0xb7, 0x2c, 0x9c, 0x7, 0xe1, 0x7a, 0xca, 0x51, 0x5e, 0xc5, 0x75, 0xee, 0x8, 0x93, 0x23, 0xb8, 0xf2, 0x69, 0xd9, 0x42, 0xa4, 0x3f, 0x8f, 0x14, 0x91, 0xa, 0xba, 0x21, 0xc7, 0x5c, 0xec, 0x77, 0x3d, 0xa6, 0x16, 0x8d, 0x6b, 0xf0, 0x40, 0xdb, 0xd4, 0x4f, 0xff, 0x64, 0x82, 0x19, 0xa9, 0x32, 0x78, 0xe3, 0x53, 0xc8, 0x2e, 0xb5, 0x5, 0x9e},
- {0x0, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x8, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67, 0x35, 0xa9, 0x10, 0x8c, 0x7f, 0xe3, 0x5a, 0xc6, 0xa1, 0x3d, 0x84, 0x18, 0xeb, 0x77, 0xce, 0x52, 0x6a, 0xf6, 0x4f, 0xd3, 0x20, 0xbc, 0x5, 0x99, 0xfe, 0x62, 0xdb, 0x47, 0xb4, 0x28, 0x91, 0xd, 0x5f, 0xc3, 0x7a, 0xe6, 0x15, 0x89, 0x30, 0xac, 0xcb, 0x57, 0xee, 0x72, 0x81, 0x1d, 0xa4, 0x38, 0xd4, 0x48, 0xf1, 0x6d, 0x9e, 0x2, 0xbb, 0x27, 0x40, 0xdc, 0x65, 0xf9, 0xa, 0x96, 0x2f, 0xb3, 0xe1, 0x7d, 0xc4, 0x58, 0xab, 0x37, 0x8e, 0x12, 0x75, 0xe9, 0x50, 0xcc, 0x3f, 0xa3, 0x1a, 0x86, 0xbe, 0x22, 0x9b, 0x7, 0xf4, 0x68, 0xd1, 0x4d, 0x2a, 0xb6, 0xf, 0x93, 0x60, 0xfc, 0x45, 0xd9, 0x8b, 0x17, 0xae, 0x32, 0xc1, 0x5d, 0xe4, 0x78, 0x1f, 0x83, 0x3a, 0xa6, 0x55, 0xc9, 0x70, 0xec, 0xb5, 0x29, 0x90, 0xc, 0xff, 0x63, 0xda, 0x46, 0x21, 0xbd, 0x4, 0x98, 0x6b, 0xf7, 0x4e, 0xd2, 0x80, 0x1c, 0xa5, 0x39, 0xca, 0x56, 0xef, 0x73, 0x14, 0x88, 0x31, 0xad, 0x5e, 0xc2, 0x7b, 0xe7, 0xdf, 0x43, 0xfa, 0x66, 0x95, 0x9, 0xb0, 0x2c, 0x4b, 0xd7, 0x6e, 0xf2, 0x1, 0x9d, 0x24, 0xb8, 0xea, 0x76, 0xcf, 0x53, 0xa0, 0x3c, 0x85, 0x19, 0x7e, 0xe2, 0x5b, 0xc7, 0x34, 0xa8, 0x11, 0x8d, 0x61, 0xfd, 0x44, 0xd8, 0x2b, 0xb7, 0xe, 0x92, 0xf5, 0x69, 0xd0, 0x4c, 0xbf, 0x23, 0x9a, 0x6, 0x54, 0xc8, 0x71, 0xed, 0x1e, 0x82, 0x3b, 0xa7, 0xc0, 0x5c, 0xe5, 0x79, 0x8a, 0x16, 0xaf, 0x33, 0xb, 0x97, 0x2e, 0xb2, 0x41, 0xdd, 0x64, 0xf8, 0x9f, 0x3, 0xba, 0x26, 0xd5, 0x49, 0xf0, 0x6c, 0x3e, 0xa2, 0x1b, 0x87, 0x74, 0xe8, 0x51, 0xcd, 0xaa, 0x36, 0x8f, 0x13, 0xe0, 0x7c, 0xc5, 0x59},
- {0x0, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x1, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68, 0x25, 0xb8, 0x2, 0x9f, 0x6b, 0xf6, 0x4c, 0xd1, 0xb9, 0x24, 0x9e, 0x3, 0xf7, 0x6a, 0xd0, 0x4d, 0x4a, 0xd7, 0x6d, 0xf0, 0x4, 0x99, 0x23, 0xbe, 0xd6, 0x4b, 0xf1, 0x6c, 0x98, 0x5, 0xbf, 0x22, 0x6f, 0xf2, 0x48, 0xd5, 0x21, 0xbc, 0x6, 0x9b, 0xf3, 0x6e, 0xd4, 0x49, 0xbd, 0x20, 0x9a, 0x7, 0x94, 0x9, 0xb3, 0x2e, 0xda, 0x47, 0xfd, 0x60, 0x8, 0x95, 0x2f, 0xb2, 0x46, 0xdb, 0x61, 0xfc, 0xb1, 0x2c, 0x96, 0xb, 0xff, 0x62, 0xd8, 0x45, 0x2d, 0xb0, 0xa, 0x97, 0x63, 0xfe, 0x44, 0xd9, 0xde, 0x43, 0xf9, 0x64, 0x90, 0xd, 0xb7, 0x2a, 0x42, 0xdf, 0x65, 0xf8, 0xc, 0x91, 0x2b, 0xb6, 0xfb, 0x66, 0xdc, 0x41, 0xb5, 0x28, 0x92, 0xf, 0x67, 0xfa, 0x40, 0xdd, 0x29, 0xb4, 0xe, 0x93, 0x35, 0xa8, 0x12, 0x8f, 0x7b, 0xe6, 0x5c, 0xc1, 0xa9, 0x34, 0x8e, 0x13, 0xe7, 0x7a, 0xc0, 0x5d, 0x10, 0x8d, 0x37, 0xaa, 0x5e, 0xc3, 0x79, 0xe4, 0x8c, 0x11, 0xab, 0x36, 0xc2, 0x5f, 0xe5, 0x78, 0x7f, 0xe2, 0x58, 0xc5, 0x31, 0xac, 0x16, 0x8b, 0xe3, 0x7e, 0xc4, 0x59, 0xad, 0x30, 0x8a, 0x17, 0x5a, 0xc7, 0x7d, 0xe0, 0x14, 0x89, 0x33, 0xae, 0xc6, 0x5b, 0xe1, 0x7c, 0x88, 0x15, 0xaf, 0x32, 0xa1, 0x3c, 0x86, 0x1b, 0xef, 0x72, 0xc8, 0x55, 0x3d, 0xa0, 0x1a, 0x87, 0x73, 0xee, 0x54, 0xc9, 0x84, 0x19, 0xa3, 0x3e, 0xca, 0x57, 0xed, 0x70, 0x18, 0x85, 0x3f, 0xa2, 0x56, 0xcb, 0x71, 0xec, 0xeb, 0x76, 0xcc, 0x51, 0xa5, 0x38, 0x82, 0x1f, 0x77, 0xea, 0x50, 0xcd, 0x39, 0xa4, 0x1e, 0x83, 0xce, 0x53, 0xe9, 0x74, 0x80, 0x1d, 0xa7, 0x3a, 0x52, 0xcf, 0x75, 0xe8, 0x1c, 0x81, 0x3b, 0xa6},
- {0x0, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79, 0x15, 0x8b, 0x34, 0xaa, 0x57, 0xc9, 0x76, 0xe8, 0x91, 0xf, 0xb0, 0x2e, 0xd3, 0x4d, 0xf2, 0x6c, 0x2a, 0xb4, 0xb, 0x95, 0x68, 0xf6, 0x49, 0xd7, 0xae, 0x30, 0x8f, 0x11, 0xec, 0x72, 0xcd, 0x53, 0x3f, 0xa1, 0x1e, 0x80, 0x7d, 0xe3, 0x5c, 0xc2, 0xbb, 0x25, 0x9a, 0x4, 0xf9, 0x67, 0xd8, 0x46, 0x54, 0xca, 0x75, 0xeb, 0x16, 0x88, 0x37, 0xa9, 0xd0, 0x4e, 0xf1, 0x6f, 0x92, 0xc, 0xb3, 0x2d, 0x41, 0xdf, 0x60, 0xfe, 0x3, 0x9d, 0x22, 0xbc, 0xc5, 0x5b, 0xe4, 0x7a, 0x87, 0x19, 0xa6, 0x38, 0x7e, 0xe0, 0x5f, 0xc1, 0x3c, 0xa2, 0x1d, 0x83, 0xfa, 0x64, 0xdb, 0x45, 0xb8, 0x26, 0x99, 0x7, 0x6b, 0xf5, 0x4a, 0xd4, 0x29, 0xb7, 0x8, 0x96, 0xef, 0x71, 0xce, 0x50, 0xad, 0x33, 0x8c, 0x12, 0xa8, 0x36, 0x89, 0x17, 0xea, 0x74, 0xcb, 0x55, 0x2c, 0xb2, 0xd, 0x93, 0x6e, 0xf0, 0x4f, 0xd1, 0xbd, 0x23, 0x9c, 0x2, 0xff, 0x61, 0xde, 0x40, 0x39, 0xa7, 0x18, 0x86, 0x7b, 0xe5, 0x5a, 0xc4, 0x82, 0x1c, 0xa3, 0x3d, 0xc0, 0x5e, 0xe1, 0x7f, 0x6, 0x98, 0x27, 0xb9, 0x44, 0xda, 0x65, 0xfb, 0x97, 0x9, 0xb6, 0x28, 0xd5, 0x4b, 0xf4, 0x6a, 0x13, 0x8d, 0x32, 0xac, 0x51, 0xcf, 0x70, 0xee, 0xfc, 0x62, 0xdd, 0x43, 0xbe, 0x20, 0x9f, 0x1, 0x78, 0xe6, 0x59, 0xc7, 0x3a, 0xa4, 0x1b, 0x85, 0xe9, 0x77, 0xc8, 0x56, 0xab, 0x35, 0x8a, 0x14, 0x6d, 0xf3, 0x4c, 0xd2, 0x2f, 0xb1, 0xe, 0x90, 0xd6, 0x48, 0xf7, 0x69, 0x94, 0xa, 0xb5, 0x2b, 0x52, 0xcc, 0x73, 0xed, 0x10, 0x8e, 0x31, 0xaf, 0xc3, 0x5d, 0xe2, 0x7c, 0x81, 0x1f, 0xa0, 0x3e, 0x47, 0xd9, 0x66, 0xf8, 0x5, 0x9b, 0x24, 0xba},
- {0x0, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76, 0x5, 0x9a, 0x26, 0xb9, 0x43, 0xdc, 0x60, 0xff, 0x89, 0x16, 0xaa, 0x35, 0xcf, 0x50, 0xec, 0x73, 0xa, 0x95, 0x29, 0xb6, 0x4c, 0xd3, 0x6f, 0xf0, 0x86, 0x19, 0xa5, 0x3a, 0xc0, 0x5f, 0xe3, 0x7c, 0xf, 0x90, 0x2c, 0xb3, 0x49, 0xd6, 0x6a, 0xf5, 0x83, 0x1c, 0xa0, 0x3f, 0xc5, 0x5a, 0xe6, 0x79, 0x14, 0x8b, 0x37, 0xa8, 0x52, 0xcd, 0x71, 0xee, 0x98, 0x7, 0xbb, 0x24, 0xde, 0x41, 0xfd, 0x62, 0x11, 0x8e, 0x32, 0xad, 0x57, 0xc8, 0x74, 0xeb, 0x9d, 0x2, 0xbe, 0x21, 0xdb, 0x44, 0xf8, 0x67, 0x1e, 0x81, 0x3d, 0xa2, 0x58, 0xc7, 0x7b, 0xe4, 0x92, 0xd, 0xb1, 0x2e, 0xd4, 0x4b, 0xf7, 0x68, 0x1b, 0x84, 0x38, 0xa7, 0x5d, 0xc2, 0x7e, 0xe1, 0x97, 0x8, 0xb4, 0x2b, 0xd1, 0x4e, 0xf2, 0x6d, 0x28, 0xb7, 0xb, 0x94, 0x6e, 0xf1, 0x4d, 0xd2, 0xa4, 0x3b, 0x87, 0x18, 0xe2, 0x7d, 0xc1, 0x5e, 0x2d, 0xb2, 0xe, 0x91, 0x6b, 0xf4, 0x48, 0xd7, 0xa1, 0x3e, 0x82, 0x1d, 0xe7, 0x78, 0xc4, 0x5b, 0x22, 0xbd, 0x1, 0x9e, 0x64, 0xfb, 0x47, 0xd8, 0xae, 0x31, 0x8d, 0x12, 0xe8, 0x77, 0xcb, 0x54, 0x27, 0xb8, 0x4, 0x9b, 0x61, 0xfe, 0x42, 0xdd, 0xab, 0x34, 0x88, 0x17, 0xed, 0x72, 0xce, 0x51, 0x3c, 0xa3, 0x1f, 0x80, 0x7a, 0xe5, 0x59, 0xc6, 0xb0, 0x2f, 0x93, 0xc, 0xf6, 0x69, 0xd5, 0x4a, 0x39, 0xa6, 0x1a, 0x85, 0x7f, 0xe0, 0x5c, 0xc3, 0xb5, 0x2a, 0x96, 0x9, 0xf3, 0x6c, 0xd0, 0x4f, 0x36, 0xa9, 0x15, 0x8a, 0x70, 0xef, 0x53, 0xcc, 0xba, 0x25, 0x99, 0x6, 0xfc, 0x63, 0xdf, 0x40, 0x33, 0xac, 0x10, 0x8f, 0x75, 0xea, 0x56, 0xc9, 0xbf, 0x20, 0x9c, 0x3, 0xf9, 0x66, 0xda, 0x45},
- {0x0, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e, 0xd2, 0x72, 0x8f, 0x2f, 0x68, 0xc8, 0x35, 0x95, 0xbb, 0x1b, 0xe6, 0x46, 0x1, 0xa1, 0x5c, 0xfc, 0xb9, 0x19, 0xe4, 0x44, 0x3, 0xa3, 0x5e, 0xfe, 0xd0, 0x70, 0x8d, 0x2d, 0x6a, 0xca, 0x37, 0x97, 0x6b, 0xcb, 0x36, 0x96, 0xd1, 0x71, 0x8c, 0x2c, 0x2, 0xa2, 0x5f, 0xff, 0xb8, 0x18, 0xe5, 0x45, 0x6f, 0xcf, 0x32, 0x92, 0xd5, 0x75, 0x88, 0x28, 0x6, 0xa6, 0x5b, 0xfb, 0xbc, 0x1c, 0xe1, 0x41, 0xbd, 0x1d, 0xe0, 0x40, 0x7, 0xa7, 0x5a, 0xfa, 0xd4, 0x74, 0x89, 0x29, 0x6e, 0xce, 0x33, 0x93, 0xd6, 0x76, 0x8b, 0x2b, 0x6c, 0xcc, 0x31, 0x91, 0xbf, 0x1f, 0xe2, 0x42, 0x5, 0xa5, 0x58, 0xf8, 0x4, 0xa4, 0x59, 0xf9, 0xbe, 0x1e, 0xe3, 0x43, 0x6d, 0xcd, 0x30, 0x90, 0xd7, 0x77, 0x8a, 0x2a, 0xde, 0x7e, 0x83, 0x23, 0x64, 0xc4, 0x39, 0x99, 0xb7, 0x17, 0xea, 0x4a, 0xd, 0xad, 0x50, 0xf0, 0xc, 0xac, 0x51, 0xf1, 0xb6, 0x16, 0xeb, 0x4b, 0x65, 0xc5, 0x38, 0x98, 0xdf, 0x7f, 0x82, 0x22, 0x67, 0xc7, 0x3a, 0x9a, 0xdd, 0x7d, 0x80, 0x20, 0xe, 0xae, 0x53, 0xf3, 0xb4, 0x14, 0xe9, 0x49, 0xb5, 0x15, 0xe8, 0x48, 0xf, 0xaf, 0x52, 0xf2, 0xdc, 0x7c, 0x81, 0x21, 0x66, 0xc6, 0x3b, 0x9b, 0xb1, 0x11, 0xec, 0x4c, 0xb, 0xab, 0x56, 0xf6, 0xd8, 0x78, 0x85, 0x25, 0x62, 0xc2, 0x3f, 0x9f, 0x63, 0xc3, 0x3e, 0x9e, 0xd9, 0x79, 0x84, 0x24, 0xa, 0xaa, 0x57, 0xf7, 0xb0, 0x10, 0xed, 0x4d, 0x8, 0xa8, 0x55, 0xf5, 0xb2, 0x12, 0xef, 0x4f, 0x61, 0xc1, 0x3c, 0x9c, 0xdb, 0x7b, 0x86, 0x26, 0xda, 0x7a, 0x87, 0x27, 0x60, 0xc0, 0x3d, 0x9d, 0xb3, 0x13, 0xee, 0x4e, 0x9, 0xa9, 0x54, 0xf4},
- {0x0, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21, 0xc2, 0x63, 0x9d, 0x3c, 0x7c, 0xdd, 0x23, 0x82, 0xa3, 0x2, 0xfc, 0x5d, 0x1d, 0xbc, 0x42, 0xe3, 0x99, 0x38, 0xc6, 0x67, 0x27, 0x86, 0x78, 0xd9, 0xf8, 0x59, 0xa7, 0x6, 0x46, 0xe7, 0x19, 0xb8, 0x5b, 0xfa, 0x4, 0xa5, 0xe5, 0x44, 0xba, 0x1b, 0x3a, 0x9b, 0x65, 0xc4, 0x84, 0x25, 0xdb, 0x7a, 0x2f, 0x8e, 0x70, 0xd1, 0x91, 0x30, 0xce, 0x6f, 0x4e, 0xef, 0x11, 0xb0, 0xf0, 0x51, 0xaf, 0xe, 0xed, 0x4c, 0xb2, 0x13, 0x53, 0xf2, 0xc, 0xad, 0x8c, 0x2d, 0xd3, 0x72, 0x32, 0x93, 0x6d, 0xcc, 0xb6, 0x17, 0xe9, 0x48, 0x8, 0xa9, 0x57, 0xf6, 0xd7, 0x76, 0x88, 0x29, 0x69, 0xc8, 0x36, 0x97, 0x74, 0xd5, 0x2b, 0x8a, 0xca, 0x6b, 0x95, 0x34, 0x15, 0xb4, 0x4a, 0xeb, 0xab, 0xa, 0xf4, 0x55, 0x5e, 0xff, 0x1, 0xa0, 0xe0, 0x41, 0xbf, 0x1e, 0x3f, 0x9e, 0x60, 0xc1, 0x81, 0x20, 0xde, 0x7f, 0x9c, 0x3d, 0xc3, 0x62, 0x22, 0x83, 0x7d, 0xdc, 0xfd, 0x5c, 0xa2, 0x3, 0x43, 0xe2, 0x1c, 0xbd, 0xc7, 0x66, 0x98, 0x39, 0x79, 0xd8, 0x26, 0x87, 0xa6, 0x7, 0xf9, 0x58, 0x18, 0xb9, 0x47, 0xe6, 0x5, 0xa4, 0x5a, 0xfb, 0xbb, 0x1a, 0xe4, 0x45, 0x64, 0xc5, 0x3b, 0x9a, 0xda, 0x7b, 0x85, 0x24, 0x71, 0xd0, 0x2e, 0x8f, 0xcf, 0x6e, 0x90, 0x31, 0x10, 0xb1, 0x4f, 0xee, 0xae, 0xf, 0xf1, 0x50, 0xb3, 0x12, 0xec, 0x4d, 0xd, 0xac, 0x52, 0xf3, 0xd2, 0x73, 0x8d, 0x2c, 0x6c, 0xcd, 0x33, 0x92, 0xe8, 0x49, 0xb7, 0x16, 0x56, 0xf7, 0x9, 0xa8, 0x89, 0x28, 0xd6, 0x77, 0x37, 0x96, 0x68, 0xc9, 0x2a, 0x8b, 0x75, 0xd4, 0x94, 0x35, 0xcb, 0x6a, 0x4b, 0xea, 0x14, 0xb5, 0xf5, 0x54, 0xaa, 0xb},
- {0x0, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30, 0xf2, 0x50, 0xab, 0x9, 0x40, 0xe2, 0x19, 0xbb, 0x8b, 0x29, 0xd2, 0x70, 0x39, 0x9b, 0x60, 0xc2, 0xf9, 0x5b, 0xa0, 0x2, 0x4b, 0xe9, 0x12, 0xb0, 0x80, 0x22, 0xd9, 0x7b, 0x32, 0x90, 0x6b, 0xc9, 0xb, 0xa9, 0x52, 0xf0, 0xb9, 0x1b, 0xe0, 0x42, 0x72, 0xd0, 0x2b, 0x89, 0xc0, 0x62, 0x99, 0x3b, 0xef, 0x4d, 0xb6, 0x14, 0x5d, 0xff, 0x4, 0xa6, 0x96, 0x34, 0xcf, 0x6d, 0x24, 0x86, 0x7d, 0xdf, 0x1d, 0xbf, 0x44, 0xe6, 0xaf, 0xd, 0xf6, 0x54, 0x64, 0xc6, 0x3d, 0x9f, 0xd6, 0x74, 0x8f, 0x2d, 0x16, 0xb4, 0x4f, 0xed, 0xa4, 0x6, 0xfd, 0x5f, 0x6f, 0xcd, 0x36, 0x94, 0xdd, 0x7f, 0x84, 0x26, 0xe4, 0x46, 0xbd, 0x1f, 0x56, 0xf4, 0xf, 0xad, 0x9d, 0x3f, 0xc4, 0x66, 0x2f, 0x8d, 0x76, 0xd4, 0xc3, 0x61, 0x9a, 0x38, 0x71, 0xd3, 0x28, 0x8a, 0xba, 0x18, 0xe3, 0x41, 0x8, 0xaa, 0x51, 0xf3, 0x31, 0x93, 0x68, 0xca, 0x83, 0x21, 0xda, 0x78, 0x48, 0xea, 0x11, 0xb3, 0xfa, 0x58, 0xa3, 0x1, 0x3a, 0x98, 0x63, 0xc1, 0x88, 0x2a, 0xd1, 0x73, 0x43, 0xe1, 0x1a, 0xb8, 0xf1, 0x53, 0xa8, 0xa, 0xc8, 0x6a, 0x91, 0x33, 0x7a, 0xd8, 0x23, 0x81, 0xb1, 0x13, 0xe8, 0x4a, 0x3, 0xa1, 0x5a, 0xf8, 0x2c, 0x8e, 0x75, 0xd7, 0x9e, 0x3c, 0xc7, 0x65, 0x55, 0xf7, 0xc, 0xae, 0xe7, 0x45, 0xbe, 0x1c, 0xde, 0x7c, 0x87, 0x25, 0x6c, 0xce, 0x35, 0x97, 0xa7, 0x5, 0xfe, 0x5c, 0x15, 0xb7, 0x4c, 0xee, 0xd5, 0x77, 0x8c, 0x2e, 0x67, 0xc5, 0x3e, 0x9c, 0xac, 0xe, 0xf5, 0x57, 0x1e, 0xbc, 0x47, 0xe5, 0x27, 0x85, 0x7e, 0xdc, 0x95, 0x37, 0xcc, 0x6e, 0x5e, 0xfc, 0x7, 0xa5, 0xec, 0x4e, 0xb5, 0x17},
- {0x0, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f, 0xe2, 0x41, 0xb9, 0x1a, 0x54, 0xf7, 0xf, 0xac, 0x93, 0x30, 0xc8, 0x6b, 0x25, 0x86, 0x7e, 0xdd, 0xd9, 0x7a, 0x82, 0x21, 0x6f, 0xcc, 0x34, 0x97, 0xa8, 0xb, 0xf3, 0x50, 0x1e, 0xbd, 0x45, 0xe6, 0x3b, 0x98, 0x60, 0xc3, 0x8d, 0x2e, 0xd6, 0x75, 0x4a, 0xe9, 0x11, 0xb2, 0xfc, 0x5f, 0xa7, 0x4, 0xaf, 0xc, 0xf4, 0x57, 0x19, 0xba, 0x42, 0xe1, 0xde, 0x7d, 0x85, 0x26, 0x68, 0xcb, 0x33, 0x90, 0x4d, 0xee, 0x16, 0xb5, 0xfb, 0x58, 0xa0, 0x3, 0x3c, 0x9f, 0x67, 0xc4, 0x8a, 0x29, 0xd1, 0x72, 0x76, 0xd5, 0x2d, 0x8e, 0xc0, 0x63, 0x9b, 0x38, 0x7, 0xa4, 0x5c, 0xff, 0xb1, 0x12, 0xea, 0x49, 0x94, 0x37, 0xcf, 0x6c, 0x22, 0x81, 0x79, 0xda, 0xe5, 0x46, 0xbe, 0x1d, 0x53, 0xf0, 0x8, 0xab, 0x43, 0xe0, 0x18, 0xbb, 0xf5, 0x56, 0xae, 0xd, 0x32, 0x91, 0x69, 0xca, 0x84, 0x27, 0xdf, 0x7c, 0xa1, 0x2, 0xfa, 0x59, 0x17, 0xb4, 0x4c, 0xef, 0xd0, 0x73, 0x8b, 0x28, 0x66, 0xc5, 0x3d, 0x9e, 0x9a, 0x39, 0xc1, 0x62, 0x2c, 0x8f, 0x77, 0xd4, 0xeb, 0x48, 0xb0, 0x13, 0x5d, 0xfe, 0x6, 0xa5, 0x78, 0xdb, 0x23, 0x80, 0xce, 0x6d, 0x95, 0x36, 0x9, 0xaa, 0x52, 0xf1, 0xbf, 0x1c, 0xe4, 0x47, 0xec, 0x4f, 0xb7, 0x14, 0x5a, 0xf9, 0x1, 0xa2, 0x9d, 0x3e, 0xc6, 0x65, 0x2b, 0x88, 0x70, 0xd3, 0xe, 0xad, 0x55, 0xf6, 0xb8, 0x1b, 0xe3, 0x40, 0x7f, 0xdc, 0x24, 0x87, 0xc9, 0x6a, 0x92, 0x31, 0x35, 0x96, 0x6e, 0xcd, 0x83, 0x20, 0xd8, 0x7b, 0x44, 0xe7, 0x1f, 0xbc, 0xf2, 0x51, 0xa9, 0xa, 0xd7, 0x74, 0x8c, 0x2f, 0x61, 0xc2, 0x3a, 0x99, 0xa6, 0x5, 0xfd, 0x5e, 0x10, 0xb3, 0x4b, 0xe8},
- {0x0, 0xa4, 0x55, 0xf1, 0xaa, 0xe, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12, 0x92, 0x36, 0xc7, 0x63, 0x38, 0x9c, 0x6d, 0xc9, 0xdb, 0x7f, 0x8e, 0x2a, 0x71, 0xd5, 0x24, 0x80, 0x39, 0x9d, 0x6c, 0xc8, 0x93, 0x37, 0xc6, 0x62, 0x70, 0xd4, 0x25, 0x81, 0xda, 0x7e, 0x8f, 0x2b, 0xab, 0xf, 0xfe, 0x5a, 0x1, 0xa5, 0x54, 0xf0, 0xe2, 0x46, 0xb7, 0x13, 0x48, 0xec, 0x1d, 0xb9, 0x72, 0xd6, 0x27, 0x83, 0xd8, 0x7c, 0x8d, 0x29, 0x3b, 0x9f, 0x6e, 0xca, 0x91, 0x35, 0xc4, 0x60, 0xe0, 0x44, 0xb5, 0x11, 0x4a, 0xee, 0x1f, 0xbb, 0xa9, 0xd, 0xfc, 0x58, 0x3, 0xa7, 0x56, 0xf2, 0x4b, 0xef, 0x1e, 0xba, 0xe1, 0x45, 0xb4, 0x10, 0x2, 0xa6, 0x57, 0xf3, 0xa8, 0xc, 0xfd, 0x59, 0xd9, 0x7d, 0x8c, 0x28, 0x73, 0xd7, 0x26, 0x82, 0x90, 0x34, 0xc5, 0x61, 0x3a, 0x9e, 0x6f, 0xcb, 0xe4, 0x40, 0xb1, 0x15, 0x4e, 0xea, 0x1b, 0xbf, 0xad, 0x9, 0xf8, 0x5c, 0x7, 0xa3, 0x52, 0xf6, 0x76, 0xd2, 0x23, 0x87, 0xdc, 0x78, 0x89, 0x2d, 0x3f, 0x9b, 0x6a, 0xce, 0x95, 0x31, 0xc0, 0x64, 0xdd, 0x79, 0x88, 0x2c, 0x77, 0xd3, 0x22, 0x86, 0x94, 0x30, 0xc1, 0x65, 0x3e, 0x9a, 0x6b, 0xcf, 0x4f, 0xeb, 0x1a, 0xbe, 0xe5, 0x41, 0xb0, 0x14, 0x6, 0xa2, 0x53, 0xf7, 0xac, 0x8, 0xf9, 0x5d, 0x96, 0x32, 0xc3, 0x67, 0x3c, 0x98, 0x69, 0xcd, 0xdf, 0x7b, 0x8a, 0x2e, 0x75, 0xd1, 0x20, 0x84, 0x4, 0xa0, 0x51, 0xf5, 0xae, 0xa, 0xfb, 0x5f, 0x4d, 0xe9, 0x18, 0xbc, 0xe7, 0x43, 0xb2, 0x16, 0xaf, 0xb, 0xfa, 0x5e, 0x5, 0xa1, 0x50, 0xf4, 0xe6, 0x42, 0xb3, 0x17, 0x4c, 0xe8, 0x19, 0xbd, 0x3d, 0x99, 0x68, 0xcc, 0x97, 0x33, 0xc2, 0x66, 0x74, 0xd0, 0x21, 0x85, 0xde, 0x7a, 0x8b, 0x2f},
- {0x0, 0xa5, 0x57, 0xf2, 0xae, 0xb, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d, 0x82, 0x27, 0xd5, 0x70, 0x2c, 0x89, 0x7b, 0xde, 0xc3, 0x66, 0x94, 0x31, 0x6d, 0xc8, 0x3a, 0x9f, 0x19, 0xbc, 0x4e, 0xeb, 0xb7, 0x12, 0xe0, 0x45, 0x58, 0xfd, 0xf, 0xaa, 0xf6, 0x53, 0xa1, 0x4, 0x9b, 0x3e, 0xcc, 0x69, 0x35, 0x90, 0x62, 0xc7, 0xda, 0x7f, 0x8d, 0x28, 0x74, 0xd1, 0x23, 0x86, 0x32, 0x97, 0x65, 0xc0, 0x9c, 0x39, 0xcb, 0x6e, 0x73, 0xd6, 0x24, 0x81, 0xdd, 0x78, 0x8a, 0x2f, 0xb0, 0x15, 0xe7, 0x42, 0x1e, 0xbb, 0x49, 0xec, 0xf1, 0x54, 0xa6, 0x3, 0x5f, 0xfa, 0x8, 0xad, 0x2b, 0x8e, 0x7c, 0xd9, 0x85, 0x20, 0xd2, 0x77, 0x6a, 0xcf, 0x3d, 0x98, 0xc4, 0x61, 0x93, 0x36, 0xa9, 0xc, 0xfe, 0x5b, 0x7, 0xa2, 0x50, 0xf5, 0xe8, 0x4d, 0xbf, 0x1a, 0x46, 0xe3, 0x11, 0xb4, 0x64, 0xc1, 0x33, 0x96, 0xca, 0x6f, 0x9d, 0x38, 0x25, 0x80, 0x72, 0xd7, 0x8b, 0x2e, 0xdc, 0x79, 0xe6, 0x43, 0xb1, 0x14, 0x48, 0xed, 0x1f, 0xba, 0xa7, 0x2, 0xf0, 0x55, 0x9, 0xac, 0x5e, 0xfb, 0x7d, 0xd8, 0x2a, 0x8f, 0xd3, 0x76, 0x84, 0x21, 0x3c, 0x99, 0x6b, 0xce, 0x92, 0x37, 0xc5, 0x60, 0xff, 0x5a, 0xa8, 0xd, 0x51, 0xf4, 0x6, 0xa3, 0xbe, 0x1b, 0xe9, 0x4c, 0x10, 0xb5, 0x47, 0xe2, 0x56, 0xf3, 0x1, 0xa4, 0xf8, 0x5d, 0xaf, 0xa, 0x17, 0xb2, 0x40, 0xe5, 0xb9, 0x1c, 0xee, 0x4b, 0xd4, 0x71, 0x83, 0x26, 0x7a, 0xdf, 0x2d, 0x88, 0x95, 0x30, 0xc2, 0x67, 0x3b, 0x9e, 0x6c, 0xc9, 0x4f, 0xea, 0x18, 0xbd, 0xe1, 0x44, 0xb6, 0x13, 0xe, 0xab, 0x59, 0xfc, 0xa0, 0x5, 0xf7, 0x52, 0xcd, 0x68, 0x9a, 0x3f, 0x63, 0xc6, 0x34, 0x91, 0x8c, 0x29, 0xdb, 0x7e, 0x22, 0x87, 0x75, 0xd0},
- {0x0, 0xa6, 0x51, 0xf7, 0xa2, 0x4, 0xf3, 0x55, 0x59, 0xff, 0x8, 0xae, 0xfb, 0x5d, 0xaa, 0xc, 0xb2, 0x14, 0xe3, 0x45, 0x10, 0xb6, 0x41, 0xe7, 0xeb, 0x4d, 0xba, 0x1c, 0x49, 0xef, 0x18, 0xbe, 0x79, 0xdf, 0x28, 0x8e, 0xdb, 0x7d, 0x8a, 0x2c, 0x20, 0x86, 0x71, 0xd7, 0x82, 0x24, 0xd3, 0x75, 0xcb, 0x6d, 0x9a, 0x3c, 0x69, 0xcf, 0x38, 0x9e, 0x92, 0x34, 0xc3, 0x65, 0x30, 0x96, 0x61, 0xc7, 0xf2, 0x54, 0xa3, 0x5, 0x50, 0xf6, 0x1, 0xa7, 0xab, 0xd, 0xfa, 0x5c, 0x9, 0xaf, 0x58, 0xfe, 0x40, 0xe6, 0x11, 0xb7, 0xe2, 0x44, 0xb3, 0x15, 0x19, 0xbf, 0x48, 0xee, 0xbb, 0x1d, 0xea, 0x4c, 0x8b, 0x2d, 0xda, 0x7c, 0x29, 0x8f, 0x78, 0xde, 0xd2, 0x74, 0x83, 0x25, 0x70, 0xd6, 0x21, 0x87, 0x39, 0x9f, 0x68, 0xce, 0x9b, 0x3d, 0xca, 0x6c, 0x60, 0xc6, 0x31, 0x97, 0xc2, 0x64, 0x93, 0x35, 0xf9, 0x5f, 0xa8, 0xe, 0x5b, 0xfd, 0xa, 0xac, 0xa0, 0x6, 0xf1, 0x57, 0x2, 0xa4, 0x53, 0xf5, 0x4b, 0xed, 0x1a, 0xbc, 0xe9, 0x4f, 0xb8, 0x1e, 0x12, 0xb4, 0x43, 0xe5, 0xb0, 0x16, 0xe1, 0x47, 0x80, 0x26, 0xd1, 0x77, 0x22, 0x84, 0x73, 0xd5, 0xd9, 0x7f, 0x88, 0x2e, 0x7b, 0xdd, 0x2a, 0x8c, 0x32, 0x94, 0x63, 0xc5, 0x90, 0x36, 0xc1, 0x67, 0x6b, 0xcd, 0x3a, 0x9c, 0xc9, 0x6f, 0x98, 0x3e, 0xb, 0xad, 0x5a, 0xfc, 0xa9, 0xf, 0xf8, 0x5e, 0x52, 0xf4, 0x3, 0xa5, 0xf0, 0x56, 0xa1, 0x7, 0xb9, 0x1f, 0xe8, 0x4e, 0x1b, 0xbd, 0x4a, 0xec, 0xe0, 0x46, 0xb1, 0x17, 0x42, 0xe4, 0x13, 0xb5, 0x72, 0xd4, 0x23, 0x85, 0xd0, 0x76, 0x81, 0x27, 0x2b, 0x8d, 0x7a, 0xdc, 0x89, 0x2f, 0xd8, 0x7e, 0xc0, 0x66, 0x91, 0x37, 0x62, 0xc4, 0x33, 0x95, 0x99, 0x3f, 0xc8, 0x6e, 0x3b, 0x9d, 0x6a, 0xcc},
- {0x0, 0xa7, 0x53, 0xf4, 0xa6, 0x1, 0xf5, 0x52, 0x51, 0xf6, 0x2, 0xa5, 0xf7, 0x50, 0xa4, 0x3, 0xa2, 0x5, 0xf1, 0x56, 0x4, 0xa3, 0x57, 0xf0, 0xf3, 0x54, 0xa0, 0x7, 0x55, 0xf2, 0x6, 0xa1, 0x59, 0xfe, 0xa, 0xad, 0xff, 0x58, 0xac, 0xb, 0x8, 0xaf, 0x5b, 0xfc, 0xae, 0x9, 0xfd, 0x5a, 0xfb, 0x5c, 0xa8, 0xf, 0x5d, 0xfa, 0xe, 0xa9, 0xaa, 0xd, 0xf9, 0x5e, 0xc, 0xab, 0x5f, 0xf8, 0xb2, 0x15, 0xe1, 0x46, 0x14, 0xb3, 0x47, 0xe0, 0xe3, 0x44, 0xb0, 0x17, 0x45, 0xe2, 0x16, 0xb1, 0x10, 0xb7, 0x43, 0xe4, 0xb6, 0x11, 0xe5, 0x42, 0x41, 0xe6, 0x12, 0xb5, 0xe7, 0x40, 0xb4, 0x13, 0xeb, 0x4c, 0xb8, 0x1f, 0x4d, 0xea, 0x1e, 0xb9, 0xba, 0x1d, 0xe9, 0x4e, 0x1c, 0xbb, 0x4f, 0xe8, 0x49, 0xee, 0x1a, 0xbd, 0xef, 0x48, 0xbc, 0x1b, 0x18, 0xbf, 0x4b, 0xec, 0xbe, 0x19, 0xed, 0x4a, 0x79, 0xde, 0x2a, 0x8d, 0xdf, 0x78, 0x8c, 0x2b, 0x28, 0x8f, 0x7b, 0xdc, 0x8e, 0x29, 0xdd, 0x7a, 0xdb, 0x7c, 0x88, 0x2f, 0x7d, 0xda, 0x2e, 0x89, 0x8a, 0x2d, 0xd9, 0x7e, 0x2c, 0x8b, 0x7f, 0xd8, 0x20, 0x87, 0x73, 0xd4, 0x86, 0x21, 0xd5, 0x72, 0x71, 0xd6, 0x22, 0x85, 0xd7, 0x70, 0x84, 0x23, 0x82, 0x25, 0xd1, 0x76, 0x24, 0x83, 0x77, 0xd0, 0xd3, 0x74, 0x80, 0x27, 0x75, 0xd2, 0x26, 0x81, 0xcb, 0x6c, 0x98, 0x3f, 0x6d, 0xca, 0x3e, 0x99, 0x9a, 0x3d, 0xc9, 0x6e, 0x3c, 0x9b, 0x6f, 0xc8, 0x69, 0xce, 0x3a, 0x9d, 0xcf, 0x68, 0x9c, 0x3b, 0x38, 0x9f, 0x6b, 0xcc, 0x9e, 0x39, 0xcd, 0x6a, 0x92, 0x35, 0xc1, 0x66, 0x34, 0x93, 0x67, 0xc0, 0xc3, 0x64, 0x90, 0x37, 0x65, 0xc2, 0x36, 0x91, 0x30, 0x97, 0x63, 0xc4, 0x96, 0x31, 0xc5, 0x62, 0x61, 0xc6, 0x32, 0x95, 0xc7, 0x60, 0x94, 0x33},
- {0x0, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56, 0x52, 0xfa, 0x1f, 0xb7, 0xc8, 0x60, 0x85, 0x2d, 0x7b, 0xd3, 0x36, 0x9e, 0xe1, 0x49, 0xac, 0x4, 0xa4, 0xc, 0xe9, 0x41, 0x3e, 0x96, 0x73, 0xdb, 0x8d, 0x25, 0xc0, 0x68, 0x17, 0xbf, 0x5a, 0xf2, 0xf6, 0x5e, 0xbb, 0x13, 0x6c, 0xc4, 0x21, 0x89, 0xdf, 0x77, 0x92, 0x3a, 0x45, 0xed, 0x8, 0xa0, 0x55, 0xfd, 0x18, 0xb0, 0xcf, 0x67, 0x82, 0x2a, 0x7c, 0xd4, 0x31, 0x99, 0xe6, 0x4e, 0xab, 0x3, 0x7, 0xaf, 0x4a, 0xe2, 0x9d, 0x35, 0xd0, 0x78, 0x2e, 0x86, 0x63, 0xcb, 0xb4, 0x1c, 0xf9, 0x51, 0xf1, 0x59, 0xbc, 0x14, 0x6b, 0xc3, 0x26, 0x8e, 0xd8, 0x70, 0x95, 0x3d, 0x42, 0xea, 0xf, 0xa7, 0xa3, 0xb, 0xee, 0x46, 0x39, 0x91, 0x74, 0xdc, 0x8a, 0x22, 0xc7, 0x6f, 0x10, 0xb8, 0x5d, 0xf5, 0xaa, 0x2, 0xe7, 0x4f, 0x30, 0x98, 0x7d, 0xd5, 0x83, 0x2b, 0xce, 0x66, 0x19, 0xb1, 0x54, 0xfc, 0xf8, 0x50, 0xb5, 0x1d, 0x62, 0xca, 0x2f, 0x87, 0xd1, 0x79, 0x9c, 0x34, 0x4b, 0xe3, 0x6, 0xae, 0xe, 0xa6, 0x43, 0xeb, 0x94, 0x3c, 0xd9, 0x71, 0x27, 0x8f, 0x6a, 0xc2, 0xbd, 0x15, 0xf0, 0x58, 0x5c, 0xf4, 0x11, 0xb9, 0xc6, 0x6e, 0x8b, 0x23, 0x75, 0xdd, 0x38, 0x90, 0xef, 0x47, 0xa2, 0xa, 0xff, 0x57, 0xb2, 0x1a, 0x65, 0xcd, 0x28, 0x80, 0xd6, 0x7e, 0x9b, 0x33, 0x4c, 0xe4, 0x1, 0xa9, 0xad, 0x5, 0xe0, 0x48, 0x37, 0x9f, 0x7a, 0xd2, 0x84, 0x2c, 0xc9, 0x61, 0x1e, 0xb6, 0x53, 0xfb, 0x5b, 0xf3, 0x16, 0xbe, 0xc1, 0x69, 0x8c, 0x24, 0x72, 0xda, 0x3f, 0x97, 0xe8, 0x40, 0xa5, 0xd, 0x9, 0xa1, 0x44, 0xec, 0x93, 0x3b, 0xde, 0x76, 0x20, 0x88, 0x6d, 0xc5, 0xba, 0x12, 0xf7, 0x5f},
- {0x0, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59, 0x42, 0xeb, 0xd, 0xa4, 0xdc, 0x75, 0x93, 0x3a, 0x63, 0xca, 0x2c, 0x85, 0xfd, 0x54, 0xb2, 0x1b, 0x84, 0x2d, 0xcb, 0x62, 0x1a, 0xb3, 0x55, 0xfc, 0xa5, 0xc, 0xea, 0x43, 0x3b, 0x92, 0x74, 0xdd, 0xc6, 0x6f, 0x89, 0x20, 0x58, 0xf1, 0x17, 0xbe, 0xe7, 0x4e, 0xa8, 0x1, 0x79, 0xd0, 0x36, 0x9f, 0x15, 0xbc, 0x5a, 0xf3, 0x8b, 0x22, 0xc4, 0x6d, 0x34, 0x9d, 0x7b, 0xd2, 0xaa, 0x3, 0xe5, 0x4c, 0x57, 0xfe, 0x18, 0xb1, 0xc9, 0x60, 0x86, 0x2f, 0x76, 0xdf, 0x39, 0x90, 0xe8, 0x41, 0xa7, 0xe, 0x91, 0x38, 0xde, 0x77, 0xf, 0xa6, 0x40, 0xe9, 0xb0, 0x19, 0xff, 0x56, 0x2e, 0x87, 0x61, 0xc8, 0xd3, 0x7a, 0x9c, 0x35, 0x4d, 0xe4, 0x2, 0xab, 0xf2, 0x5b, 0xbd, 0x14, 0x6c, 0xc5, 0x23, 0x8a, 0x2a, 0x83, 0x65, 0xcc, 0xb4, 0x1d, 0xfb, 0x52, 0xb, 0xa2, 0x44, 0xed, 0x95, 0x3c, 0xda, 0x73, 0x68, 0xc1, 0x27, 0x8e, 0xf6, 0x5f, 0xb9, 0x10, 0x49, 0xe0, 0x6, 0xaf, 0xd7, 0x7e, 0x98, 0x31, 0xae, 0x7, 0xe1, 0x48, 0x30, 0x99, 0x7f, 0xd6, 0x8f, 0x26, 0xc0, 0x69, 0x11, 0xb8, 0x5e, 0xf7, 0xec, 0x45, 0xa3, 0xa, 0x72, 0xdb, 0x3d, 0x94, 0xcd, 0x64, 0x82, 0x2b, 0x53, 0xfa, 0x1c, 0xb5, 0x3f, 0x96, 0x70, 0xd9, 0xa1, 0x8, 0xee, 0x47, 0x1e, 0xb7, 0x51, 0xf8, 0x80, 0x29, 0xcf, 0x66, 0x7d, 0xd4, 0x32, 0x9b, 0xe3, 0x4a, 0xac, 0x5, 0x5c, 0xf5, 0x13, 0xba, 0xc2, 0x6b, 0x8d, 0x24, 0xbb, 0x12, 0xf4, 0x5d, 0x25, 0x8c, 0x6a, 0xc3, 0x9a, 0x33, 0xd5, 0x7c, 0x4, 0xad, 0x4b, 0xe2, 0xf9, 0x50, 0xb6, 0x1f, 0x67, 0xce, 0x28, 0x81, 0xd8, 0x71, 0x97, 0x3e, 0x46, 0xef, 0x9, 0xa0},
- {0x0, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x1, 0xe2, 0x48, 0x72, 0xd8, 0x3b, 0x91, 0xe0, 0x4a, 0xa9, 0x3, 0x4b, 0xe1, 0x2, 0xa8, 0xd9, 0x73, 0x90, 0x3a, 0xe4, 0x4e, 0xad, 0x7, 0x76, 0xdc, 0x3f, 0x95, 0xdd, 0x77, 0x94, 0x3e, 0x4f, 0xe5, 0x6, 0xac, 0x96, 0x3c, 0xdf, 0x75, 0x4, 0xae, 0x4d, 0xe7, 0xaf, 0x5, 0xe6, 0x4c, 0x3d, 0x97, 0x74, 0xde, 0xd5, 0x7f, 0x9c, 0x36, 0x47, 0xed, 0xe, 0xa4, 0xec, 0x46, 0xa5, 0xf, 0x7e, 0xd4, 0x37, 0x9d, 0xa7, 0xd, 0xee, 0x44, 0x35, 0x9f, 0x7c, 0xd6, 0x9e, 0x34, 0xd7, 0x7d, 0xc, 0xa6, 0x45, 0xef, 0x31, 0x9b, 0x78, 0xd2, 0xa3, 0x9, 0xea, 0x40, 0x8, 0xa2, 0x41, 0xeb, 0x9a, 0x30, 0xd3, 0x79, 0x43, 0xe9, 0xa, 0xa0, 0xd1, 0x7b, 0x98, 0x32, 0x7a, 0xd0, 0x33, 0x99, 0xe8, 0x42, 0xa1, 0xb, 0xb7, 0x1d, 0xfe, 0x54, 0x25, 0x8f, 0x6c, 0xc6, 0x8e, 0x24, 0xc7, 0x6d, 0x1c, 0xb6, 0x55, 0xff, 0xc5, 0x6f, 0x8c, 0x26, 0x57, 0xfd, 0x1e, 0xb4, 0xfc, 0x56, 0xb5, 0x1f, 0x6e, 0xc4, 0x27, 0x8d, 0x53, 0xf9, 0x1a, 0xb0, 0xc1, 0x6b, 0x88, 0x22, 0x6a, 0xc0, 0x23, 0x89, 0xf8, 0x52, 0xb1, 0x1b, 0x21, 0x8b, 0x68, 0xc2, 0xb3, 0x19, 0xfa, 0x50, 0x18, 0xb2, 0x51, 0xfb, 0x8a, 0x20, 0xc3, 0x69, 0x62, 0xc8, 0x2b, 0x81, 0xf0, 0x5a, 0xb9, 0x13, 0x5b, 0xf1, 0x12, 0xb8, 0xc9, 0x63, 0x80, 0x2a, 0x10, 0xba, 0x59, 0xf3, 0x82, 0x28, 0xcb, 0x61, 0x29, 0x83, 0x60, 0xca, 0xbb, 0x11, 0xf2, 0x58, 0x86, 0x2c, 0xcf, 0x65, 0x14, 0xbe, 0x5d, 0xf7, 0xbf, 0x15, 0xf6, 0x5c, 0x2d, 0x87, 0x64, 0xce, 0xf4, 0x5e, 0xbd, 0x17, 0x66, 0xcc, 0x2f, 0x85, 0xcd, 0x67, 0x84, 0x2e, 0x5f, 0xf5, 0x16, 0xbc},
- {0x0, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0xc, 0xec, 0x47, 0x62, 0xc9, 0x29, 0x82, 0xf4, 0x5f, 0xbf, 0x14, 0x53, 0xf8, 0x18, 0xb3, 0xc5, 0x6e, 0x8e, 0x25, 0xc4, 0x6f, 0x8f, 0x24, 0x52, 0xf9, 0x19, 0xb2, 0xf5, 0x5e, 0xbe, 0x15, 0x63, 0xc8, 0x28, 0x83, 0xa6, 0xd, 0xed, 0x46, 0x30, 0x9b, 0x7b, 0xd0, 0x97, 0x3c, 0xdc, 0x77, 0x1, 0xaa, 0x4a, 0xe1, 0x95, 0x3e, 0xde, 0x75, 0x3, 0xa8, 0x48, 0xe3, 0xa4, 0xf, 0xef, 0x44, 0x32, 0x99, 0x79, 0xd2, 0xf7, 0x5c, 0xbc, 0x17, 0x61, 0xca, 0x2a, 0x81, 0xc6, 0x6d, 0x8d, 0x26, 0x50, 0xfb, 0x1b, 0xb0, 0x51, 0xfa, 0x1a, 0xb1, 0xc7, 0x6c, 0x8c, 0x27, 0x60, 0xcb, 0x2b, 0x80, 0xf6, 0x5d, 0xbd, 0x16, 0x33, 0x98, 0x78, 0xd3, 0xa5, 0xe, 0xee, 0x45, 0x2, 0xa9, 0x49, 0xe2, 0x94, 0x3f, 0xdf, 0x74, 0x37, 0x9c, 0x7c, 0xd7, 0xa1, 0xa, 0xea, 0x41, 0x6, 0xad, 0x4d, 0xe6, 0x90, 0x3b, 0xdb, 0x70, 0x55, 0xfe, 0x1e, 0xb5, 0xc3, 0x68, 0x88, 0x23, 0x64, 0xcf, 0x2f, 0x84, 0xf2, 0x59, 0xb9, 0x12, 0xf3, 0x58, 0xb8, 0x13, 0x65, 0xce, 0x2e, 0x85, 0xc2, 0x69, 0x89, 0x22, 0x54, 0xff, 0x1f, 0xb4, 0x91, 0x3a, 0xda, 0x71, 0x7, 0xac, 0x4c, 0xe7, 0xa0, 0xb, 0xeb, 0x40, 0x36, 0x9d, 0x7d, 0xd6, 0xa2, 0x9, 0xe9, 0x42, 0x34, 0x9f, 0x7f, 0xd4, 0x93, 0x38, 0xd8, 0x73, 0x5, 0xae, 0x4e, 0xe5, 0xc0, 0x6b, 0x8b, 0x20, 0x56, 0xfd, 0x1d, 0xb6, 0xf1, 0x5a, 0xba, 0x11, 0x67, 0xcc, 0x2c, 0x87, 0x66, 0xcd, 0x2d, 0x86, 0xf0, 0x5b, 0xbb, 0x10, 0x57, 0xfc, 0x1c, 0xb7, 0xc1, 0x6a, 0x8a, 0x21, 0x4, 0xaf, 0x4f, 0xe4, 0x92, 0x39, 0xd9, 0x72, 0x35, 0x9e, 0x7e, 0xd5, 0xa3, 0x8, 0xe8, 0x43},
- {0x0, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x9, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a, 0x12, 0xbe, 0x57, 0xfb, 0x98, 0x34, 0xdd, 0x71, 0x1b, 0xb7, 0x5e, 0xf2, 0x91, 0x3d, 0xd4, 0x78, 0x24, 0x88, 0x61, 0xcd, 0xae, 0x2, 0xeb, 0x47, 0x2d, 0x81, 0x68, 0xc4, 0xa7, 0xb, 0xe2, 0x4e, 0x36, 0x9a, 0x73, 0xdf, 0xbc, 0x10, 0xf9, 0x55, 0x3f, 0x93, 0x7a, 0xd6, 0xb5, 0x19, 0xf0, 0x5c, 0x48, 0xe4, 0xd, 0xa1, 0xc2, 0x6e, 0x87, 0x2b, 0x41, 0xed, 0x4, 0xa8, 0xcb, 0x67, 0x8e, 0x22, 0x5a, 0xf6, 0x1f, 0xb3, 0xd0, 0x7c, 0x95, 0x39, 0x53, 0xff, 0x16, 0xba, 0xd9, 0x75, 0x9c, 0x30, 0x6c, 0xc0, 0x29, 0x85, 0xe6, 0x4a, 0xa3, 0xf, 0x65, 0xc9, 0x20, 0x8c, 0xef, 0x43, 0xaa, 0x6, 0x7e, 0xd2, 0x3b, 0x97, 0xf4, 0x58, 0xb1, 0x1d, 0x77, 0xdb, 0x32, 0x9e, 0xfd, 0x51, 0xb8, 0x14, 0x90, 0x3c, 0xd5, 0x79, 0x1a, 0xb6, 0x5f, 0xf3, 0x99, 0x35, 0xdc, 0x70, 0x13, 0xbf, 0x56, 0xfa, 0x82, 0x2e, 0xc7, 0x6b, 0x8, 0xa4, 0x4d, 0xe1, 0x8b, 0x27, 0xce, 0x62, 0x1, 0xad, 0x44, 0xe8, 0xb4, 0x18, 0xf1, 0x5d, 0x3e, 0x92, 0x7b, 0xd7, 0xbd, 0x11, 0xf8, 0x54, 0x37, 0x9b, 0x72, 0xde, 0xa6, 0xa, 0xe3, 0x4f, 0x2c, 0x80, 0x69, 0xc5, 0xaf, 0x3, 0xea, 0x46, 0x25, 0x89, 0x60, 0xcc, 0xd8, 0x74, 0x9d, 0x31, 0x52, 0xfe, 0x17, 0xbb, 0xd1, 0x7d, 0x94, 0x38, 0x5b, 0xf7, 0x1e, 0xb2, 0xca, 0x66, 0x8f, 0x23, 0x40, 0xec, 0x5, 0xa9, 0xc3, 0x6f, 0x86, 0x2a, 0x49, 0xe5, 0xc, 0xa0, 0xfc, 0x50, 0xb9, 0x15, 0x76, 0xda, 0x33, 0x9f, 0xf5, 0x59, 0xb0, 0x1c, 0x7f, 0xd3, 0x3a, 0x96, 0xee, 0x42, 0xab, 0x7, 0x64, 0xc8, 0x21, 0x8d, 0xe7, 0x4b, 0xa2, 0xe, 0x6d, 0xc1, 0x28, 0x84},
- {0x0, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x1, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65, 0x2, 0xaf, 0x45, 0xe8, 0x8c, 0x21, 0xcb, 0x66, 0x3, 0xae, 0x44, 0xe9, 0x8d, 0x20, 0xca, 0x67, 0x4, 0xa9, 0x43, 0xee, 0x8a, 0x27, 0xcd, 0x60, 0x5, 0xa8, 0x42, 0xef, 0x8b, 0x26, 0xcc, 0x61, 0x6, 0xab, 0x41, 0xec, 0x88, 0x25, 0xcf, 0x62, 0x7, 0xaa, 0x40, 0xed, 0x89, 0x24, 0xce, 0x63, 0x8, 0xa5, 0x4f, 0xe2, 0x86, 0x2b, 0xc1, 0x6c, 0x9, 0xa4, 0x4e, 0xe3, 0x87, 0x2a, 0xc0, 0x6d, 0xa, 0xa7, 0x4d, 0xe0, 0x84, 0x29, 0xc3, 0x6e, 0xb, 0xa6, 0x4c, 0xe1, 0x85, 0x28, 0xc2, 0x6f, 0xc, 0xa1, 0x4b, 0xe6, 0x82, 0x2f, 0xc5, 0x68, 0xd, 0xa0, 0x4a, 0xe7, 0x83, 0x2e, 0xc4, 0x69, 0xe, 0xa3, 0x49, 0xe4, 0x80, 0x2d, 0xc7, 0x6a, 0xf, 0xa2, 0x48, 0xe5, 0x81, 0x2c, 0xc6, 0x6b, 0x10, 0xbd, 0x57, 0xfa, 0x9e, 0x33, 0xd9, 0x74, 0x11, 0xbc, 0x56, 0xfb, 0x9f, 0x32, 0xd8, 0x75, 0x12, 0xbf, 0x55, 0xf8, 0x9c, 0x31, 0xdb, 0x76, 0x13, 0xbe, 0x54, 0xf9, 0x9d, 0x30, 0xda, 0x77, 0x14, 0xb9, 0x53, 0xfe, 0x9a, 0x37, 0xdd, 0x70, 0x15, 0xb8, 0x52, 0xff, 0x9b, 0x36, 0xdc, 0x71, 0x16, 0xbb, 0x51, 0xfc, 0x98, 0x35, 0xdf, 0x72, 0x17, 0xba, 0x50, 0xfd, 0x99, 0x34, 0xde, 0x73, 0x18, 0xb5, 0x5f, 0xf2, 0x96, 0x3b, 0xd1, 0x7c, 0x19, 0xb4, 0x5e, 0xf3, 0x97, 0x3a, 0xd0, 0x7d, 0x1a, 0xb7, 0x5d, 0xf0, 0x94, 0x39, 0xd3, 0x7e, 0x1b, 0xb6, 0x5c, 0xf1, 0x95, 0x38, 0xd2, 0x7f, 0x1c, 0xb1, 0x5b, 0xf6, 0x92, 0x3f, 0xd5, 0x78, 0x1d, 0xb0, 0x5a, 0xf7, 0x93, 0x3e, 0xd4, 0x79, 0x1e, 0xb3, 0x59, 0xf4, 0x90, 0x3d, 0xd7, 0x7a, 0x1f, 0xb2, 0x58, 0xf5, 0x91, 0x3c, 0xd6, 0x7b},
- {0x0, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74, 0x32, 0x9c, 0x73, 0xdd, 0xb0, 0x1e, 0xf1, 0x5f, 0x2b, 0x85, 0x6a, 0xc4, 0xa9, 0x7, 0xe8, 0x46, 0x64, 0xca, 0x25, 0x8b, 0xe6, 0x48, 0xa7, 0x9, 0x7d, 0xd3, 0x3c, 0x92, 0xff, 0x51, 0xbe, 0x10, 0x56, 0xf8, 0x17, 0xb9, 0xd4, 0x7a, 0x95, 0x3b, 0x4f, 0xe1, 0xe, 0xa0, 0xcd, 0x63, 0x8c, 0x22, 0xc8, 0x66, 0x89, 0x27, 0x4a, 0xe4, 0xb, 0xa5, 0xd1, 0x7f, 0x90, 0x3e, 0x53, 0xfd, 0x12, 0xbc, 0xfa, 0x54, 0xbb, 0x15, 0x78, 0xd6, 0x39, 0x97, 0xe3, 0x4d, 0xa2, 0xc, 0x61, 0xcf, 0x20, 0x8e, 0xac, 0x2, 0xed, 0x43, 0x2e, 0x80, 0x6f, 0xc1, 0xb5, 0x1b, 0xf4, 0x5a, 0x37, 0x99, 0x76, 0xd8, 0x9e, 0x30, 0xdf, 0x71, 0x1c, 0xb2, 0x5d, 0xf3, 0x87, 0x29, 0xc6, 0x68, 0x5, 0xab, 0x44, 0xea, 0x8d, 0x23, 0xcc, 0x62, 0xf, 0xa1, 0x4e, 0xe0, 0x94, 0x3a, 0xd5, 0x7b, 0x16, 0xb8, 0x57, 0xf9, 0xbf, 0x11, 0xfe, 0x50, 0x3d, 0x93, 0x7c, 0xd2, 0xa6, 0x8, 0xe7, 0x49, 0x24, 0x8a, 0x65, 0xcb, 0xe9, 0x47, 0xa8, 0x6, 0x6b, 0xc5, 0x2a, 0x84, 0xf0, 0x5e, 0xb1, 0x1f, 0x72, 0xdc, 0x33, 0x9d, 0xdb, 0x75, 0x9a, 0x34, 0x59, 0xf7, 0x18, 0xb6, 0xc2, 0x6c, 0x83, 0x2d, 0x40, 0xee, 0x1, 0xaf, 0x45, 0xeb, 0x4, 0xaa, 0xc7, 0x69, 0x86, 0x28, 0x5c, 0xf2, 0x1d, 0xb3, 0xde, 0x70, 0x9f, 0x31, 0x77, 0xd9, 0x36, 0x98, 0xf5, 0x5b, 0xb4, 0x1a, 0x6e, 0xc0, 0x2f, 0x81, 0xec, 0x42, 0xad, 0x3, 0x21, 0x8f, 0x60, 0xce, 0xa3, 0xd, 0xe2, 0x4c, 0x38, 0x96, 0x79, 0xd7, 0xba, 0x14, 0xfb, 0x55, 0x13, 0xbd, 0x52, 0xfc, 0x91, 0x3f, 0xd0, 0x7e, 0xa, 0xa4, 0x4b, 0xe5, 0x88, 0x26, 0xc9, 0x67},
- {0x0, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b, 0x22, 0x8d, 0x61, 0xce, 0xa4, 0xb, 0xe7, 0x48, 0x33, 0x9c, 0x70, 0xdf, 0xb5, 0x1a, 0xf6, 0x59, 0x44, 0xeb, 0x7, 0xa8, 0xc2, 0x6d, 0x81, 0x2e, 0x55, 0xfa, 0x16, 0xb9, 0xd3, 0x7c, 0x90, 0x3f, 0x66, 0xc9, 0x25, 0x8a, 0xe0, 0x4f, 0xa3, 0xc, 0x77, 0xd8, 0x34, 0x9b, 0xf1, 0x5e, 0xb2, 0x1d, 0x88, 0x27, 0xcb, 0x64, 0xe, 0xa1, 0x4d, 0xe2, 0x99, 0x36, 0xda, 0x75, 0x1f, 0xb0, 0x5c, 0xf3, 0xaa, 0x5, 0xe9, 0x46, 0x2c, 0x83, 0x6f, 0xc0, 0xbb, 0x14, 0xf8, 0x57, 0x3d, 0x92, 0x7e, 0xd1, 0xcc, 0x63, 0x8f, 0x20, 0x4a, 0xe5, 0x9, 0xa6, 0xdd, 0x72, 0x9e, 0x31, 0x5b, 0xf4, 0x18, 0xb7, 0xee, 0x41, 0xad, 0x2, 0x68, 0xc7, 0x2b, 0x84, 0xff, 0x50, 0xbc, 0x13, 0x79, 0xd6, 0x3a, 0x95, 0xd, 0xa2, 0x4e, 0xe1, 0x8b, 0x24, 0xc8, 0x67, 0x1c, 0xb3, 0x5f, 0xf0, 0x9a, 0x35, 0xd9, 0x76, 0x2f, 0x80, 0x6c, 0xc3, 0xa9, 0x6, 0xea, 0x45, 0x3e, 0x91, 0x7d, 0xd2, 0xb8, 0x17, 0xfb, 0x54, 0x49, 0xe6, 0xa, 0xa5, 0xcf, 0x60, 0x8c, 0x23, 0x58, 0xf7, 0x1b, 0xb4, 0xde, 0x71, 0x9d, 0x32, 0x6b, 0xc4, 0x28, 0x87, 0xed, 0x42, 0xae, 0x1, 0x7a, 0xd5, 0x39, 0x96, 0xfc, 0x53, 0xbf, 0x10, 0x85, 0x2a, 0xc6, 0x69, 0x3, 0xac, 0x40, 0xef, 0x94, 0x3b, 0xd7, 0x78, 0x12, 0xbd, 0x51, 0xfe, 0xa7, 0x8, 0xe4, 0x4b, 0x21, 0x8e, 0x62, 0xcd, 0xb6, 0x19, 0xf5, 0x5a, 0x30, 0x9f, 0x73, 0xdc, 0xc1, 0x6e, 0x82, 0x2d, 0x47, 0xe8, 0x4, 0xab, 0xd0, 0x7f, 0x93, 0x3c, 0x56, 0xf9, 0x15, 0xba, 0xe3, 0x4c, 0xa0, 0xf, 0x65, 0xca, 0x26, 0x89, 0xf2, 0x5d, 0xb1, 0x1e, 0x74, 0xdb, 0x37, 0x98},
- {0x0, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde, 0xcf, 0x7f, 0xb2, 0x2, 0x35, 0x85, 0x48, 0xf8, 0x26, 0x96, 0x5b, 0xeb, 0xdc, 0x6c, 0xa1, 0x11, 0x83, 0x33, 0xfe, 0x4e, 0x79, 0xc9, 0x4, 0xb4, 0x6a, 0xda, 0x17, 0xa7, 0x90, 0x20, 0xed, 0x5d, 0x4c, 0xfc, 0x31, 0x81, 0xb6, 0x6, 0xcb, 0x7b, 0xa5, 0x15, 0xd8, 0x68, 0x5f, 0xef, 0x22, 0x92, 0x1b, 0xab, 0x66, 0xd6, 0xe1, 0x51, 0x9c, 0x2c, 0xf2, 0x42, 0x8f, 0x3f, 0x8, 0xb8, 0x75, 0xc5, 0xd4, 0x64, 0xa9, 0x19, 0x2e, 0x9e, 0x53, 0xe3, 0x3d, 0x8d, 0x40, 0xf0, 0xc7, 0x77, 0xba, 0xa, 0x98, 0x28, 0xe5, 0x55, 0x62, 0xd2, 0x1f, 0xaf, 0x71, 0xc1, 0xc, 0xbc, 0x8b, 0x3b, 0xf6, 0x46, 0x57, 0xe7, 0x2a, 0x9a, 0xad, 0x1d, 0xd0, 0x60, 0xbe, 0xe, 0xc3, 0x73, 0x44, 0xf4, 0x39, 0x89, 0x36, 0x86, 0x4b, 0xfb, 0xcc, 0x7c, 0xb1, 0x1, 0xdf, 0x6f, 0xa2, 0x12, 0x25, 0x95, 0x58, 0xe8, 0xf9, 0x49, 0x84, 0x34, 0x3, 0xb3, 0x7e, 0xce, 0x10, 0xa0, 0x6d, 0xdd, 0xea, 0x5a, 0x97, 0x27, 0xb5, 0x5, 0xc8, 0x78, 0x4f, 0xff, 0x32, 0x82, 0x5c, 0xec, 0x21, 0x91, 0xa6, 0x16, 0xdb, 0x6b, 0x7a, 0xca, 0x7, 0xb7, 0x80, 0x30, 0xfd, 0x4d, 0x93, 0x23, 0xee, 0x5e, 0x69, 0xd9, 0x14, 0xa4, 0x2d, 0x9d, 0x50, 0xe0, 0xd7, 0x67, 0xaa, 0x1a, 0xc4, 0x74, 0xb9, 0x9, 0x3e, 0x8e, 0x43, 0xf3, 0xe2, 0x52, 0x9f, 0x2f, 0x18, 0xa8, 0x65, 0xd5, 0xb, 0xbb, 0x76, 0xc6, 0xf1, 0x41, 0x8c, 0x3c, 0xae, 0x1e, 0xd3, 0x63, 0x54, 0xe4, 0x29, 0x99, 0x47, 0xf7, 0x3a, 0x8a, 0xbd, 0xd, 0xc0, 0x70, 0x61, 0xd1, 0x1c, 0xac, 0x9b, 0x2b, 0xe6, 0x56, 0x88, 0x38, 0xf5, 0x45, 0x72, 0xc2, 0xf, 0xbf},
- {0x0, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1, 0xdf, 0x6e, 0xa0, 0x11, 0x21, 0x90, 0x5e, 0xef, 0x3e, 0x8f, 0x41, 0xf0, 0xc0, 0x71, 0xbf, 0xe, 0xa3, 0x12, 0xdc, 0x6d, 0x5d, 0xec, 0x22, 0x93, 0x42, 0xf3, 0x3d, 0x8c, 0xbc, 0xd, 0xc3, 0x72, 0x7c, 0xcd, 0x3, 0xb2, 0x82, 0x33, 0xfd, 0x4c, 0x9d, 0x2c, 0xe2, 0x53, 0x63, 0xd2, 0x1c, 0xad, 0x5b, 0xea, 0x24, 0x95, 0xa5, 0x14, 0xda, 0x6b, 0xba, 0xb, 0xc5, 0x74, 0x44, 0xf5, 0x3b, 0x8a, 0x84, 0x35, 0xfb, 0x4a, 0x7a, 0xcb, 0x5, 0xb4, 0x65, 0xd4, 0x1a, 0xab, 0x9b, 0x2a, 0xe4, 0x55, 0xf8, 0x49, 0x87, 0x36, 0x6, 0xb7, 0x79, 0xc8, 0x19, 0xa8, 0x66, 0xd7, 0xe7, 0x56, 0x98, 0x29, 0x27, 0x96, 0x58, 0xe9, 0xd9, 0x68, 0xa6, 0x17, 0xc6, 0x77, 0xb9, 0x8, 0x38, 0x89, 0x47, 0xf6, 0xb6, 0x7, 0xc9, 0x78, 0x48, 0xf9, 0x37, 0x86, 0x57, 0xe6, 0x28, 0x99, 0xa9, 0x18, 0xd6, 0x67, 0x69, 0xd8, 0x16, 0xa7, 0x97, 0x26, 0xe8, 0x59, 0x88, 0x39, 0xf7, 0x46, 0x76, 0xc7, 0x9, 0xb8, 0x15, 0xa4, 0x6a, 0xdb, 0xeb, 0x5a, 0x94, 0x25, 0xf4, 0x45, 0x8b, 0x3a, 0xa, 0xbb, 0x75, 0xc4, 0xca, 0x7b, 0xb5, 0x4, 0x34, 0x85, 0x4b, 0xfa, 0x2b, 0x9a, 0x54, 0xe5, 0xd5, 0x64, 0xaa, 0x1b, 0xed, 0x5c, 0x92, 0x23, 0x13, 0xa2, 0x6c, 0xdd, 0xc, 0xbd, 0x73, 0xc2, 0xf2, 0x43, 0x8d, 0x3c, 0x32, 0x83, 0x4d, 0xfc, 0xcc, 0x7d, 0xb3, 0x2, 0xd3, 0x62, 0xac, 0x1d, 0x2d, 0x9c, 0x52, 0xe3, 0x4e, 0xff, 0x31, 0x80, 0xb0, 0x1, 0xcf, 0x7e, 0xaf, 0x1e, 0xd0, 0x61, 0x51, 0xe0, 0x2e, 0x9f, 0x91, 0x20, 0xee, 0x5f, 0x6f, 0xde, 0x10, 0xa1, 0x70, 0xc1, 0xf, 0xbe, 0x8e, 0x3f, 0xf1, 0x40},
- {0x0, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0xb, 0xb9, 0x72, 0xc0, 0xef, 0x5d, 0x96, 0x24, 0x1d, 0xaf, 0x64, 0xd6, 0x16, 0xa4, 0x6f, 0xdd, 0xe4, 0x56, 0x9d, 0x2f, 0xc3, 0x71, 0xba, 0x8, 0x31, 0x83, 0x48, 0xfa, 0x3a, 0x88, 0x43, 0xf1, 0xc8, 0x7a, 0xb1, 0x3, 0x2c, 0x9e, 0x55, 0xe7, 0xde, 0x6c, 0xa7, 0x15, 0xd5, 0x67, 0xac, 0x1e, 0x27, 0x95, 0x5e, 0xec, 0x9b, 0x29, 0xe2, 0x50, 0x69, 0xdb, 0x10, 0xa2, 0x62, 0xd0, 0x1b, 0xa9, 0x90, 0x22, 0xe9, 0x5b, 0x74, 0xc6, 0xd, 0xbf, 0x86, 0x34, 0xff, 0x4d, 0x8d, 0x3f, 0xf4, 0x46, 0x7f, 0xcd, 0x6, 0xb4, 0x58, 0xea, 0x21, 0x93, 0xaa, 0x18, 0xd3, 0x61, 0xa1, 0x13, 0xd8, 0x6a, 0x53, 0xe1, 0x2a, 0x98, 0xb7, 0x5, 0xce, 0x7c, 0x45, 0xf7, 0x3c, 0x8e, 0x4e, 0xfc, 0x37, 0x85, 0xbc, 0xe, 0xc5, 0x77, 0x2b, 0x99, 0x52, 0xe0, 0xd9, 0x6b, 0xa0, 0x12, 0xd2, 0x60, 0xab, 0x19, 0x20, 0x92, 0x59, 0xeb, 0xc4, 0x76, 0xbd, 0xf, 0x36, 0x84, 0x4f, 0xfd, 0x3d, 0x8f, 0x44, 0xf6, 0xcf, 0x7d, 0xb6, 0x4, 0xe8, 0x5a, 0x91, 0x23, 0x1a, 0xa8, 0x63, 0xd1, 0x11, 0xa3, 0x68, 0xda, 0xe3, 0x51, 0x9a, 0x28, 0x7, 0xb5, 0x7e, 0xcc, 0xf5, 0x47, 0x8c, 0x3e, 0xfe, 0x4c, 0x87, 0x35, 0xc, 0xbe, 0x75, 0xc7, 0xb0, 0x2, 0xc9, 0x7b, 0x42, 0xf0, 0x3b, 0x89, 0x49, 0xfb, 0x30, 0x82, 0xbb, 0x9, 0xc2, 0x70, 0x5f, 0xed, 0x26, 0x94, 0xad, 0x1f, 0xd4, 0x66, 0xa6, 0x14, 0xdf, 0x6d, 0x54, 0xe6, 0x2d, 0x9f, 0x73, 0xc1, 0xa, 0xb8, 0x81, 0x33, 0xf8, 0x4a, 0x8a, 0x38, 0xf3, 0x41, 0x78, 0xca, 0x1, 0xb3, 0x9c, 0x2e, 0xe5, 0x57, 0x6e, 0xdc, 0x17, 0xa5, 0x65, 0xd7, 0x1c, 0xae, 0x97, 0x25, 0xee, 0x5c},
- {0x0, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x7, 0xb4, 0x7c, 0xcf, 0xff, 0x4c, 0x84, 0x37, 0x9, 0xba, 0x72, 0xc1, 0xe, 0xbd, 0x75, 0xc6, 0xf8, 0x4b, 0x83, 0x30, 0xe3, 0x50, 0x98, 0x2b, 0x15, 0xa6, 0x6e, 0xdd, 0x12, 0xa1, 0x69, 0xda, 0xe4, 0x57, 0x9f, 0x2c, 0x1c, 0xaf, 0x67, 0xd4, 0xea, 0x59, 0x91, 0x22, 0xed, 0x5e, 0x96, 0x25, 0x1b, 0xa8, 0x60, 0xd3, 0xdb, 0x68, 0xa0, 0x13, 0x2d, 0x9e, 0x56, 0xe5, 0x2a, 0x99, 0x51, 0xe2, 0xdc, 0x6f, 0xa7, 0x14, 0x24, 0x97, 0x5f, 0xec, 0xd2, 0x61, 0xa9, 0x1a, 0xd5, 0x66, 0xae, 0x1d, 0x23, 0x90, 0x58, 0xeb, 0x38, 0x8b, 0x43, 0xf0, 0xce, 0x7d, 0xb5, 0x6, 0xc9, 0x7a, 0xb2, 0x1, 0x3f, 0x8c, 0x44, 0xf7, 0xc7, 0x74, 0xbc, 0xf, 0x31, 0x82, 0x4a, 0xf9, 0x36, 0x85, 0x4d, 0xfe, 0xc0, 0x73, 0xbb, 0x8, 0xab, 0x18, 0xd0, 0x63, 0x5d, 0xee, 0x26, 0x95, 0x5a, 0xe9, 0x21, 0x92, 0xac, 0x1f, 0xd7, 0x64, 0x54, 0xe7, 0x2f, 0x9c, 0xa2, 0x11, 0xd9, 0x6a, 0xa5, 0x16, 0xde, 0x6d, 0x53, 0xe0, 0x28, 0x9b, 0x48, 0xfb, 0x33, 0x80, 0xbe, 0xd, 0xc5, 0x76, 0xb9, 0xa, 0xc2, 0x71, 0x4f, 0xfc, 0x34, 0x87, 0xb7, 0x4, 0xcc, 0x7f, 0x41, 0xf2, 0x3a, 0x89, 0x46, 0xf5, 0x3d, 0x8e, 0xb0, 0x3, 0xcb, 0x78, 0x70, 0xc3, 0xb, 0xb8, 0x86, 0x35, 0xfd, 0x4e, 0x81, 0x32, 0xfa, 0x49, 0x77, 0xc4, 0xc, 0xbf, 0x8f, 0x3c, 0xf4, 0x47, 0x79, 0xca, 0x2, 0xb1, 0x7e, 0xcd, 0x5, 0xb6, 0x88, 0x3b, 0xf3, 0x40, 0x93, 0x20, 0xe8, 0x5b, 0x65, 0xd6, 0x1e, 0xad, 0x62, 0xd1, 0x19, 0xaa, 0x94, 0x27, 0xef, 0x5c, 0x6c, 0xdf, 0x17, 0xa4, 0x9a, 0x29, 0xe1, 0x52, 0x9d, 0x2e, 0xe6, 0x55, 0x6b, 0xd8, 0x10, 0xa3},
- {0x0, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x8, 0x23, 0x97, 0x56, 0xe2, 0x8f, 0x3b, 0xfa, 0x4e, 0x65, 0xd1, 0x10, 0xa4, 0x46, 0xf2, 0x33, 0x87, 0xac, 0x18, 0xd9, 0x6d, 0x3, 0xb7, 0x76, 0xc2, 0xe9, 0x5d, 0x9c, 0x28, 0xca, 0x7e, 0xbf, 0xb, 0x20, 0x94, 0x55, 0xe1, 0x8c, 0x38, 0xf9, 0x4d, 0x66, 0xd2, 0x13, 0xa7, 0x45, 0xf1, 0x30, 0x84, 0xaf, 0x1b, 0xda, 0x6e, 0x6, 0xb2, 0x73, 0xc7, 0xec, 0x58, 0x99, 0x2d, 0xcf, 0x7b, 0xba, 0xe, 0x25, 0x91, 0x50, 0xe4, 0x89, 0x3d, 0xfc, 0x48, 0x63, 0xd7, 0x16, 0xa2, 0x40, 0xf4, 0x35, 0x81, 0xaa, 0x1e, 0xdf, 0x6b, 0x5, 0xb1, 0x70, 0xc4, 0xef, 0x5b, 0x9a, 0x2e, 0xcc, 0x78, 0xb9, 0xd, 0x26, 0x92, 0x53, 0xe7, 0x8a, 0x3e, 0xff, 0x4b, 0x60, 0xd4, 0x15, 0xa1, 0x43, 0xf7, 0x36, 0x82, 0xa9, 0x1d, 0xdc, 0x68, 0xc, 0xb8, 0x79, 0xcd, 0xe6, 0x52, 0x93, 0x27, 0xc5, 0x71, 0xb0, 0x4, 0x2f, 0x9b, 0x5a, 0xee, 0x83, 0x37, 0xf6, 0x42, 0x69, 0xdd, 0x1c, 0xa8, 0x4a, 0xfe, 0x3f, 0x8b, 0xa0, 0x14, 0xd5, 0x61, 0xf, 0xbb, 0x7a, 0xce, 0xe5, 0x51, 0x90, 0x24, 0xc6, 0x72, 0xb3, 0x7, 0x2c, 0x98, 0x59, 0xed, 0x80, 0x34, 0xf5, 0x41, 0x6a, 0xde, 0x1f, 0xab, 0x49, 0xfd, 0x3c, 0x88, 0xa3, 0x17, 0xd6, 0x62, 0xa, 0xbe, 0x7f, 0xcb, 0xe0, 0x54, 0x95, 0x21, 0xc3, 0x77, 0xb6, 0x2, 0x29, 0x9d, 0x5c, 0xe8, 0x85, 0x31, 0xf0, 0x44, 0x6f, 0xdb, 0x1a, 0xae, 0x4c, 0xf8, 0x39, 0x8d, 0xa6, 0x12, 0xd3, 0x67, 0x9, 0xbd, 0x7c, 0xc8, 0xe3, 0x57, 0x96, 0x22, 0xc0, 0x74, 0xb5, 0x1, 0x2a, 0x9e, 0x5f, 0xeb, 0x86, 0x32, 0xf3, 0x47, 0x6c, 0xd8, 0x19, 0xad, 0x4f, 0xfb, 0x3a, 0x8e, 0xa5, 0x11, 0xd0, 0x64},
- {0x0, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x3, 0x2f, 0x9a, 0x58, 0xed, 0x9f, 0x2a, 0xe8, 0x5d, 0x71, 0xc4, 0x6, 0xb3, 0x5e, 0xeb, 0x29, 0x9c, 0xb0, 0x5, 0xc7, 0x72, 0x23, 0x96, 0x54, 0xe1, 0xcd, 0x78, 0xba, 0xf, 0xe2, 0x57, 0x95, 0x20, 0xc, 0xb9, 0x7b, 0xce, 0xbc, 0x9, 0xcb, 0x7e, 0x52, 0xe7, 0x25, 0x90, 0x7d, 0xc8, 0xa, 0xbf, 0x93, 0x26, 0xe4, 0x51, 0x46, 0xf3, 0x31, 0x84, 0xa8, 0x1d, 0xdf, 0x6a, 0x87, 0x32, 0xf0, 0x45, 0x69, 0xdc, 0x1e, 0xab, 0xd9, 0x6c, 0xae, 0x1b, 0x37, 0x82, 0x40, 0xf5, 0x18, 0xad, 0x6f, 0xda, 0xf6, 0x43, 0x81, 0x34, 0x65, 0xd0, 0x12, 0xa7, 0x8b, 0x3e, 0xfc, 0x49, 0xa4, 0x11, 0xd3, 0x66, 0x4a, 0xff, 0x3d, 0x88, 0xfa, 0x4f, 0x8d, 0x38, 0x14, 0xa1, 0x63, 0xd6, 0x3b, 0x8e, 0x4c, 0xf9, 0xd5, 0x60, 0xa2, 0x17, 0x8c, 0x39, 0xfb, 0x4e, 0x62, 0xd7, 0x15, 0xa0, 0x4d, 0xf8, 0x3a, 0x8f, 0xa3, 0x16, 0xd4, 0x61, 0x13, 0xa6, 0x64, 0xd1, 0xfd, 0x48, 0x8a, 0x3f, 0xd2, 0x67, 0xa5, 0x10, 0x3c, 0x89, 0x4b, 0xfe, 0xaf, 0x1a, 0xd8, 0x6d, 0x41, 0xf4, 0x36, 0x83, 0x6e, 0xdb, 0x19, 0xac, 0x80, 0x35, 0xf7, 0x42, 0x30, 0x85, 0x47, 0xf2, 0xde, 0x6b, 0xa9, 0x1c, 0xf1, 0x44, 0x86, 0x33, 0x1f, 0xaa, 0x68, 0xdd, 0xca, 0x7f, 0xbd, 0x8, 0x24, 0x91, 0x53, 0xe6, 0xb, 0xbe, 0x7c, 0xc9, 0xe5, 0x50, 0x92, 0x27, 0x55, 0xe0, 0x22, 0x97, 0xbb, 0xe, 0xcc, 0x79, 0x94, 0x21, 0xe3, 0x56, 0x7a, 0xcf, 0xd, 0xb8, 0xe9, 0x5c, 0x9e, 0x2b, 0x7, 0xb2, 0x70, 0xc5, 0x28, 0x9d, 0x5f, 0xea, 0xc6, 0x73, 0xb1, 0x4, 0x76, 0xc3, 0x1, 0xb4, 0x98, 0x2d, 0xef, 0x5a, 0xb7, 0x2, 0xc0, 0x75, 0x59, 0xec, 0x2e, 0x9b},
- {0x0, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc, 0xaf, 0x19, 0xde, 0x68, 0x4d, 0xfb, 0x3c, 0x8a, 0x76, 0xc0, 0x7, 0xb1, 0x94, 0x22, 0xe5, 0x53, 0x43, 0xf5, 0x32, 0x84, 0xa1, 0x17, 0xd0, 0x66, 0x9a, 0x2c, 0xeb, 0x5d, 0x78, 0xce, 0x9, 0xbf, 0xec, 0x5a, 0x9d, 0x2b, 0xe, 0xb8, 0x7f, 0xc9, 0x35, 0x83, 0x44, 0xf2, 0xd7, 0x61, 0xa6, 0x10, 0x86, 0x30, 0xf7, 0x41, 0x64, 0xd2, 0x15, 0xa3, 0x5f, 0xe9, 0x2e, 0x98, 0xbd, 0xb, 0xcc, 0x7a, 0x29, 0x9f, 0x58, 0xee, 0xcb, 0x7d, 0xba, 0xc, 0xf0, 0x46, 0x81, 0x37, 0x12, 0xa4, 0x63, 0xd5, 0xc5, 0x73, 0xb4, 0x2, 0x27, 0x91, 0x56, 0xe0, 0x1c, 0xaa, 0x6d, 0xdb, 0xfe, 0x48, 0x8f, 0x39, 0x6a, 0xdc, 0x1b, 0xad, 0x88, 0x3e, 0xf9, 0x4f, 0xb3, 0x5, 0xc2, 0x74, 0x51, 0xe7, 0x20, 0x96, 0x11, 0xa7, 0x60, 0xd6, 0xf3, 0x45, 0x82, 0x34, 0xc8, 0x7e, 0xb9, 0xf, 0x2a, 0x9c, 0x5b, 0xed, 0xbe, 0x8, 0xcf, 0x79, 0x5c, 0xea, 0x2d, 0x9b, 0x67, 0xd1, 0x16, 0xa0, 0x85, 0x33, 0xf4, 0x42, 0x52, 0xe4, 0x23, 0x95, 0xb0, 0x6, 0xc1, 0x77, 0x8b, 0x3d, 0xfa, 0x4c, 0x69, 0xdf, 0x18, 0xae, 0xfd, 0x4b, 0x8c, 0x3a, 0x1f, 0xa9, 0x6e, 0xd8, 0x24, 0x92, 0x55, 0xe3, 0xc6, 0x70, 0xb7, 0x1, 0x97, 0x21, 0xe6, 0x50, 0x75, 0xc3, 0x4, 0xb2, 0x4e, 0xf8, 0x3f, 0x89, 0xac, 0x1a, 0xdd, 0x6b, 0x38, 0x8e, 0x49, 0xff, 0xda, 0x6c, 0xab, 0x1d, 0xe1, 0x57, 0x90, 0x26, 0x3, 0xb5, 0x72, 0xc4, 0xd4, 0x62, 0xa5, 0x13, 0x36, 0x80, 0x47, 0xf1, 0xd, 0xbb, 0x7c, 0xca, 0xef, 0x59, 0x9e, 0x28, 0x7b, 0xcd, 0xa, 0xbc, 0x99, 0x2f, 0xe8, 0x5e, 0xa2, 0x14, 0xd3, 0x65, 0x40, 0xf6, 0x31, 0x87},
- {0x0, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3, 0xbf, 0x8, 0xcc, 0x7b, 0x59, 0xee, 0x2a, 0x9d, 0x6e, 0xd9, 0x1d, 0xaa, 0x88, 0x3f, 0xfb, 0x4c, 0x63, 0xd4, 0x10, 0xa7, 0x85, 0x32, 0xf6, 0x41, 0xb2, 0x5, 0xc1, 0x76, 0x54, 0xe3, 0x27, 0x90, 0xdc, 0x6b, 0xaf, 0x18, 0x3a, 0x8d, 0x49, 0xfe, 0xd, 0xba, 0x7e, 0xc9, 0xeb, 0x5c, 0x98, 0x2f, 0xc6, 0x71, 0xb5, 0x2, 0x20, 0x97, 0x53, 0xe4, 0x17, 0xa0, 0x64, 0xd3, 0xf1, 0x46, 0x82, 0x35, 0x79, 0xce, 0xa, 0xbd, 0x9f, 0x28, 0xec, 0x5b, 0xa8, 0x1f, 0xdb, 0x6c, 0x4e, 0xf9, 0x3d, 0x8a, 0xa5, 0x12, 0xd6, 0x61, 0x43, 0xf4, 0x30, 0x87, 0x74, 0xc3, 0x7, 0xb0, 0x92, 0x25, 0xe1, 0x56, 0x1a, 0xad, 0x69, 0xde, 0xfc, 0x4b, 0x8f, 0x38, 0xcb, 0x7c, 0xb8, 0xf, 0x2d, 0x9a, 0x5e, 0xe9, 0x91, 0x26, 0xe2, 0x55, 0x77, 0xc0, 0x4, 0xb3, 0x40, 0xf7, 0x33, 0x84, 0xa6, 0x11, 0xd5, 0x62, 0x2e, 0x99, 0x5d, 0xea, 0xc8, 0x7f, 0xbb, 0xc, 0xff, 0x48, 0x8c, 0x3b, 0x19, 0xae, 0x6a, 0xdd, 0xf2, 0x45, 0x81, 0x36, 0x14, 0xa3, 0x67, 0xd0, 0x23, 0x94, 0x50, 0xe7, 0xc5, 0x72, 0xb6, 0x1, 0x4d, 0xfa, 0x3e, 0x89, 0xab, 0x1c, 0xd8, 0x6f, 0x9c, 0x2b, 0xef, 0x58, 0x7a, 0xcd, 0x9, 0xbe, 0x57, 0xe0, 0x24, 0x93, 0xb1, 0x6, 0xc2, 0x75, 0x86, 0x31, 0xf5, 0x42, 0x60, 0xd7, 0x13, 0xa4, 0xe8, 0x5f, 0x9b, 0x2c, 0xe, 0xb9, 0x7d, 0xca, 0x39, 0x8e, 0x4a, 0xfd, 0xdf, 0x68, 0xac, 0x1b, 0x34, 0x83, 0x47, 0xf0, 0xd2, 0x65, 0xa1, 0x16, 0xe5, 0x52, 0x96, 0x21, 0x3, 0xb4, 0x70, 0xc7, 0x8b, 0x3c, 0xf8, 0x4f, 0x6d, 0xda, 0x1e, 0xa9, 0x5a, 0xed, 0x29, 0x9e, 0xbc, 0xb, 0xcf, 0x78},
- {0x0, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0xf, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6, 0x4f, 0xf7, 0x22, 0x9a, 0x95, 0x2d, 0xf8, 0x40, 0xe6, 0x5e, 0x8b, 0x33, 0x3c, 0x84, 0x51, 0xe9, 0x9e, 0x26, 0xf3, 0x4b, 0x44, 0xfc, 0x29, 0x91, 0x37, 0x8f, 0x5a, 0xe2, 0xed, 0x55, 0x80, 0x38, 0xd1, 0x69, 0xbc, 0x4, 0xb, 0xb3, 0x66, 0xde, 0x78, 0xc0, 0x15, 0xad, 0xa2, 0x1a, 0xcf, 0x77, 0x21, 0x99, 0x4c, 0xf4, 0xfb, 0x43, 0x96, 0x2e, 0x88, 0x30, 0xe5, 0x5d, 0x52, 0xea, 0x3f, 0x87, 0x6e, 0xd6, 0x3, 0xbb, 0xb4, 0xc, 0xd9, 0x61, 0xc7, 0x7f, 0xaa, 0x12, 0x1d, 0xa5, 0x70, 0xc8, 0xbf, 0x7, 0xd2, 0x6a, 0x65, 0xdd, 0x8, 0xb0, 0x16, 0xae, 0x7b, 0xc3, 0xcc, 0x74, 0xa1, 0x19, 0xf0, 0x48, 0x9d, 0x25, 0x2a, 0x92, 0x47, 0xff, 0x59, 0xe1, 0x34, 0x8c, 0x83, 0x3b, 0xee, 0x56, 0x42, 0xfa, 0x2f, 0x97, 0x98, 0x20, 0xf5, 0x4d, 0xeb, 0x53, 0x86, 0x3e, 0x31, 0x89, 0x5c, 0xe4, 0xd, 0xb5, 0x60, 0xd8, 0xd7, 0x6f, 0xba, 0x2, 0xa4, 0x1c, 0xc9, 0x71, 0x7e, 0xc6, 0x13, 0xab, 0xdc, 0x64, 0xb1, 0x9, 0x6, 0xbe, 0x6b, 0xd3, 0x75, 0xcd, 0x18, 0xa0, 0xaf, 0x17, 0xc2, 0x7a, 0x93, 0x2b, 0xfe, 0x46, 0x49, 0xf1, 0x24, 0x9c, 0x3a, 0x82, 0x57, 0xef, 0xe0, 0x58, 0x8d, 0x35, 0x63, 0xdb, 0xe, 0xb6, 0xb9, 0x1, 0xd4, 0x6c, 0xca, 0x72, 0xa7, 0x1f, 0x10, 0xa8, 0x7d, 0xc5, 0x2c, 0x94, 0x41, 0xf9, 0xf6, 0x4e, 0x9b, 0x23, 0x85, 0x3d, 0xe8, 0x50, 0x5f, 0xe7, 0x32, 0x8a, 0xfd, 0x45, 0x90, 0x28, 0x27, 0x9f, 0x4a, 0xf2, 0x54, 0xec, 0x39, 0x81, 0x8e, 0x36, 0xe3, 0x5b, 0xb2, 0xa, 0xdf, 0x67, 0x68, 0xd0, 0x5, 0xbd, 0x1b, 0xa3, 0x76, 0xce, 0xc1, 0x79, 0xac, 0x14},
- {0x0, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x8, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9, 0x5f, 0xe6, 0x30, 0x89, 0x81, 0x38, 0xee, 0x57, 0xfe, 0x47, 0x91, 0x28, 0x20, 0x99, 0x4f, 0xf6, 0xbe, 0x7, 0xd1, 0x68, 0x60, 0xd9, 0xf, 0xb6, 0x1f, 0xa6, 0x70, 0xc9, 0xc1, 0x78, 0xae, 0x17, 0xe1, 0x58, 0x8e, 0x37, 0x3f, 0x86, 0x50, 0xe9, 0x40, 0xf9, 0x2f, 0x96, 0x9e, 0x27, 0xf1, 0x48, 0x61, 0xd8, 0xe, 0xb7, 0xbf, 0x6, 0xd0, 0x69, 0xc0, 0x79, 0xaf, 0x16, 0x1e, 0xa7, 0x71, 0xc8, 0x3e, 0x87, 0x51, 0xe8, 0xe0, 0x59, 0x8f, 0x36, 0x9f, 0x26, 0xf0, 0x49, 0x41, 0xf8, 0x2e, 0x97, 0xdf, 0x66, 0xb0, 0x9, 0x1, 0xb8, 0x6e, 0xd7, 0x7e, 0xc7, 0x11, 0xa8, 0xa0, 0x19, 0xcf, 0x76, 0x80, 0x39, 0xef, 0x56, 0x5e, 0xe7, 0x31, 0x88, 0x21, 0x98, 0x4e, 0xf7, 0xff, 0x46, 0x90, 0x29, 0xc2, 0x7b, 0xad, 0x14, 0x1c, 0xa5, 0x73, 0xca, 0x63, 0xda, 0xc, 0xb5, 0xbd, 0x4, 0xd2, 0x6b, 0x9d, 0x24, 0xf2, 0x4b, 0x43, 0xfa, 0x2c, 0x95, 0x3c, 0x85, 0x53, 0xea, 0xe2, 0x5b, 0x8d, 0x34, 0x7c, 0xc5, 0x13, 0xaa, 0xa2, 0x1b, 0xcd, 0x74, 0xdd, 0x64, 0xb2, 0xb, 0x3, 0xba, 0x6c, 0xd5, 0x23, 0x9a, 0x4c, 0xf5, 0xfd, 0x44, 0x92, 0x2b, 0x82, 0x3b, 0xed, 0x54, 0x5c, 0xe5, 0x33, 0x8a, 0xa3, 0x1a, 0xcc, 0x75, 0x7d, 0xc4, 0x12, 0xab, 0x2, 0xbb, 0x6d, 0xd4, 0xdc, 0x65, 0xb3, 0xa, 0xfc, 0x45, 0x93, 0x2a, 0x22, 0x9b, 0x4d, 0xf4, 0x5d, 0xe4, 0x32, 0x8b, 0x83, 0x3a, 0xec, 0x55, 0x1d, 0xa4, 0x72, 0xcb, 0xc3, 0x7a, 0xac, 0x15, 0xbc, 0x5, 0xd3, 0x6a, 0x62, 0xdb, 0xd, 0xb4, 0x42, 0xfb, 0x2d, 0x94, 0x9c, 0x25, 0xf3, 0x4a, 0xe3, 0x5a, 0x8c, 0x35, 0x3d, 0x84, 0x52, 0xeb},
- {0x0, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x1, 0xb9, 0x3, 0xd0, 0x6a, 0x6b, 0xd1, 0x2, 0xb8, 0x6f, 0xd5, 0x6, 0xbc, 0xbd, 0x7, 0xd4, 0x6e, 0xd6, 0x6c, 0xbf, 0x5, 0x4, 0xbe, 0x6d, 0xd7, 0xde, 0x64, 0xb7, 0xd, 0xc, 0xb6, 0x65, 0xdf, 0x67, 0xdd, 0xe, 0xb4, 0xb5, 0xf, 0xdc, 0x66, 0xb1, 0xb, 0xd8, 0x62, 0x63, 0xd9, 0xa, 0xb0, 0x8, 0xb2, 0x61, 0xdb, 0xda, 0x60, 0xb3, 0x9, 0xa1, 0x1b, 0xc8, 0x72, 0x73, 0xc9, 0x1a, 0xa0, 0x18, 0xa2, 0x71, 0xcb, 0xca, 0x70, 0xa3, 0x19, 0xce, 0x74, 0xa7, 0x1d, 0x1c, 0xa6, 0x75, 0xcf, 0x77, 0xcd, 0x1e, 0xa4, 0xa5, 0x1f, 0xcc, 0x76, 0x7f, 0xc5, 0x16, 0xac, 0xad, 0x17, 0xc4, 0x7e, 0xc6, 0x7c, 0xaf, 0x15, 0x14, 0xae, 0x7d, 0xc7, 0x10, 0xaa, 0x79, 0xc3, 0xc2, 0x78, 0xab, 0x11, 0xa9, 0x13, 0xc0, 0x7a, 0x7b, 0xc1, 0x12, 0xa8, 0x5f, 0xe5, 0x36, 0x8c, 0x8d, 0x37, 0xe4, 0x5e, 0xe6, 0x5c, 0x8f, 0x35, 0x34, 0x8e, 0x5d, 0xe7, 0x30, 0x8a, 0x59, 0xe3, 0xe2, 0x58, 0x8b, 0x31, 0x89, 0x33, 0xe0, 0x5a, 0x5b, 0xe1, 0x32, 0x88, 0x81, 0x3b, 0xe8, 0x52, 0x53, 0xe9, 0x3a, 0x80, 0x38, 0x82, 0x51, 0xeb, 0xea, 0x50, 0x83, 0x39, 0xee, 0x54, 0x87, 0x3d, 0x3c, 0x86, 0x55, 0xef, 0x57, 0xed, 0x3e, 0x84, 0x85, 0x3f, 0xec, 0x56, 0xfe, 0x44, 0x97, 0x2d, 0x2c, 0x96, 0x45, 0xff, 0x47, 0xfd, 0x2e, 0x94, 0x95, 0x2f, 0xfc, 0x46, 0x91, 0x2b, 0xf8, 0x42, 0x43, 0xf9, 0x2a, 0x90, 0x28, 0x92, 0x41, 0xfb, 0xfa, 0x40, 0x93, 0x29, 0x20, 0x9a, 0x49, 0xf3, 0xf2, 0x48, 0x9b, 0x21, 0x99, 0x23, 0xf0, 0x4a, 0x4b, 0xf1, 0x22, 0x98, 0x4f, 0xf5, 0x26, 0x9c, 0x9d, 0x27, 0xf4, 0x4e, 0xf6, 0x4c, 0x9f, 0x25, 0x24, 0x9e, 0x4d, 0xf7},
- {0x0, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x6, 0xb1, 0xa, 0xda, 0x61, 0x67, 0xdc, 0xc, 0xb7, 0x7f, 0xc4, 0x14, 0xaf, 0xa9, 0x12, 0xc2, 0x79, 0xce, 0x75, 0xa5, 0x1e, 0x18, 0xa3, 0x73, 0xc8, 0xfe, 0x45, 0x95, 0x2e, 0x28, 0x93, 0x43, 0xf8, 0x4f, 0xf4, 0x24, 0x9f, 0x99, 0x22, 0xf2, 0x49, 0x81, 0x3a, 0xea, 0x51, 0x57, 0xec, 0x3c, 0x87, 0x30, 0x8b, 0x5b, 0xe0, 0xe6, 0x5d, 0x8d, 0x36, 0xe1, 0x5a, 0x8a, 0x31, 0x37, 0x8c, 0x5c, 0xe7, 0x50, 0xeb, 0x3b, 0x80, 0x86, 0x3d, 0xed, 0x56, 0x9e, 0x25, 0xf5, 0x4e, 0x48, 0xf3, 0x23, 0x98, 0x2f, 0x94, 0x44, 0xff, 0xf9, 0x42, 0x92, 0x29, 0x1f, 0xa4, 0x74, 0xcf, 0xc9, 0x72, 0xa2, 0x19, 0xae, 0x15, 0xc5, 0x7e, 0x78, 0xc3, 0x13, 0xa8, 0x60, 0xdb, 0xb, 0xb0, 0xb6, 0xd, 0xdd, 0x66, 0xd1, 0x6a, 0xba, 0x1, 0x7, 0xbc, 0x6c, 0xd7, 0xdf, 0x64, 0xb4, 0xf, 0x9, 0xb2, 0x62, 0xd9, 0x6e, 0xd5, 0x5, 0xbe, 0xb8, 0x3, 0xd3, 0x68, 0xa0, 0x1b, 0xcb, 0x70, 0x76, 0xcd, 0x1d, 0xa6, 0x11, 0xaa, 0x7a, 0xc1, 0xc7, 0x7c, 0xac, 0x17, 0x21, 0x9a, 0x4a, 0xf1, 0xf7, 0x4c, 0x9c, 0x27, 0x90, 0x2b, 0xfb, 0x40, 0x46, 0xfd, 0x2d, 0x96, 0x5e, 0xe5, 0x35, 0x8e, 0x88, 0x33, 0xe3, 0x58, 0xef, 0x54, 0x84, 0x3f, 0x39, 0x82, 0x52, 0xe9, 0x3e, 0x85, 0x55, 0xee, 0xe8, 0x53, 0x83, 0x38, 0x8f, 0x34, 0xe4, 0x5f, 0x59, 0xe2, 0x32, 0x89, 0x41, 0xfa, 0x2a, 0x91, 0x97, 0x2c, 0xfc, 0x47, 0xf0, 0x4b, 0x9b, 0x20, 0x26, 0x9d, 0x4d, 0xf6, 0xc0, 0x7b, 0xab, 0x10, 0x16, 0xad, 0x7d, 0xc6, 0x71, 0xca, 0x1a, 0xa1, 0xa7, 0x1c, 0xcc, 0x77, 0xbf, 0x4, 0xd4, 0x6f, 0x69, 0xd2, 0x2, 0xb9, 0xe, 0xb5, 0x65, 0xde, 0xd8, 0x63, 0xb3, 0x8},
- {0x0, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a, 0xf, 0xb3, 0x6a, 0xd6, 0xc5, 0x79, 0xa0, 0x1c, 0x86, 0x3a, 0xe3, 0x5f, 0x4c, 0xf0, 0x29, 0x95, 0x1e, 0xa2, 0x7b, 0xc7, 0xd4, 0x68, 0xb1, 0xd, 0x97, 0x2b, 0xf2, 0x4e, 0x5d, 0xe1, 0x38, 0x84, 0x11, 0xad, 0x74, 0xc8, 0xdb, 0x67, 0xbe, 0x2, 0x98, 0x24, 0xfd, 0x41, 0x52, 0xee, 0x37, 0x8b, 0x3c, 0x80, 0x59, 0xe5, 0xf6, 0x4a, 0x93, 0x2f, 0xb5, 0x9, 0xd0, 0x6c, 0x7f, 0xc3, 0x1a, 0xa6, 0x33, 0x8f, 0x56, 0xea, 0xf9, 0x45, 0x9c, 0x20, 0xba, 0x6, 0xdf, 0x63, 0x70, 0xcc, 0x15, 0xa9, 0x22, 0x9e, 0x47, 0xfb, 0xe8, 0x54, 0x8d, 0x31, 0xab, 0x17, 0xce, 0x72, 0x61, 0xdd, 0x4, 0xb8, 0x2d, 0x91, 0x48, 0xf4, 0xe7, 0x5b, 0x82, 0x3e, 0xa4, 0x18, 0xc1, 0x7d, 0x6e, 0xd2, 0xb, 0xb7, 0x78, 0xc4, 0x1d, 0xa1, 0xb2, 0xe, 0xd7, 0x6b, 0xf1, 0x4d, 0x94, 0x28, 0x3b, 0x87, 0x5e, 0xe2, 0x77, 0xcb, 0x12, 0xae, 0xbd, 0x1, 0xd8, 0x64, 0xfe, 0x42, 0x9b, 0x27, 0x34, 0x88, 0x51, 0xed, 0x66, 0xda, 0x3, 0xbf, 0xac, 0x10, 0xc9, 0x75, 0xef, 0x53, 0x8a, 0x36, 0x25, 0x99, 0x40, 0xfc, 0x69, 0xd5, 0xc, 0xb0, 0xa3, 0x1f, 0xc6, 0x7a, 0xe0, 0x5c, 0x85, 0x39, 0x2a, 0x96, 0x4f, 0xf3, 0x44, 0xf8, 0x21, 0x9d, 0x8e, 0x32, 0xeb, 0x57, 0xcd, 0x71, 0xa8, 0x14, 0x7, 0xbb, 0x62, 0xde, 0x4b, 0xf7, 0x2e, 0x92, 0x81, 0x3d, 0xe4, 0x58, 0xc2, 0x7e, 0xa7, 0x1b, 0x8, 0xb4, 0x6d, 0xd1, 0x5a, 0xe6, 0x3f, 0x83, 0x90, 0x2c, 0xf5, 0x49, 0xd3, 0x6f, 0xb6, 0xa, 0x19, 0xa5, 0x7c, 0xc0, 0x55, 0xe9, 0x30, 0x8c, 0x9f, 0x23, 0xfa, 0x46, 0xdc, 0x60, 0xb9, 0x5, 0x16, 0xaa, 0x73, 0xcf},
- {0x0, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95, 0x1f, 0xa2, 0x78, 0xc5, 0xd1, 0x6c, 0xb6, 0xb, 0x9e, 0x23, 0xf9, 0x44, 0x50, 0xed, 0x37, 0x8a, 0x3e, 0x83, 0x59, 0xe4, 0xf0, 0x4d, 0x97, 0x2a, 0xbf, 0x2, 0xd8, 0x65, 0x71, 0xcc, 0x16, 0xab, 0x21, 0x9c, 0x46, 0xfb, 0xef, 0x52, 0x88, 0x35, 0xa0, 0x1d, 0xc7, 0x7a, 0x6e, 0xd3, 0x9, 0xb4, 0x7c, 0xc1, 0x1b, 0xa6, 0xb2, 0xf, 0xd5, 0x68, 0xfd, 0x40, 0x9a, 0x27, 0x33, 0x8e, 0x54, 0xe9, 0x63, 0xde, 0x4, 0xb9, 0xad, 0x10, 0xca, 0x77, 0xe2, 0x5f, 0x85, 0x38, 0x2c, 0x91, 0x4b, 0xf6, 0x42, 0xff, 0x25, 0x98, 0x8c, 0x31, 0xeb, 0x56, 0xc3, 0x7e, 0xa4, 0x19, 0xd, 0xb0, 0x6a, 0xd7, 0x5d, 0xe0, 0x3a, 0x87, 0x93, 0x2e, 0xf4, 0x49, 0xdc, 0x61, 0xbb, 0x6, 0x12, 0xaf, 0x75, 0xc8, 0xf8, 0x45, 0x9f, 0x22, 0x36, 0x8b, 0x51, 0xec, 0x79, 0xc4, 0x1e, 0xa3, 0xb7, 0xa, 0xd0, 0x6d, 0xe7, 0x5a, 0x80, 0x3d, 0x29, 0x94, 0x4e, 0xf3, 0x66, 0xdb, 0x1, 0xbc, 0xa8, 0x15, 0xcf, 0x72, 0xc6, 0x7b, 0xa1, 0x1c, 0x8, 0xb5, 0x6f, 0xd2, 0x47, 0xfa, 0x20, 0x9d, 0x89, 0x34, 0xee, 0x53, 0xd9, 0x64, 0xbe, 0x3, 0x17, 0xaa, 0x70, 0xcd, 0x58, 0xe5, 0x3f, 0x82, 0x96, 0x2b, 0xf1, 0x4c, 0x84, 0x39, 0xe3, 0x5e, 0x4a, 0xf7, 0x2d, 0x90, 0x5, 0xb8, 0x62, 0xdf, 0xcb, 0x76, 0xac, 0x11, 0x9b, 0x26, 0xfc, 0x41, 0x55, 0xe8, 0x32, 0x8f, 0x1a, 0xa7, 0x7d, 0xc0, 0xd4, 0x69, 0xb3, 0xe, 0xba, 0x7, 0xdd, 0x60, 0x74, 0xc9, 0x13, 0xae, 0x3b, 0x86, 0x5c, 0xe1, 0xf5, 0x48, 0x92, 0x2f, 0xa5, 0x18, 0xc2, 0x7f, 0x6b, 0xd6, 0xc, 0xb1, 0x24, 0x99, 0x43, 0xfe, 0xea, 0x57, 0x8d, 0x30},
- {0x0, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84, 0x2f, 0x91, 0x4e, 0xf0, 0xed, 0x53, 0x8c, 0x32, 0xb6, 0x8, 0xd7, 0x69, 0x74, 0xca, 0x15, 0xab, 0x5e, 0xe0, 0x3f, 0x81, 0x9c, 0x22, 0xfd, 0x43, 0xc7, 0x79, 0xa6, 0x18, 0x5, 0xbb, 0x64, 0xda, 0x71, 0xcf, 0x10, 0xae, 0xb3, 0xd, 0xd2, 0x6c, 0xe8, 0x56, 0x89, 0x37, 0x2a, 0x94, 0x4b, 0xf5, 0xbc, 0x2, 0xdd, 0x63, 0x7e, 0xc0, 0x1f, 0xa1, 0x25, 0x9b, 0x44, 0xfa, 0xe7, 0x59, 0x86, 0x38, 0x93, 0x2d, 0xf2, 0x4c, 0x51, 0xef, 0x30, 0x8e, 0xa, 0xb4, 0x6b, 0xd5, 0xc8, 0x76, 0xa9, 0x17, 0xe2, 0x5c, 0x83, 0x3d, 0x20, 0x9e, 0x41, 0xff, 0x7b, 0xc5, 0x1a, 0xa4, 0xb9, 0x7, 0xd8, 0x66, 0xcd, 0x73, 0xac, 0x12, 0xf, 0xb1, 0x6e, 0xd0, 0x54, 0xea, 0x35, 0x8b, 0x96, 0x28, 0xf7, 0x49, 0x65, 0xdb, 0x4, 0xba, 0xa7, 0x19, 0xc6, 0x78, 0xfc, 0x42, 0x9d, 0x23, 0x3e, 0x80, 0x5f, 0xe1, 0x4a, 0xf4, 0x2b, 0x95, 0x88, 0x36, 0xe9, 0x57, 0xd3, 0x6d, 0xb2, 0xc, 0x11, 0xaf, 0x70, 0xce, 0x3b, 0x85, 0x5a, 0xe4, 0xf9, 0x47, 0x98, 0x26, 0xa2, 0x1c, 0xc3, 0x7d, 0x60, 0xde, 0x1, 0xbf, 0x14, 0xaa, 0x75, 0xcb, 0xd6, 0x68, 0xb7, 0x9, 0x8d, 0x33, 0xec, 0x52, 0x4f, 0xf1, 0x2e, 0x90, 0xd9, 0x67, 0xb8, 0x6, 0x1b, 0xa5, 0x7a, 0xc4, 0x40, 0xfe, 0x21, 0x9f, 0x82, 0x3c, 0xe3, 0x5d, 0xf6, 0x48, 0x97, 0x29, 0x34, 0x8a, 0x55, 0xeb, 0x6f, 0xd1, 0xe, 0xb0, 0xad, 0x13, 0xcc, 0x72, 0x87, 0x39, 0xe6, 0x58, 0x45, 0xfb, 0x24, 0x9a, 0x1e, 0xa0, 0x7f, 0xc1, 0xdc, 0x62, 0xbd, 0x3, 0xa8, 0x16, 0xc9, 0x77, 0x6a, 0xd4, 0xb, 0xb5, 0x31, 0x8f, 0x50, 0xee, 0xf3, 0x4d, 0x92, 0x2c},
- {0x0, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b, 0x3f, 0x80, 0x5c, 0xe3, 0xf9, 0x46, 0x9a, 0x25, 0xae, 0x11, 0xcd, 0x72, 0x68, 0xd7, 0xb, 0xb4, 0x7e, 0xc1, 0x1d, 0xa2, 0xb8, 0x7, 0xdb, 0x64, 0xef, 0x50, 0x8c, 0x33, 0x29, 0x96, 0x4a, 0xf5, 0x41, 0xfe, 0x22, 0x9d, 0x87, 0x38, 0xe4, 0x5b, 0xd0, 0x6f, 0xb3, 0xc, 0x16, 0xa9, 0x75, 0xca, 0xfc, 0x43, 0x9f, 0x20, 0x3a, 0x85, 0x59, 0xe6, 0x6d, 0xd2, 0xe, 0xb1, 0xab, 0x14, 0xc8, 0x77, 0xc3, 0x7c, 0xa0, 0x1f, 0x5, 0xba, 0x66, 0xd9, 0x52, 0xed, 0x31, 0x8e, 0x94, 0x2b, 0xf7, 0x48, 0x82, 0x3d, 0xe1, 0x5e, 0x44, 0xfb, 0x27, 0x98, 0x13, 0xac, 0x70, 0xcf, 0xd5, 0x6a, 0xb6, 0x9, 0xbd, 0x2, 0xde, 0x61, 0x7b, 0xc4, 0x18, 0xa7, 0x2c, 0x93, 0x4f, 0xf0, 0xea, 0x55, 0x89, 0x36, 0xe5, 0x5a, 0x86, 0x39, 0x23, 0x9c, 0x40, 0xff, 0x74, 0xcb, 0x17, 0xa8, 0xb2, 0xd, 0xd1, 0x6e, 0xda, 0x65, 0xb9, 0x6, 0x1c, 0xa3, 0x7f, 0xc0, 0x4b, 0xf4, 0x28, 0x97, 0x8d, 0x32, 0xee, 0x51, 0x9b, 0x24, 0xf8, 0x47, 0x5d, 0xe2, 0x3e, 0x81, 0xa, 0xb5, 0x69, 0xd6, 0xcc, 0x73, 0xaf, 0x10, 0xa4, 0x1b, 0xc7, 0x78, 0x62, 0xdd, 0x1, 0xbe, 0x35, 0x8a, 0x56, 0xe9, 0xf3, 0x4c, 0x90, 0x2f, 0x19, 0xa6, 0x7a, 0xc5, 0xdf, 0x60, 0xbc, 0x3, 0x88, 0x37, 0xeb, 0x54, 0x4e, 0xf1, 0x2d, 0x92, 0x26, 0x99, 0x45, 0xfa, 0xe0, 0x5f, 0x83, 0x3c, 0xb7, 0x8, 0xd4, 0x6b, 0x71, 0xce, 0x12, 0xad, 0x67, 0xd8, 0x4, 0xbb, 0xa1, 0x1e, 0xc2, 0x7d, 0xf6, 0x49, 0x95, 0x2a, 0x30, 0x8f, 0x53, 0xec, 0x58, 0xe7, 0x3b, 0x84, 0x9e, 0x21, 0xfd, 0x42, 0xc9, 0x76, 0xaa, 0x15, 0xf, 0xb0, 0x6c, 0xd3},
- {0x0, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34, 0x9c, 0x5c, 0x1, 0xc1, 0xbb, 0x7b, 0x26, 0xe6, 0xd2, 0x12, 0x4f, 0x8f, 0xf5, 0x35, 0x68, 0xa8, 0x25, 0xe5, 0xb8, 0x78, 0x2, 0xc2, 0x9f, 0x5f, 0x6b, 0xab, 0xf6, 0x36, 0x4c, 0x8c, 0xd1, 0x11, 0xb9, 0x79, 0x24, 0xe4, 0x9e, 0x5e, 0x3, 0xc3, 0xf7, 0x37, 0x6a, 0xaa, 0xd0, 0x10, 0x4d, 0x8d, 0x4a, 0x8a, 0xd7, 0x17, 0x6d, 0xad, 0xf0, 0x30, 0x4, 0xc4, 0x99, 0x59, 0x23, 0xe3, 0xbe, 0x7e, 0xd6, 0x16, 0x4b, 0x8b, 0xf1, 0x31, 0x6c, 0xac, 0x98, 0x58, 0x5, 0xc5, 0xbf, 0x7f, 0x22, 0xe2, 0x6f, 0xaf, 0xf2, 0x32, 0x48, 0x88, 0xd5, 0x15, 0x21, 0xe1, 0xbc, 0x7c, 0x6, 0xc6, 0x9b, 0x5b, 0xf3, 0x33, 0x6e, 0xae, 0xd4, 0x14, 0x49, 0x89, 0xbd, 0x7d, 0x20, 0xe0, 0x9a, 0x5a, 0x7, 0xc7, 0x94, 0x54, 0x9, 0xc9, 0xb3, 0x73, 0x2e, 0xee, 0xda, 0x1a, 0x47, 0x87, 0xfd, 0x3d, 0x60, 0xa0, 0x8, 0xc8, 0x95, 0x55, 0x2f, 0xef, 0xb2, 0x72, 0x46, 0x86, 0xdb, 0x1b, 0x61, 0xa1, 0xfc, 0x3c, 0xb1, 0x71, 0x2c, 0xec, 0x96, 0x56, 0xb, 0xcb, 0xff, 0x3f, 0x62, 0xa2, 0xd8, 0x18, 0x45, 0x85, 0x2d, 0xed, 0xb0, 0x70, 0xa, 0xca, 0x97, 0x57, 0x63, 0xa3, 0xfe, 0x3e, 0x44, 0x84, 0xd9, 0x19, 0xde, 0x1e, 0x43, 0x83, 0xf9, 0x39, 0x64, 0xa4, 0x90, 0x50, 0xd, 0xcd, 0xb7, 0x77, 0x2a, 0xea, 0x42, 0x82, 0xdf, 0x1f, 0x65, 0xa5, 0xf8, 0x38, 0xc, 0xcc, 0x91, 0x51, 0x2b, 0xeb, 0xb6, 0x76, 0xfb, 0x3b, 0x66, 0xa6, 0xdc, 0x1c, 0x41, 0x81, 0xb5, 0x75, 0x28, 0xe8, 0x92, 0x52, 0xf, 0xcf, 0x67, 0xa7, 0xfa, 0x3a, 0x40, 0x80, 0xdd, 0x1d, 0x29, 0xe9, 0xb4, 0x74, 0xe, 0xce, 0x93, 0x53},
- {0x0, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b, 0x8c, 0x4d, 0x13, 0xd2, 0xaf, 0x6e, 0x30, 0xf1, 0xca, 0xb, 0x55, 0x94, 0xe9, 0x28, 0x76, 0xb7, 0x5, 0xc4, 0x9a, 0x5b, 0x26, 0xe7, 0xb9, 0x78, 0x43, 0x82, 0xdc, 0x1d, 0x60, 0xa1, 0xff, 0x3e, 0x89, 0x48, 0x16, 0xd7, 0xaa, 0x6b, 0x35, 0xf4, 0xcf, 0xe, 0x50, 0x91, 0xec, 0x2d, 0x73, 0xb2, 0xa, 0xcb, 0x95, 0x54, 0x29, 0xe8, 0xb6, 0x77, 0x4c, 0x8d, 0xd3, 0x12, 0x6f, 0xae, 0xf0, 0x31, 0x86, 0x47, 0x19, 0xd8, 0xa5, 0x64, 0x3a, 0xfb, 0xc0, 0x1, 0x5f, 0x9e, 0xe3, 0x22, 0x7c, 0xbd, 0xf, 0xce, 0x90, 0x51, 0x2c, 0xed, 0xb3, 0x72, 0x49, 0x88, 0xd6, 0x17, 0x6a, 0xab, 0xf5, 0x34, 0x83, 0x42, 0x1c, 0xdd, 0xa0, 0x61, 0x3f, 0xfe, 0xc5, 0x4, 0x5a, 0x9b, 0xe6, 0x27, 0x79, 0xb8, 0x14, 0xd5, 0x8b, 0x4a, 0x37, 0xf6, 0xa8, 0x69, 0x52, 0x93, 0xcd, 0xc, 0x71, 0xb0, 0xee, 0x2f, 0x98, 0x59, 0x7, 0xc6, 0xbb, 0x7a, 0x24, 0xe5, 0xde, 0x1f, 0x41, 0x80, 0xfd, 0x3c, 0x62, 0xa3, 0x11, 0xd0, 0x8e, 0x4f, 0x32, 0xf3, 0xad, 0x6c, 0x57, 0x96, 0xc8, 0x9, 0x74, 0xb5, 0xeb, 0x2a, 0x9d, 0x5c, 0x2, 0xc3, 0xbe, 0x7f, 0x21, 0xe0, 0xdb, 0x1a, 0x44, 0x85, 0xf8, 0x39, 0x67, 0xa6, 0x1e, 0xdf, 0x81, 0x40, 0x3d, 0xfc, 0xa2, 0x63, 0x58, 0x99, 0xc7, 0x6, 0x7b, 0xba, 0xe4, 0x25, 0x92, 0x53, 0xd, 0xcc, 0xb1, 0x70, 0x2e, 0xef, 0xd4, 0x15, 0x4b, 0x8a, 0xf7, 0x36, 0x68, 0xa9, 0x1b, 0xda, 0x84, 0x45, 0x38, 0xf9, 0xa7, 0x66, 0x5d, 0x9c, 0xc2, 0x3, 0x7e, 0xbf, 0xe1, 0x20, 0x97, 0x56, 0x8, 0xc9, 0xb4, 0x75, 0x2b, 0xea, 0xd1, 0x10, 0x4e, 0x8f, 0xf2, 0x33, 0x6d, 0xac},
- {0x0, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x5, 0x71, 0xb3, 0xe8, 0x2a, 0xbc, 0x7e, 0x25, 0xe7, 0x93, 0x51, 0xa, 0xc8, 0xe2, 0x20, 0x7b, 0xb9, 0xcd, 0xf, 0x54, 0x96, 0x65, 0xa7, 0xfc, 0x3e, 0x4a, 0x88, 0xd3, 0x11, 0x3b, 0xf9, 0xa2, 0x60, 0x14, 0xd6, 0x8d, 0x4f, 0xd9, 0x1b, 0x40, 0x82, 0xf6, 0x34, 0x6f, 0xad, 0x87, 0x45, 0x1e, 0xdc, 0xa8, 0x6a, 0x31, 0xf3, 0xca, 0x8, 0x53, 0x91, 0xe5, 0x27, 0x7c, 0xbe, 0x94, 0x56, 0xd, 0xcf, 0xbb, 0x79, 0x22, 0xe0, 0x76, 0xb4, 0xef, 0x2d, 0x59, 0x9b, 0xc0, 0x2, 0x28, 0xea, 0xb1, 0x73, 0x7, 0xc5, 0x9e, 0x5c, 0xaf, 0x6d, 0x36, 0xf4, 0x80, 0x42, 0x19, 0xdb, 0xf1, 0x33, 0x68, 0xaa, 0xde, 0x1c, 0x47, 0x85, 0x13, 0xd1, 0x8a, 0x48, 0x3c, 0xfe, 0xa5, 0x67, 0x4d, 0x8f, 0xd4, 0x16, 0x62, 0xa0, 0xfb, 0x39, 0x89, 0x4b, 0x10, 0xd2, 0xa6, 0x64, 0x3f, 0xfd, 0xd7, 0x15, 0x4e, 0x8c, 0xf8, 0x3a, 0x61, 0xa3, 0x35, 0xf7, 0xac, 0x6e, 0x1a, 0xd8, 0x83, 0x41, 0x6b, 0xa9, 0xf2, 0x30, 0x44, 0x86, 0xdd, 0x1f, 0xec, 0x2e, 0x75, 0xb7, 0xc3, 0x1, 0x5a, 0x98, 0xb2, 0x70, 0x2b, 0xe9, 0x9d, 0x5f, 0x4, 0xc6, 0x50, 0x92, 0xc9, 0xb, 0x7f, 0xbd, 0xe6, 0x24, 0xe, 0xcc, 0x97, 0x55, 0x21, 0xe3, 0xb8, 0x7a, 0x43, 0x81, 0xda, 0x18, 0x6c, 0xae, 0xf5, 0x37, 0x1d, 0xdf, 0x84, 0x46, 0x32, 0xf0, 0xab, 0x69, 0xff, 0x3d, 0x66, 0xa4, 0xd0, 0x12, 0x49, 0x8b, 0xa1, 0x63, 0x38, 0xfa, 0x8e, 0x4c, 0x17, 0xd5, 0x26, 0xe4, 0xbf, 0x7d, 0x9, 0xcb, 0x90, 0x52, 0x78, 0xba, 0xe1, 0x23, 0x57, 0x95, 0xce, 0xc, 0x9a, 0x58, 0x3, 0xc1, 0xb5, 0x77, 0x2c, 0xee, 0xc4, 0x6, 0x5d, 0x9f, 0xeb, 0x29, 0x72, 0xb0},
- {0x0, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0xe, 0x7d, 0xbe, 0xe6, 0x25, 0xac, 0x6f, 0x37, 0xf4, 0x87, 0x44, 0x1c, 0xdf, 0xfa, 0x39, 0x61, 0xa2, 0xd1, 0x12, 0x4a, 0x89, 0x45, 0x86, 0xde, 0x1d, 0x6e, 0xad, 0xf5, 0x36, 0x13, 0xd0, 0x88, 0x4b, 0x38, 0xfb, 0xa3, 0x60, 0xe9, 0x2a, 0x72, 0xb1, 0xc2, 0x1, 0x59, 0x9a, 0xbf, 0x7c, 0x24, 0xe7, 0x94, 0x57, 0xf, 0xcc, 0x8a, 0x49, 0x11, 0xd2, 0xa1, 0x62, 0x3a, 0xf9, 0xdc, 0x1f, 0x47, 0x84, 0xf7, 0x34, 0x6c, 0xaf, 0x26, 0xe5, 0xbd, 0x7e, 0xd, 0xce, 0x96, 0x55, 0x70, 0xb3, 0xeb, 0x28, 0x5b, 0x98, 0xc0, 0x3, 0xcf, 0xc, 0x54, 0x97, 0xe4, 0x27, 0x7f, 0xbc, 0x99, 0x5a, 0x2, 0xc1, 0xb2, 0x71, 0x29, 0xea, 0x63, 0xa0, 0xf8, 0x3b, 0x48, 0x8b, 0xd3, 0x10, 0x35, 0xf6, 0xae, 0x6d, 0x1e, 0xdd, 0x85, 0x46, 0x9, 0xca, 0x92, 0x51, 0x22, 0xe1, 0xb9, 0x7a, 0x5f, 0x9c, 0xc4, 0x7, 0x74, 0xb7, 0xef, 0x2c, 0xa5, 0x66, 0x3e, 0xfd, 0x8e, 0x4d, 0x15, 0xd6, 0xf3, 0x30, 0x68, 0xab, 0xd8, 0x1b, 0x43, 0x80, 0x4c, 0x8f, 0xd7, 0x14, 0x67, 0xa4, 0xfc, 0x3f, 0x1a, 0xd9, 0x81, 0x42, 0x31, 0xf2, 0xaa, 0x69, 0xe0, 0x23, 0x7b, 0xb8, 0xcb, 0x8, 0x50, 0x93, 0xb6, 0x75, 0x2d, 0xee, 0x9d, 0x5e, 0x6, 0xc5, 0x83, 0x40, 0x18, 0xdb, 0xa8, 0x6b, 0x33, 0xf0, 0xd5, 0x16, 0x4e, 0x8d, 0xfe, 0x3d, 0x65, 0xa6, 0x2f, 0xec, 0xb4, 0x77, 0x4, 0xc7, 0x9f, 0x5c, 0x79, 0xba, 0xe2, 0x21, 0x52, 0x91, 0xc9, 0xa, 0xc6, 0x5, 0x5d, 0x9e, 0xed, 0x2e, 0x76, 0xb5, 0x90, 0x53, 0xb, 0xc8, 0xbb, 0x78, 0x20, 0xe3, 0x6a, 0xa9, 0xf1, 0x32, 0x41, 0x82, 0xda, 0x19, 0x3c, 0xff, 0xa7, 0x64, 0x17, 0xd4, 0x8c, 0x4f},
- {0x0, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x8, 0xdc, 0x18, 0x49, 0x8d, 0xeb, 0x2f, 0x7e, 0xba, 0xb2, 0x76, 0x27, 0xe3, 0x85, 0x41, 0x10, 0xd4, 0xa5, 0x61, 0x30, 0xf4, 0x92, 0x56, 0x7, 0xc3, 0xcb, 0xf, 0x5e, 0x9a, 0xfc, 0x38, 0x69, 0xad, 0x79, 0xbd, 0xec, 0x28, 0x4e, 0x8a, 0xdb, 0x1f, 0x17, 0xd3, 0x82, 0x46, 0x20, 0xe4, 0xb5, 0x71, 0x57, 0x93, 0xc2, 0x6, 0x60, 0xa4, 0xf5, 0x31, 0x39, 0xfd, 0xac, 0x68, 0xe, 0xca, 0x9b, 0x5f, 0x8b, 0x4f, 0x1e, 0xda, 0xbc, 0x78, 0x29, 0xed, 0xe5, 0x21, 0x70, 0xb4, 0xd2, 0x16, 0x47, 0x83, 0xf2, 0x36, 0x67, 0xa3, 0xc5, 0x1, 0x50, 0x94, 0x9c, 0x58, 0x9, 0xcd, 0xab, 0x6f, 0x3e, 0xfa, 0x2e, 0xea, 0xbb, 0x7f, 0x19, 0xdd, 0x8c, 0x48, 0x40, 0x84, 0xd5, 0x11, 0x77, 0xb3, 0xe2, 0x26, 0xae, 0x6a, 0x3b, 0xff, 0x99, 0x5d, 0xc, 0xc8, 0xc0, 0x4, 0x55, 0x91, 0xf7, 0x33, 0x62, 0xa6, 0x72, 0xb6, 0xe7, 0x23, 0x45, 0x81, 0xd0, 0x14, 0x1c, 0xd8, 0x89, 0x4d, 0x2b, 0xef, 0xbe, 0x7a, 0xb, 0xcf, 0x9e, 0x5a, 0x3c, 0xf8, 0xa9, 0x6d, 0x65, 0xa1, 0xf0, 0x34, 0x52, 0x96, 0xc7, 0x3, 0xd7, 0x13, 0x42, 0x86, 0xe0, 0x24, 0x75, 0xb1, 0xb9, 0x7d, 0x2c, 0xe8, 0x8e, 0x4a, 0x1b, 0xdf, 0xf9, 0x3d, 0x6c, 0xa8, 0xce, 0xa, 0x5b, 0x9f, 0x97, 0x53, 0x2, 0xc6, 0xa0, 0x64, 0x35, 0xf1, 0x25, 0xe1, 0xb0, 0x74, 0x12, 0xd6, 0x87, 0x43, 0x4b, 0x8f, 0xde, 0x1a, 0x7c, 0xb8, 0xe9, 0x2d, 0x5c, 0x98, 0xc9, 0xd, 0x6b, 0xaf, 0xfe, 0x3a, 0x32, 0xf6, 0xa7, 0x63, 0x5, 0xc1, 0x90, 0x54, 0x80, 0x44, 0x15, 0xd1, 0xb7, 0x73, 0x22, 0xe6, 0xee, 0x2a, 0x7b, 0xbf, 0xd9, 0x1d, 0x4c, 0x88},
- {0x0, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x7, 0xcc, 0x9, 0x5b, 0x9e, 0xff, 0x3a, 0x68, 0xad, 0xaa, 0x6f, 0x3d, 0xf8, 0x99, 0x5c, 0xe, 0xcb, 0x85, 0x40, 0x12, 0xd7, 0xb6, 0x73, 0x21, 0xe4, 0xe3, 0x26, 0x74, 0xb1, 0xd0, 0x15, 0x47, 0x82, 0x49, 0x8c, 0xde, 0x1b, 0x7a, 0xbf, 0xed, 0x28, 0x2f, 0xea, 0xb8, 0x7d, 0x1c, 0xd9, 0x8b, 0x4e, 0x17, 0xd2, 0x80, 0x45, 0x24, 0xe1, 0xb3, 0x76, 0x71, 0xb4, 0xe6, 0x23, 0x42, 0x87, 0xd5, 0x10, 0xdb, 0x1e, 0x4c, 0x89, 0xe8, 0x2d, 0x7f, 0xba, 0xbd, 0x78, 0x2a, 0xef, 0x8e, 0x4b, 0x19, 0xdc, 0x92, 0x57, 0x5, 0xc0, 0xa1, 0x64, 0x36, 0xf3, 0xf4, 0x31, 0x63, 0xa6, 0xc7, 0x2, 0x50, 0x95, 0x5e, 0x9b, 0xc9, 0xc, 0x6d, 0xa8, 0xfa, 0x3f, 0x38, 0xfd, 0xaf, 0x6a, 0xb, 0xce, 0x9c, 0x59, 0x2e, 0xeb, 0xb9, 0x7c, 0x1d, 0xd8, 0x8a, 0x4f, 0x48, 0x8d, 0xdf, 0x1a, 0x7b, 0xbe, 0xec, 0x29, 0xe2, 0x27, 0x75, 0xb0, 0xd1, 0x14, 0x46, 0x83, 0x84, 0x41, 0x13, 0xd6, 0xb7, 0x72, 0x20, 0xe5, 0xab, 0x6e, 0x3c, 0xf9, 0x98, 0x5d, 0xf, 0xca, 0xcd, 0x8, 0x5a, 0x9f, 0xfe, 0x3b, 0x69, 0xac, 0x67, 0xa2, 0xf0, 0x35, 0x54, 0x91, 0xc3, 0x6, 0x1, 0xc4, 0x96, 0x53, 0x32, 0xf7, 0xa5, 0x60, 0x39, 0xfc, 0xae, 0x6b, 0xa, 0xcf, 0x9d, 0x58, 0x5f, 0x9a, 0xc8, 0xd, 0x6c, 0xa9, 0xfb, 0x3e, 0xf5, 0x30, 0x62, 0xa7, 0xc6, 0x3, 0x51, 0x94, 0x93, 0x56, 0x4, 0xc1, 0xa0, 0x65, 0x37, 0xf2, 0xbc, 0x79, 0x2b, 0xee, 0x8f, 0x4a, 0x18, 0xdd, 0xda, 0x1f, 0x4d, 0x88, 0xe9, 0x2c, 0x7e, 0xbb, 0x70, 0xb5, 0xe7, 0x22, 0x43, 0x86, 0xd4, 0x11, 0x16, 0xd3, 0x81, 0x44, 0x25, 0xe0, 0xb2, 0x77},
- {0x0, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16, 0xfc, 0x3a, 0x6d, 0xab, 0xc3, 0x5, 0x52, 0x94, 0x82, 0x44, 0x13, 0xd5, 0xbd, 0x7b, 0x2c, 0xea, 0xe5, 0x23, 0x74, 0xb2, 0xda, 0x1c, 0x4b, 0x8d, 0x9b, 0x5d, 0xa, 0xcc, 0xa4, 0x62, 0x35, 0xf3, 0x19, 0xdf, 0x88, 0x4e, 0x26, 0xe0, 0xb7, 0x71, 0x67, 0xa1, 0xf6, 0x30, 0x58, 0x9e, 0xc9, 0xf, 0xd7, 0x11, 0x46, 0x80, 0xe8, 0x2e, 0x79, 0xbf, 0xa9, 0x6f, 0x38, 0xfe, 0x96, 0x50, 0x7, 0xc1, 0x2b, 0xed, 0xba, 0x7c, 0x14, 0xd2, 0x85, 0x43, 0x55, 0x93, 0xc4, 0x2, 0x6a, 0xac, 0xfb, 0x3d, 0x32, 0xf4, 0xa3, 0x65, 0xd, 0xcb, 0x9c, 0x5a, 0x4c, 0x8a, 0xdd, 0x1b, 0x73, 0xb5, 0xe2, 0x24, 0xce, 0x8, 0x5f, 0x99, 0xf1, 0x37, 0x60, 0xa6, 0xb0, 0x76, 0x21, 0xe7, 0x8f, 0x49, 0x1e, 0xd8, 0xb3, 0x75, 0x22, 0xe4, 0x8c, 0x4a, 0x1d, 0xdb, 0xcd, 0xb, 0x5c, 0x9a, 0xf2, 0x34, 0x63, 0xa5, 0x4f, 0x89, 0xde, 0x18, 0x70, 0xb6, 0xe1, 0x27, 0x31, 0xf7, 0xa0, 0x66, 0xe, 0xc8, 0x9f, 0x59, 0x56, 0x90, 0xc7, 0x1, 0x69, 0xaf, 0xf8, 0x3e, 0x28, 0xee, 0xb9, 0x7f, 0x17, 0xd1, 0x86, 0x40, 0xaa, 0x6c, 0x3b, 0xfd, 0x95, 0x53, 0x4, 0xc2, 0xd4, 0x12, 0x45, 0x83, 0xeb, 0x2d, 0x7a, 0xbc, 0x64, 0xa2, 0xf5, 0x33, 0x5b, 0x9d, 0xca, 0xc, 0x1a, 0xdc, 0x8b, 0x4d, 0x25, 0xe3, 0xb4, 0x72, 0x98, 0x5e, 0x9, 0xcf, 0xa7, 0x61, 0x36, 0xf0, 0xe6, 0x20, 0x77, 0xb1, 0xd9, 0x1f, 0x48, 0x8e, 0x81, 0x47, 0x10, 0xd6, 0xbe, 0x78, 0x2f, 0xe9, 0xff, 0x39, 0x6e, 0xa8, 0xc0, 0x6, 0x51, 0x97, 0x7d, 0xbb, 0xec, 0x2a, 0x42, 0x84, 0xd3, 0x15, 0x3, 0xc5, 0x92, 0x54, 0x3c, 0xfa, 0xad, 0x6b},
- {0x0, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19, 0xec, 0x2b, 0x7f, 0xb8, 0xd7, 0x10, 0x44, 0x83, 0x9a, 0x5d, 0x9, 0xce, 0xa1, 0x66, 0x32, 0xf5, 0xc5, 0x2, 0x56, 0x91, 0xfe, 0x39, 0x6d, 0xaa, 0xb3, 0x74, 0x20, 0xe7, 0x88, 0x4f, 0x1b, 0xdc, 0x29, 0xee, 0xba, 0x7d, 0x12, 0xd5, 0x81, 0x46, 0x5f, 0x98, 0xcc, 0xb, 0x64, 0xa3, 0xf7, 0x30, 0x97, 0x50, 0x4, 0xc3, 0xac, 0x6b, 0x3f, 0xf8, 0xe1, 0x26, 0x72, 0xb5, 0xda, 0x1d, 0x49, 0x8e, 0x7b, 0xbc, 0xe8, 0x2f, 0x40, 0x87, 0xd3, 0x14, 0xd, 0xca, 0x9e, 0x59, 0x36, 0xf1, 0xa5, 0x62, 0x52, 0x95, 0xc1, 0x6, 0x69, 0xae, 0xfa, 0x3d, 0x24, 0xe3, 0xb7, 0x70, 0x1f, 0xd8, 0x8c, 0x4b, 0xbe, 0x79, 0x2d, 0xea, 0x85, 0x42, 0x16, 0xd1, 0xc8, 0xf, 0x5b, 0x9c, 0xf3, 0x34, 0x60, 0xa7, 0x33, 0xf4, 0xa0, 0x67, 0x8, 0xcf, 0x9b, 0x5c, 0x45, 0x82, 0xd6, 0x11, 0x7e, 0xb9, 0xed, 0x2a, 0xdf, 0x18, 0x4c, 0x8b, 0xe4, 0x23, 0x77, 0xb0, 0xa9, 0x6e, 0x3a, 0xfd, 0x92, 0x55, 0x1, 0xc6, 0xf6, 0x31, 0x65, 0xa2, 0xcd, 0xa, 0x5e, 0x99, 0x80, 0x47, 0x13, 0xd4, 0xbb, 0x7c, 0x28, 0xef, 0x1a, 0xdd, 0x89, 0x4e, 0x21, 0xe6, 0xb2, 0x75, 0x6c, 0xab, 0xff, 0x38, 0x57, 0x90, 0xc4, 0x3, 0xa4, 0x63, 0x37, 0xf0, 0x9f, 0x58, 0xc, 0xcb, 0xd2, 0x15, 0x41, 0x86, 0xe9, 0x2e, 0x7a, 0xbd, 0x48, 0x8f, 0xdb, 0x1c, 0x73, 0xb4, 0xe0, 0x27, 0x3e, 0xf9, 0xad, 0x6a, 0x5, 0xc2, 0x96, 0x51, 0x61, 0xa6, 0xf2, 0x35, 0x5a, 0x9d, 0xc9, 0xe, 0x17, 0xd0, 0x84, 0x43, 0x2c, 0xeb, 0xbf, 0x78, 0x8d, 0x4a, 0x1e, 0xd9, 0xb6, 0x71, 0x25, 0xe2, 0xfb, 0x3c, 0x68, 0xaf, 0xc0, 0x7, 0x53, 0x94},
- {0x0, 0xc8, 0x8d, 0x45, 0x7, 0xcf, 0x8a, 0x42, 0xe, 0xc6, 0x83, 0x4b, 0x9, 0xc1, 0x84, 0x4c, 0x1c, 0xd4, 0x91, 0x59, 0x1b, 0xd3, 0x96, 0x5e, 0x12, 0xda, 0x9f, 0x57, 0x15, 0xdd, 0x98, 0x50, 0x38, 0xf0, 0xb5, 0x7d, 0x3f, 0xf7, 0xb2, 0x7a, 0x36, 0xfe, 0xbb, 0x73, 0x31, 0xf9, 0xbc, 0x74, 0x24, 0xec, 0xa9, 0x61, 0x23, 0xeb, 0xae, 0x66, 0x2a, 0xe2, 0xa7, 0x6f, 0x2d, 0xe5, 0xa0, 0x68, 0x70, 0xb8, 0xfd, 0x35, 0x77, 0xbf, 0xfa, 0x32, 0x7e, 0xb6, 0xf3, 0x3b, 0x79, 0xb1, 0xf4, 0x3c, 0x6c, 0xa4, 0xe1, 0x29, 0x6b, 0xa3, 0xe6, 0x2e, 0x62, 0xaa, 0xef, 0x27, 0x65, 0xad, 0xe8, 0x20, 0x48, 0x80, 0xc5, 0xd, 0x4f, 0x87, 0xc2, 0xa, 0x46, 0x8e, 0xcb, 0x3, 0x41, 0x89, 0xcc, 0x4, 0x54, 0x9c, 0xd9, 0x11, 0x53, 0x9b, 0xde, 0x16, 0x5a, 0x92, 0xd7, 0x1f, 0x5d, 0x95, 0xd0, 0x18, 0xe0, 0x28, 0x6d, 0xa5, 0xe7, 0x2f, 0x6a, 0xa2, 0xee, 0x26, 0x63, 0xab, 0xe9, 0x21, 0x64, 0xac, 0xfc, 0x34, 0x71, 0xb9, 0xfb, 0x33, 0x76, 0xbe, 0xf2, 0x3a, 0x7f, 0xb7, 0xf5, 0x3d, 0x78, 0xb0, 0xd8, 0x10, 0x55, 0x9d, 0xdf, 0x17, 0x52, 0x9a, 0xd6, 0x1e, 0x5b, 0x93, 0xd1, 0x19, 0x5c, 0x94, 0xc4, 0xc, 0x49, 0x81, 0xc3, 0xb, 0x4e, 0x86, 0xca, 0x2, 0x47, 0x8f, 0xcd, 0x5, 0x40, 0x88, 0x90, 0x58, 0x1d, 0xd5, 0x97, 0x5f, 0x1a, 0xd2, 0x9e, 0x56, 0x13, 0xdb, 0x99, 0x51, 0x14, 0xdc, 0x8c, 0x44, 0x1, 0xc9, 0x8b, 0x43, 0x6, 0xce, 0x82, 0x4a, 0xf, 0xc7, 0x85, 0x4d, 0x8, 0xc0, 0xa8, 0x60, 0x25, 0xed, 0xaf, 0x67, 0x22, 0xea, 0xa6, 0x6e, 0x2b, 0xe3, 0xa1, 0x69, 0x2c, 0xe4, 0xb4, 0x7c, 0x39, 0xf1, 0xb3, 0x7b, 0x3e, 0xf6, 0xba, 0x72, 0x37, 0xff, 0xbd, 0x75, 0x30, 0xf8},
- {0x0, 0xc9, 0x8f, 0x46, 0x3, 0xca, 0x8c, 0x45, 0x6, 0xcf, 0x89, 0x40, 0x5, 0xcc, 0x8a, 0x43, 0xc, 0xc5, 0x83, 0x4a, 0xf, 0xc6, 0x80, 0x49, 0xa, 0xc3, 0x85, 0x4c, 0x9, 0xc0, 0x86, 0x4f, 0x18, 0xd1, 0x97, 0x5e, 0x1b, 0xd2, 0x94, 0x5d, 0x1e, 0xd7, 0x91, 0x58, 0x1d, 0xd4, 0x92, 0x5b, 0x14, 0xdd, 0x9b, 0x52, 0x17, 0xde, 0x98, 0x51, 0x12, 0xdb, 0x9d, 0x54, 0x11, 0xd8, 0x9e, 0x57, 0x30, 0xf9, 0xbf, 0x76, 0x33, 0xfa, 0xbc, 0x75, 0x36, 0xff, 0xb9, 0x70, 0x35, 0xfc, 0xba, 0x73, 0x3c, 0xf5, 0xb3, 0x7a, 0x3f, 0xf6, 0xb0, 0x79, 0x3a, 0xf3, 0xb5, 0x7c, 0x39, 0xf0, 0xb6, 0x7f, 0x28, 0xe1, 0xa7, 0x6e, 0x2b, 0xe2, 0xa4, 0x6d, 0x2e, 0xe7, 0xa1, 0x68, 0x2d, 0xe4, 0xa2, 0x6b, 0x24, 0xed, 0xab, 0x62, 0x27, 0xee, 0xa8, 0x61, 0x22, 0xeb, 0xad, 0x64, 0x21, 0xe8, 0xae, 0x67, 0x60, 0xa9, 0xef, 0x26, 0x63, 0xaa, 0xec, 0x25, 0x66, 0xaf, 0xe9, 0x20, 0x65, 0xac, 0xea, 0x23, 0x6c, 0xa5, 0xe3, 0x2a, 0x6f, 0xa6, 0xe0, 0x29, 0x6a, 0xa3, 0xe5, 0x2c, 0x69, 0xa0, 0xe6, 0x2f, 0x78, 0xb1, 0xf7, 0x3e, 0x7b, 0xb2, 0xf4, 0x3d, 0x7e, 0xb7, 0xf1, 0x38, 0x7d, 0xb4, 0xf2, 0x3b, 0x74, 0xbd, 0xfb, 0x32, 0x77, 0xbe, 0xf8, 0x31, 0x72, 0xbb, 0xfd, 0x34, 0x71, 0xb8, 0xfe, 0x37, 0x50, 0x99, 0xdf, 0x16, 0x53, 0x9a, 0xdc, 0x15, 0x56, 0x9f, 0xd9, 0x10, 0x55, 0x9c, 0xda, 0x13, 0x5c, 0x95, 0xd3, 0x1a, 0x5f, 0x96, 0xd0, 0x19, 0x5a, 0x93, 0xd5, 0x1c, 0x59, 0x90, 0xd6, 0x1f, 0x48, 0x81, 0xc7, 0xe, 0x4b, 0x82, 0xc4, 0xd, 0x4e, 0x87, 0xc1, 0x8, 0x4d, 0x84, 0xc2, 0xb, 0x44, 0x8d, 0xcb, 0x2, 0x47, 0x8e, 0xc8, 0x1, 0x42, 0x8b, 0xcd, 0x4, 0x41, 0x88, 0xce, 0x7},
- {0x0, 0xca, 0x89, 0x43, 0xf, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52, 0x3c, 0xf6, 0xb5, 0x7f, 0x33, 0xf9, 0xba, 0x70, 0x22, 0xe8, 0xab, 0x61, 0x2d, 0xe7, 0xa4, 0x6e, 0x78, 0xb2, 0xf1, 0x3b, 0x77, 0xbd, 0xfe, 0x34, 0x66, 0xac, 0xef, 0x25, 0x69, 0xa3, 0xe0, 0x2a, 0x44, 0x8e, 0xcd, 0x7, 0x4b, 0x81, 0xc2, 0x8, 0x5a, 0x90, 0xd3, 0x19, 0x55, 0x9f, 0xdc, 0x16, 0xf0, 0x3a, 0x79, 0xb3, 0xff, 0x35, 0x76, 0xbc, 0xee, 0x24, 0x67, 0xad, 0xe1, 0x2b, 0x68, 0xa2, 0xcc, 0x6, 0x45, 0x8f, 0xc3, 0x9, 0x4a, 0x80, 0xd2, 0x18, 0x5b, 0x91, 0xdd, 0x17, 0x54, 0x9e, 0x88, 0x42, 0x1, 0xcb, 0x87, 0x4d, 0xe, 0xc4, 0x96, 0x5c, 0x1f, 0xd5, 0x99, 0x53, 0x10, 0xda, 0xb4, 0x7e, 0x3d, 0xf7, 0xbb, 0x71, 0x32, 0xf8, 0xaa, 0x60, 0x23, 0xe9, 0xa5, 0x6f, 0x2c, 0xe6, 0xfd, 0x37, 0x74, 0xbe, 0xf2, 0x38, 0x7b, 0xb1, 0xe3, 0x29, 0x6a, 0xa0, 0xec, 0x26, 0x65, 0xaf, 0xc1, 0xb, 0x48, 0x82, 0xce, 0x4, 0x47, 0x8d, 0xdf, 0x15, 0x56, 0x9c, 0xd0, 0x1a, 0x59, 0x93, 0x85, 0x4f, 0xc, 0xc6, 0x8a, 0x40, 0x3, 0xc9, 0x9b, 0x51, 0x12, 0xd8, 0x94, 0x5e, 0x1d, 0xd7, 0xb9, 0x73, 0x30, 0xfa, 0xb6, 0x7c, 0x3f, 0xf5, 0xa7, 0x6d, 0x2e, 0xe4, 0xa8, 0x62, 0x21, 0xeb, 0xd, 0xc7, 0x84, 0x4e, 0x2, 0xc8, 0x8b, 0x41, 0x13, 0xd9, 0x9a, 0x50, 0x1c, 0xd6, 0x95, 0x5f, 0x31, 0xfb, 0xb8, 0x72, 0x3e, 0xf4, 0xb7, 0x7d, 0x2f, 0xe5, 0xa6, 0x6c, 0x20, 0xea, 0xa9, 0x63, 0x75, 0xbf, 0xfc, 0x36, 0x7a, 0xb0, 0xf3, 0x39, 0x6b, 0xa1, 0xe2, 0x28, 0x64, 0xae, 0xed, 0x27, 0x49, 0x83, 0xc0, 0xa, 0x46, 0x8c, 0xcf, 0x5, 0x57, 0x9d, 0xde, 0x14, 0x58, 0x92, 0xd1, 0x1b},
- {0x0, 0xcb, 0x8b, 0x40, 0xb, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d, 0x2c, 0xe7, 0xa7, 0x6c, 0x27, 0xec, 0xac, 0x67, 0x3a, 0xf1, 0xb1, 0x7a, 0x31, 0xfa, 0xba, 0x71, 0x58, 0x93, 0xd3, 0x18, 0x53, 0x98, 0xd8, 0x13, 0x4e, 0x85, 0xc5, 0xe, 0x45, 0x8e, 0xce, 0x5, 0x74, 0xbf, 0xff, 0x34, 0x7f, 0xb4, 0xf4, 0x3f, 0x62, 0xa9, 0xe9, 0x22, 0x69, 0xa2, 0xe2, 0x29, 0xb0, 0x7b, 0x3b, 0xf0, 0xbb, 0x70, 0x30, 0xfb, 0xa6, 0x6d, 0x2d, 0xe6, 0xad, 0x66, 0x26, 0xed, 0x9c, 0x57, 0x17, 0xdc, 0x97, 0x5c, 0x1c, 0xd7, 0x8a, 0x41, 0x1, 0xca, 0x81, 0x4a, 0xa, 0xc1, 0xe8, 0x23, 0x63, 0xa8, 0xe3, 0x28, 0x68, 0xa3, 0xfe, 0x35, 0x75, 0xbe, 0xf5, 0x3e, 0x7e, 0xb5, 0xc4, 0xf, 0x4f, 0x84, 0xcf, 0x4, 0x44, 0x8f, 0xd2, 0x19, 0x59, 0x92, 0xd9, 0x12, 0x52, 0x99, 0x7d, 0xb6, 0xf6, 0x3d, 0x76, 0xbd, 0xfd, 0x36, 0x6b, 0xa0, 0xe0, 0x2b, 0x60, 0xab, 0xeb, 0x20, 0x51, 0x9a, 0xda, 0x11, 0x5a, 0x91, 0xd1, 0x1a, 0x47, 0x8c, 0xcc, 0x7, 0x4c, 0x87, 0xc7, 0xc, 0x25, 0xee, 0xae, 0x65, 0x2e, 0xe5, 0xa5, 0x6e, 0x33, 0xf8, 0xb8, 0x73, 0x38, 0xf3, 0xb3, 0x78, 0x9, 0xc2, 0x82, 0x49, 0x2, 0xc9, 0x89, 0x42, 0x1f, 0xd4, 0x94, 0x5f, 0x14, 0xdf, 0x9f, 0x54, 0xcd, 0x6, 0x46, 0x8d, 0xc6, 0xd, 0x4d, 0x86, 0xdb, 0x10, 0x50, 0x9b, 0xd0, 0x1b, 0x5b, 0x90, 0xe1, 0x2a, 0x6a, 0xa1, 0xea, 0x21, 0x61, 0xaa, 0xf7, 0x3c, 0x7c, 0xb7, 0xfc, 0x37, 0x77, 0xbc, 0x95, 0x5e, 0x1e, 0xd5, 0x9e, 0x55, 0x15, 0xde, 0x83, 0x48, 0x8, 0xc3, 0x88, 0x43, 0x3, 0xc8, 0xb9, 0x72, 0x32, 0xf9, 0xb2, 0x79, 0x39, 0xf2, 0xaf, 0x64, 0x24, 0xef, 0xa4, 0x6f, 0x2f, 0xe4},
- {0x0, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70, 0x5c, 0x90, 0xd9, 0x15, 0x4b, 0x87, 0xce, 0x2, 0x72, 0xbe, 0xf7, 0x3b, 0x65, 0xa9, 0xe0, 0x2c, 0xb8, 0x74, 0x3d, 0xf1, 0xaf, 0x63, 0x2a, 0xe6, 0x96, 0x5a, 0x13, 0xdf, 0x81, 0x4d, 0x4, 0xc8, 0xe4, 0x28, 0x61, 0xad, 0xf3, 0x3f, 0x76, 0xba, 0xca, 0x6, 0x4f, 0x83, 0xdd, 0x11, 0x58, 0x94, 0x6d, 0xa1, 0xe8, 0x24, 0x7a, 0xb6, 0xff, 0x33, 0x43, 0x8f, 0xc6, 0xa, 0x54, 0x98, 0xd1, 0x1d, 0x31, 0xfd, 0xb4, 0x78, 0x26, 0xea, 0xa3, 0x6f, 0x1f, 0xd3, 0x9a, 0x56, 0x8, 0xc4, 0x8d, 0x41, 0xd5, 0x19, 0x50, 0x9c, 0xc2, 0xe, 0x47, 0x8b, 0xfb, 0x37, 0x7e, 0xb2, 0xec, 0x20, 0x69, 0xa5, 0x89, 0x45, 0xc, 0xc0, 0x9e, 0x52, 0x1b, 0xd7, 0xa7, 0x6b, 0x22, 0xee, 0xb0, 0x7c, 0x35, 0xf9, 0xda, 0x16, 0x5f, 0x93, 0xcd, 0x1, 0x48, 0x84, 0xf4, 0x38, 0x71, 0xbd, 0xe3, 0x2f, 0x66, 0xaa, 0x86, 0x4a, 0x3, 0xcf, 0x91, 0x5d, 0x14, 0xd8, 0xa8, 0x64, 0x2d, 0xe1, 0xbf, 0x73, 0x3a, 0xf6, 0x62, 0xae, 0xe7, 0x2b, 0x75, 0xb9, 0xf0, 0x3c, 0x4c, 0x80, 0xc9, 0x5, 0x5b, 0x97, 0xde, 0x12, 0x3e, 0xf2, 0xbb, 0x77, 0x29, 0xe5, 0xac, 0x60, 0x10, 0xdc, 0x95, 0x59, 0x7, 0xcb, 0x82, 0x4e, 0xb7, 0x7b, 0x32, 0xfe, 0xa0, 0x6c, 0x25, 0xe9, 0x99, 0x55, 0x1c, 0xd0, 0x8e, 0x42, 0xb, 0xc7, 0xeb, 0x27, 0x6e, 0xa2, 0xfc, 0x30, 0x79, 0xb5, 0xc5, 0x9, 0x40, 0x8c, 0xd2, 0x1e, 0x57, 0x9b, 0xf, 0xc3, 0x8a, 0x46, 0x18, 0xd4, 0x9d, 0x51, 0x21, 0xed, 0xa4, 0x68, 0x36, 0xfa, 0xb3, 0x7f, 0x53, 0x9f, 0xd6, 0x1a, 0x44, 0x88, 0xc1, 0xd, 0x7d, 0xb1, 0xf8, 0x34, 0x6a, 0xa6, 0xef, 0x23},
- {0x0, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f, 0x4c, 0x81, 0xcb, 0x6, 0x5f, 0x92, 0xd8, 0x15, 0x6a, 0xa7, 0xed, 0x20, 0x79, 0xb4, 0xfe, 0x33, 0x98, 0x55, 0x1f, 0xd2, 0x8b, 0x46, 0xc, 0xc1, 0xbe, 0x73, 0x39, 0xf4, 0xad, 0x60, 0x2a, 0xe7, 0xd4, 0x19, 0x53, 0x9e, 0xc7, 0xa, 0x40, 0x8d, 0xf2, 0x3f, 0x75, 0xb8, 0xe1, 0x2c, 0x66, 0xab, 0x2d, 0xe0, 0xaa, 0x67, 0x3e, 0xf3, 0xb9, 0x74, 0xb, 0xc6, 0x8c, 0x41, 0x18, 0xd5, 0x9f, 0x52, 0x61, 0xac, 0xe6, 0x2b, 0x72, 0xbf, 0xf5, 0x38, 0x47, 0x8a, 0xc0, 0xd, 0x54, 0x99, 0xd3, 0x1e, 0xb5, 0x78, 0x32, 0xff, 0xa6, 0x6b, 0x21, 0xec, 0x93, 0x5e, 0x14, 0xd9, 0x80, 0x4d, 0x7, 0xca, 0xf9, 0x34, 0x7e, 0xb3, 0xea, 0x27, 0x6d, 0xa0, 0xdf, 0x12, 0x58, 0x95, 0xcc, 0x1, 0x4b, 0x86, 0x5a, 0x97, 0xdd, 0x10, 0x49, 0x84, 0xce, 0x3, 0x7c, 0xb1, 0xfb, 0x36, 0x6f, 0xa2, 0xe8, 0x25, 0x16, 0xdb, 0x91, 0x5c, 0x5, 0xc8, 0x82, 0x4f, 0x30, 0xfd, 0xb7, 0x7a, 0x23, 0xee, 0xa4, 0x69, 0xc2, 0xf, 0x45, 0x88, 0xd1, 0x1c, 0x56, 0x9b, 0xe4, 0x29, 0x63, 0xae, 0xf7, 0x3a, 0x70, 0xbd, 0x8e, 0x43, 0x9, 0xc4, 0x9d, 0x50, 0x1a, 0xd7, 0xa8, 0x65, 0x2f, 0xe2, 0xbb, 0x76, 0x3c, 0xf1, 0x77, 0xba, 0xf0, 0x3d, 0x64, 0xa9, 0xe3, 0x2e, 0x51, 0x9c, 0xd6, 0x1b, 0x42, 0x8f, 0xc5, 0x8, 0x3b, 0xf6, 0xbc, 0x71, 0x28, 0xe5, 0xaf, 0x62, 0x1d, 0xd0, 0x9a, 0x57, 0xe, 0xc3, 0x89, 0x44, 0xef, 0x22, 0x68, 0xa5, 0xfc, 0x31, 0x7b, 0xb6, 0xc9, 0x4, 0x4e, 0x83, 0xda, 0x17, 0x5d, 0x90, 0xa3, 0x6e, 0x24, 0xe9, 0xb0, 0x7d, 0x37, 0xfa, 0x85, 0x48, 0x2, 0xcf, 0x96, 0x5b, 0x11, 0xdc},
- {0x0, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e, 0x7c, 0xb2, 0xfd, 0x33, 0x63, 0xad, 0xe2, 0x2c, 0x42, 0x8c, 0xc3, 0xd, 0x5d, 0x93, 0xdc, 0x12, 0xf8, 0x36, 0x79, 0xb7, 0xe7, 0x29, 0x66, 0xa8, 0xc6, 0x8, 0x47, 0x89, 0xd9, 0x17, 0x58, 0x96, 0x84, 0x4a, 0x5, 0xcb, 0x9b, 0x55, 0x1a, 0xd4, 0xba, 0x74, 0x3b, 0xf5, 0xa5, 0x6b, 0x24, 0xea, 0xed, 0x23, 0x6c, 0xa2, 0xf2, 0x3c, 0x73, 0xbd, 0xd3, 0x1d, 0x52, 0x9c, 0xcc, 0x2, 0x4d, 0x83, 0x91, 0x5f, 0x10, 0xde, 0x8e, 0x40, 0xf, 0xc1, 0xaf, 0x61, 0x2e, 0xe0, 0xb0, 0x7e, 0x31, 0xff, 0x15, 0xdb, 0x94, 0x5a, 0xa, 0xc4, 0x8b, 0x45, 0x2b, 0xe5, 0xaa, 0x64, 0x34, 0xfa, 0xb5, 0x7b, 0x69, 0xa7, 0xe8, 0x26, 0x76, 0xb8, 0xf7, 0x39, 0x57, 0x99, 0xd6, 0x18, 0x48, 0x86, 0xc9, 0x7, 0xc7, 0x9, 0x46, 0x88, 0xd8, 0x16, 0x59, 0x97, 0xf9, 0x37, 0x78, 0xb6, 0xe6, 0x28, 0x67, 0xa9, 0xbb, 0x75, 0x3a, 0xf4, 0xa4, 0x6a, 0x25, 0xeb, 0x85, 0x4b, 0x4, 0xca, 0x9a, 0x54, 0x1b, 0xd5, 0x3f, 0xf1, 0xbe, 0x70, 0x20, 0xee, 0xa1, 0x6f, 0x1, 0xcf, 0x80, 0x4e, 0x1e, 0xd0, 0x9f, 0x51, 0x43, 0x8d, 0xc2, 0xc, 0x5c, 0x92, 0xdd, 0x13, 0x7d, 0xb3, 0xfc, 0x32, 0x62, 0xac, 0xe3, 0x2d, 0x2a, 0xe4, 0xab, 0x65, 0x35, 0xfb, 0xb4, 0x7a, 0x14, 0xda, 0x95, 0x5b, 0xb, 0xc5, 0x8a, 0x44, 0x56, 0x98, 0xd7, 0x19, 0x49, 0x87, 0xc8, 0x6, 0x68, 0xa6, 0xe9, 0x27, 0x77, 0xb9, 0xf6, 0x38, 0xd2, 0x1c, 0x53, 0x9d, 0xcd, 0x3, 0x4c, 0x82, 0xec, 0x22, 0x6d, 0xa3, 0xf3, 0x3d, 0x72, 0xbc, 0xae, 0x60, 0x2f, 0xe1, 0xb1, 0x7f, 0x30, 0xfe, 0x90, 0x5e, 0x11, 0xdf, 0x8f, 0x41, 0xe, 0xc0},
- {0x0, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61, 0x6c, 0xa3, 0xef, 0x20, 0x77, 0xb8, 0xf4, 0x3b, 0x5a, 0x95, 0xd9, 0x16, 0x41, 0x8e, 0xc2, 0xd, 0xd8, 0x17, 0x5b, 0x94, 0xc3, 0xc, 0x40, 0x8f, 0xee, 0x21, 0x6d, 0xa2, 0xf5, 0x3a, 0x76, 0xb9, 0xb4, 0x7b, 0x37, 0xf8, 0xaf, 0x60, 0x2c, 0xe3, 0x82, 0x4d, 0x1, 0xce, 0x99, 0x56, 0x1a, 0xd5, 0xad, 0x62, 0x2e, 0xe1, 0xb6, 0x79, 0x35, 0xfa, 0x9b, 0x54, 0x18, 0xd7, 0x80, 0x4f, 0x3, 0xcc, 0xc1, 0xe, 0x42, 0x8d, 0xda, 0x15, 0x59, 0x96, 0xf7, 0x38, 0x74, 0xbb, 0xec, 0x23, 0x6f, 0xa0, 0x75, 0xba, 0xf6, 0x39, 0x6e, 0xa1, 0xed, 0x22, 0x43, 0x8c, 0xc0, 0xf, 0x58, 0x97, 0xdb, 0x14, 0x19, 0xd6, 0x9a, 0x55, 0x2, 0xcd, 0x81, 0x4e, 0x2f, 0xe0, 0xac, 0x63, 0x34, 0xfb, 0xb7, 0x78, 0x47, 0x88, 0xc4, 0xb, 0x5c, 0x93, 0xdf, 0x10, 0x71, 0xbe, 0xf2, 0x3d, 0x6a, 0xa5, 0xe9, 0x26, 0x2b, 0xe4, 0xa8, 0x67, 0x30, 0xff, 0xb3, 0x7c, 0x1d, 0xd2, 0x9e, 0x51, 0x6, 0xc9, 0x85, 0x4a, 0x9f, 0x50, 0x1c, 0xd3, 0x84, 0x4b, 0x7, 0xc8, 0xa9, 0x66, 0x2a, 0xe5, 0xb2, 0x7d, 0x31, 0xfe, 0xf3, 0x3c, 0x70, 0xbf, 0xe8, 0x27, 0x6b, 0xa4, 0xc5, 0xa, 0x46, 0x89, 0xde, 0x11, 0x5d, 0x92, 0xea, 0x25, 0x69, 0xa6, 0xf1, 0x3e, 0x72, 0xbd, 0xdc, 0x13, 0x5f, 0x90, 0xc7, 0x8, 0x44, 0x8b, 0x86, 0x49, 0x5, 0xca, 0x9d, 0x52, 0x1e, 0xd1, 0xb0, 0x7f, 0x33, 0xfc, 0xab, 0x64, 0x28, 0xe7, 0x32, 0xfd, 0xb1, 0x7e, 0x29, 0xe6, 0xaa, 0x65, 0x4, 0xcb, 0x87, 0x48, 0x1f, 0xd0, 0x9c, 0x53, 0x5e, 0x91, 0xdd, 0x12, 0x45, 0x8a, 0xc6, 0x9, 0x68, 0xa7, 0xeb, 0x24, 0x73, 0xbc, 0xf0, 0x3f},
- {0x0, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0xa, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4, 0x81, 0x51, 0x3c, 0xec, 0xe6, 0x36, 0x5b, 0x8b, 0x4f, 0x9f, 0xf2, 0x22, 0x28, 0xf8, 0x95, 0x45, 0x1f, 0xcf, 0xa2, 0x72, 0x78, 0xa8, 0xc5, 0x15, 0xd1, 0x1, 0x6c, 0xbc, 0xb6, 0x66, 0xb, 0xdb, 0x9e, 0x4e, 0x23, 0xf3, 0xf9, 0x29, 0x44, 0x94, 0x50, 0x80, 0xed, 0x3d, 0x37, 0xe7, 0x8a, 0x5a, 0x3e, 0xee, 0x83, 0x53, 0x59, 0x89, 0xe4, 0x34, 0xf0, 0x20, 0x4d, 0x9d, 0x97, 0x47, 0x2a, 0xfa, 0xbf, 0x6f, 0x2, 0xd2, 0xd8, 0x8, 0x65, 0xb5, 0x71, 0xa1, 0xcc, 0x1c, 0x16, 0xc6, 0xab, 0x7b, 0x21, 0xf1, 0x9c, 0x4c, 0x46, 0x96, 0xfb, 0x2b, 0xef, 0x3f, 0x52, 0x82, 0x88, 0x58, 0x35, 0xe5, 0xa0, 0x70, 0x1d, 0xcd, 0xc7, 0x17, 0x7a, 0xaa, 0x6e, 0xbe, 0xd3, 0x3, 0x9, 0xd9, 0xb4, 0x64, 0x7c, 0xac, 0xc1, 0x11, 0x1b, 0xcb, 0xa6, 0x76, 0xb2, 0x62, 0xf, 0xdf, 0xd5, 0x5, 0x68, 0xb8, 0xfd, 0x2d, 0x40, 0x90, 0x9a, 0x4a, 0x27, 0xf7, 0x33, 0xe3, 0x8e, 0x5e, 0x54, 0x84, 0xe9, 0x39, 0x63, 0xb3, 0xde, 0xe, 0x4, 0xd4, 0xb9, 0x69, 0xad, 0x7d, 0x10, 0xc0, 0xca, 0x1a, 0x77, 0xa7, 0xe2, 0x32, 0x5f, 0x8f, 0x85, 0x55, 0x38, 0xe8, 0x2c, 0xfc, 0x91, 0x41, 0x4b, 0x9b, 0xf6, 0x26, 0x42, 0x92, 0xff, 0x2f, 0x25, 0xf5, 0x98, 0x48, 0x8c, 0x5c, 0x31, 0xe1, 0xeb, 0x3b, 0x56, 0x86, 0xc3, 0x13, 0x7e, 0xae, 0xa4, 0x74, 0x19, 0xc9, 0xd, 0xdd, 0xb0, 0x60, 0x6a, 0xba, 0xd7, 0x7, 0x5d, 0x8d, 0xe0, 0x30, 0x3a, 0xea, 0x87, 0x57, 0x93, 0x43, 0x2e, 0xfe, 0xf4, 0x24, 0x49, 0x99, 0xdc, 0xc, 0x61, 0xb1, 0xbb, 0x6b, 0x6, 0xd6, 0x12, 0xc2, 0xaf, 0x7f, 0x75, 0xa5, 0xc8, 0x18},
- {0x0, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0xd, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb, 0x91, 0x40, 0x2e, 0xff, 0xf2, 0x23, 0x4d, 0x9c, 0x57, 0x86, 0xe8, 0x39, 0x34, 0xe5, 0x8b, 0x5a, 0x3f, 0xee, 0x80, 0x51, 0x5c, 0x8d, 0xe3, 0x32, 0xf9, 0x28, 0x46, 0x97, 0x9a, 0x4b, 0x25, 0xf4, 0xae, 0x7f, 0x11, 0xc0, 0xcd, 0x1c, 0x72, 0xa3, 0x68, 0xb9, 0xd7, 0x6, 0xb, 0xda, 0xb4, 0x65, 0x7e, 0xaf, 0xc1, 0x10, 0x1d, 0xcc, 0xa2, 0x73, 0xb8, 0x69, 0x7, 0xd6, 0xdb, 0xa, 0x64, 0xb5, 0xef, 0x3e, 0x50, 0x81, 0x8c, 0x5d, 0x33, 0xe2, 0x29, 0xf8, 0x96, 0x47, 0x4a, 0x9b, 0xf5, 0x24, 0x41, 0x90, 0xfe, 0x2f, 0x22, 0xf3, 0x9d, 0x4c, 0x87, 0x56, 0x38, 0xe9, 0xe4, 0x35, 0x5b, 0x8a, 0xd0, 0x1, 0x6f, 0xbe, 0xb3, 0x62, 0xc, 0xdd, 0x16, 0xc7, 0xa9, 0x78, 0x75, 0xa4, 0xca, 0x1b, 0xfc, 0x2d, 0x43, 0x92, 0x9f, 0x4e, 0x20, 0xf1, 0x3a, 0xeb, 0x85, 0x54, 0x59, 0x88, 0xe6, 0x37, 0x6d, 0xbc, 0xd2, 0x3, 0xe, 0xdf, 0xb1, 0x60, 0xab, 0x7a, 0x14, 0xc5, 0xc8, 0x19, 0x77, 0xa6, 0xc3, 0x12, 0x7c, 0xad, 0xa0, 0x71, 0x1f, 0xce, 0x5, 0xd4, 0xba, 0x6b, 0x66, 0xb7, 0xd9, 0x8, 0x52, 0x83, 0xed, 0x3c, 0x31, 0xe0, 0x8e, 0x5f, 0x94, 0x45, 0x2b, 0xfa, 0xf7, 0x26, 0x48, 0x99, 0x82, 0x53, 0x3d, 0xec, 0xe1, 0x30, 0x5e, 0x8f, 0x44, 0x95, 0xfb, 0x2a, 0x27, 0xf6, 0x98, 0x49, 0x13, 0xc2, 0xac, 0x7d, 0x70, 0xa1, 0xcf, 0x1e, 0xd5, 0x4, 0x6a, 0xbb, 0xb6, 0x67, 0x9, 0xd8, 0xbd, 0x6c, 0x2, 0xd3, 0xde, 0xf, 0x61, 0xb0, 0x7b, 0xaa, 0xc4, 0x15, 0x18, 0xc9, 0xa7, 0x76, 0x2c, 0xfd, 0x93, 0x42, 0x4f, 0x9e, 0xf0, 0x21, 0xea, 0x3b, 0x55, 0x84, 0x89, 0x58, 0x36, 0xe7},
- {0x0, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x4, 0xde, 0xc, 0x67, 0xb5, 0xb1, 0x63, 0x8, 0xda, 0xa1, 0x73, 0x18, 0xca, 0xce, 0x1c, 0x77, 0xa5, 0x7f, 0xad, 0xc6, 0x14, 0x10, 0xc2, 0xa9, 0x7b, 0x5f, 0x8d, 0xe6, 0x34, 0x30, 0xe2, 0x89, 0x5b, 0x81, 0x53, 0x38, 0xea, 0xee, 0x3c, 0x57, 0x85, 0xfe, 0x2c, 0x47, 0x95, 0x91, 0x43, 0x28, 0xfa, 0x20, 0xf2, 0x99, 0x4b, 0x4f, 0x9d, 0xf6, 0x24, 0xbe, 0x6c, 0x7, 0xd5, 0xd1, 0x3, 0x68, 0xba, 0x60, 0xb2, 0xd9, 0xb, 0xf, 0xdd, 0xb6, 0x64, 0x1f, 0xcd, 0xa6, 0x74, 0x70, 0xa2, 0xc9, 0x1b, 0xc1, 0x13, 0x78, 0xaa, 0xae, 0x7c, 0x17, 0xc5, 0xe1, 0x33, 0x58, 0x8a, 0x8e, 0x5c, 0x37, 0xe5, 0x3f, 0xed, 0x86, 0x54, 0x50, 0x82, 0xe9, 0x3b, 0x40, 0x92, 0xf9, 0x2b, 0x2f, 0xfd, 0x96, 0x44, 0x9e, 0x4c, 0x27, 0xf5, 0xf1, 0x23, 0x48, 0x9a, 0x61, 0xb3, 0xd8, 0xa, 0xe, 0xdc, 0xb7, 0x65, 0xbf, 0x6d, 0x6, 0xd4, 0xd0, 0x2, 0x69, 0xbb, 0xc0, 0x12, 0x79, 0xab, 0xaf, 0x7d, 0x16, 0xc4, 0x1e, 0xcc, 0xa7, 0x75, 0x71, 0xa3, 0xc8, 0x1a, 0x3e, 0xec, 0x87, 0x55, 0x51, 0x83, 0xe8, 0x3a, 0xe0, 0x32, 0x59, 0x8b, 0x8f, 0x5d, 0x36, 0xe4, 0x9f, 0x4d, 0x26, 0xf4, 0xf0, 0x22, 0x49, 0x9b, 0x41, 0x93, 0xf8, 0x2a, 0x2e, 0xfc, 0x97, 0x45, 0xdf, 0xd, 0x66, 0xb4, 0xb0, 0x62, 0x9, 0xdb, 0x1, 0xd3, 0xb8, 0x6a, 0x6e, 0xbc, 0xd7, 0x5, 0x7e, 0xac, 0xc7, 0x15, 0x11, 0xc3, 0xa8, 0x7a, 0xa0, 0x72, 0x19, 0xcb, 0xcf, 0x1d, 0x76, 0xa4, 0x80, 0x52, 0x39, 0xeb, 0xef, 0x3d, 0x56, 0x84, 0x5e, 0x8c, 0xe7, 0x35, 0x31, 0xe3, 0x88, 0x5a, 0x21, 0xf3, 0x98, 0x4a, 0x4e, 0x9c, 0xf7, 0x25, 0xff, 0x2d, 0x46, 0x94, 0x90, 0x42, 0x29, 0xfb},
- {0x0, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x3, 0xd6, 0x5, 0x6d, 0xbe, 0xbd, 0x6e, 0x6, 0xd5, 0xb1, 0x62, 0xa, 0xd9, 0xda, 0x9, 0x61, 0xb2, 0x67, 0xb4, 0xdc, 0xf, 0xc, 0xdf, 0xb7, 0x64, 0x7f, 0xac, 0xc4, 0x17, 0x14, 0xc7, 0xaf, 0x7c, 0xa9, 0x7a, 0x12, 0xc1, 0xc2, 0x11, 0x79, 0xaa, 0xce, 0x1d, 0x75, 0xa6, 0xa5, 0x76, 0x1e, 0xcd, 0x18, 0xcb, 0xa3, 0x70, 0x73, 0xa0, 0xc8, 0x1b, 0xfe, 0x2d, 0x45, 0x96, 0x95, 0x46, 0x2e, 0xfd, 0x28, 0xfb, 0x93, 0x40, 0x43, 0x90, 0xf8, 0x2b, 0x4f, 0x9c, 0xf4, 0x27, 0x24, 0xf7, 0x9f, 0x4c, 0x99, 0x4a, 0x22, 0xf1, 0xf2, 0x21, 0x49, 0x9a, 0x81, 0x52, 0x3a, 0xe9, 0xea, 0x39, 0x51, 0x82, 0x57, 0x84, 0xec, 0x3f, 0x3c, 0xef, 0x87, 0x54, 0x30, 0xe3, 0x8b, 0x58, 0x5b, 0x88, 0xe0, 0x33, 0xe6, 0x35, 0x5d, 0x8e, 0x8d, 0x5e, 0x36, 0xe5, 0xe1, 0x32, 0x5a, 0x89, 0x8a, 0x59, 0x31, 0xe2, 0x37, 0xe4, 0x8c, 0x5f, 0x5c, 0x8f, 0xe7, 0x34, 0x50, 0x83, 0xeb, 0x38, 0x3b, 0xe8, 0x80, 0x53, 0x86, 0x55, 0x3d, 0xee, 0xed, 0x3e, 0x56, 0x85, 0x9e, 0x4d, 0x25, 0xf6, 0xf5, 0x26, 0x4e, 0x9d, 0x48, 0x9b, 0xf3, 0x20, 0x23, 0xf0, 0x98, 0x4b, 0x2f, 0xfc, 0x94, 0x47, 0x44, 0x97, 0xff, 0x2c, 0xf9, 0x2a, 0x42, 0x91, 0x92, 0x41, 0x29, 0xfa, 0x1f, 0xcc, 0xa4, 0x77, 0x74, 0xa7, 0xcf, 0x1c, 0xc9, 0x1a, 0x72, 0xa1, 0xa2, 0x71, 0x19, 0xca, 0xae, 0x7d, 0x15, 0xc6, 0xc5, 0x16, 0x7e, 0xad, 0x78, 0xab, 0xc3, 0x10, 0x13, 0xc0, 0xa8, 0x7b, 0x60, 0xb3, 0xdb, 0x8, 0xb, 0xd8, 0xb0, 0x63, 0xb6, 0x65, 0xd, 0xde, 0xdd, 0xe, 0x66, 0xb5, 0xd1, 0x2, 0x6a, 0xb9, 0xba, 0x69, 0x1, 0xd2, 0x7, 0xd4, 0xbc, 0x6f, 0x6c, 0xbf, 0xd7, 0x4},
- {0x0, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8, 0xc1, 0x15, 0x74, 0xa0, 0xb6, 0x62, 0x3, 0xd7, 0x2f, 0xfb, 0x9a, 0x4e, 0x58, 0x8c, 0xed, 0x39, 0x9f, 0x4b, 0x2a, 0xfe, 0xe8, 0x3c, 0x5d, 0x89, 0x71, 0xa5, 0xc4, 0x10, 0x6, 0xd2, 0xb3, 0x67, 0x5e, 0x8a, 0xeb, 0x3f, 0x29, 0xfd, 0x9c, 0x48, 0xb0, 0x64, 0x5, 0xd1, 0xc7, 0x13, 0x72, 0xa6, 0x23, 0xf7, 0x96, 0x42, 0x54, 0x80, 0xe1, 0x35, 0xcd, 0x19, 0x78, 0xac, 0xba, 0x6e, 0xf, 0xdb, 0xe2, 0x36, 0x57, 0x83, 0x95, 0x41, 0x20, 0xf4, 0xc, 0xd8, 0xb9, 0x6d, 0x7b, 0xaf, 0xce, 0x1a, 0xbc, 0x68, 0x9, 0xdd, 0xcb, 0x1f, 0x7e, 0xaa, 0x52, 0x86, 0xe7, 0x33, 0x25, 0xf1, 0x90, 0x44, 0x7d, 0xa9, 0xc8, 0x1c, 0xa, 0xde, 0xbf, 0x6b, 0x93, 0x47, 0x26, 0xf2, 0xe4, 0x30, 0x51, 0x85, 0x46, 0x92, 0xf3, 0x27, 0x31, 0xe5, 0x84, 0x50, 0xa8, 0x7c, 0x1d, 0xc9, 0xdf, 0xb, 0x6a, 0xbe, 0x87, 0x53, 0x32, 0xe6, 0xf0, 0x24, 0x45, 0x91, 0x69, 0xbd, 0xdc, 0x8, 0x1e, 0xca, 0xab, 0x7f, 0xd9, 0xd, 0x6c, 0xb8, 0xae, 0x7a, 0x1b, 0xcf, 0x37, 0xe3, 0x82, 0x56, 0x40, 0x94, 0xf5, 0x21, 0x18, 0xcc, 0xad, 0x79, 0x6f, 0xbb, 0xda, 0xe, 0xf6, 0x22, 0x43, 0x97, 0x81, 0x55, 0x34, 0xe0, 0x65, 0xb1, 0xd0, 0x4, 0x12, 0xc6, 0xa7, 0x73, 0x8b, 0x5f, 0x3e, 0xea, 0xfc, 0x28, 0x49, 0x9d, 0xa4, 0x70, 0x11, 0xc5, 0xd3, 0x7, 0x66, 0xb2, 0x4a, 0x9e, 0xff, 0x2b, 0x3d, 0xe9, 0x88, 0x5c, 0xfa, 0x2e, 0x4f, 0x9b, 0x8d, 0x59, 0x38, 0xec, 0x14, 0xc0, 0xa1, 0x75, 0x63, 0xb7, 0xd6, 0x2, 0x3b, 0xef, 0x8e, 0x5a, 0x4c, 0x98, 0xf9, 0x2d, 0xd5, 0x1, 0x60, 0xb4, 0xa2, 0x76, 0x17, 0xc3},
- {0x0, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7, 0xd1, 0x4, 0x66, 0xb3, 0xa2, 0x77, 0x15, 0xc0, 0x37, 0xe2, 0x80, 0x55, 0x44, 0x91, 0xf3, 0x26, 0xbf, 0x6a, 0x8, 0xdd, 0xcc, 0x19, 0x7b, 0xae, 0x59, 0x8c, 0xee, 0x3b, 0x2a, 0xff, 0x9d, 0x48, 0x6e, 0xbb, 0xd9, 0xc, 0x1d, 0xc8, 0xaa, 0x7f, 0x88, 0x5d, 0x3f, 0xea, 0xfb, 0x2e, 0x4c, 0x99, 0x63, 0xb6, 0xd4, 0x1, 0x10, 0xc5, 0xa7, 0x72, 0x85, 0x50, 0x32, 0xe7, 0xf6, 0x23, 0x41, 0x94, 0xb2, 0x67, 0x5, 0xd0, 0xc1, 0x14, 0x76, 0xa3, 0x54, 0x81, 0xe3, 0x36, 0x27, 0xf2, 0x90, 0x45, 0xdc, 0x9, 0x6b, 0xbe, 0xaf, 0x7a, 0x18, 0xcd, 0x3a, 0xef, 0x8d, 0x58, 0x49, 0x9c, 0xfe, 0x2b, 0xd, 0xd8, 0xba, 0x6f, 0x7e, 0xab, 0xc9, 0x1c, 0xeb, 0x3e, 0x5c, 0x89, 0x98, 0x4d, 0x2f, 0xfa, 0xc6, 0x13, 0x71, 0xa4, 0xb5, 0x60, 0x2, 0xd7, 0x20, 0xf5, 0x97, 0x42, 0x53, 0x86, 0xe4, 0x31, 0x17, 0xc2, 0xa0, 0x75, 0x64, 0xb1, 0xd3, 0x6, 0xf1, 0x24, 0x46, 0x93, 0x82, 0x57, 0x35, 0xe0, 0x79, 0xac, 0xce, 0x1b, 0xa, 0xdf, 0xbd, 0x68, 0x9f, 0x4a, 0x28, 0xfd, 0xec, 0x39, 0x5b, 0x8e, 0xa8, 0x7d, 0x1f, 0xca, 0xdb, 0xe, 0x6c, 0xb9, 0x4e, 0x9b, 0xf9, 0x2c, 0x3d, 0xe8, 0x8a, 0x5f, 0xa5, 0x70, 0x12, 0xc7, 0xd6, 0x3, 0x61, 0xb4, 0x43, 0x96, 0xf4, 0x21, 0x30, 0xe5, 0x87, 0x52, 0x74, 0xa1, 0xc3, 0x16, 0x7, 0xd2, 0xb0, 0x65, 0x92, 0x47, 0x25, 0xf0, 0xe1, 0x34, 0x56, 0x83, 0x1a, 0xcf, 0xad, 0x78, 0x69, 0xbc, 0xde, 0xb, 0xfc, 0x29, 0x4b, 0x9e, 0x8f, 0x5a, 0x38, 0xed, 0xcb, 0x1e, 0x7c, 0xa9, 0xb8, 0x6d, 0xf, 0xda, 0x2d, 0xf8, 0x9a, 0x4f, 0x5e, 0x8b, 0xe9, 0x3c},
- {0x0, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6, 0xe1, 0x37, 0x50, 0x86, 0x9e, 0x48, 0x2f, 0xf9, 0x1f, 0xc9, 0xae, 0x78, 0x60, 0xb6, 0xd1, 0x7, 0xdf, 0x9, 0x6e, 0xb8, 0xa0, 0x76, 0x11, 0xc7, 0x21, 0xf7, 0x90, 0x46, 0x5e, 0x88, 0xef, 0x39, 0x3e, 0xe8, 0x8f, 0x59, 0x41, 0x97, 0xf0, 0x26, 0xc0, 0x16, 0x71, 0xa7, 0xbf, 0x69, 0xe, 0xd8, 0xa3, 0x75, 0x12, 0xc4, 0xdc, 0xa, 0x6d, 0xbb, 0x5d, 0x8b, 0xec, 0x3a, 0x22, 0xf4, 0x93, 0x45, 0x42, 0x94, 0xf3, 0x25, 0x3d, 0xeb, 0x8c, 0x5a, 0xbc, 0x6a, 0xd, 0xdb, 0xc3, 0x15, 0x72, 0xa4, 0x7c, 0xaa, 0xcd, 0x1b, 0x3, 0xd5, 0xb2, 0x64, 0x82, 0x54, 0x33, 0xe5, 0xfd, 0x2b, 0x4c, 0x9a, 0x9d, 0x4b, 0x2c, 0xfa, 0xe2, 0x34, 0x53, 0x85, 0x63, 0xb5, 0xd2, 0x4, 0x1c, 0xca, 0xad, 0x7b, 0x5b, 0x8d, 0xea, 0x3c, 0x24, 0xf2, 0x95, 0x43, 0xa5, 0x73, 0x14, 0xc2, 0xda, 0xc, 0x6b, 0xbd, 0xba, 0x6c, 0xb, 0xdd, 0xc5, 0x13, 0x74, 0xa2, 0x44, 0x92, 0xf5, 0x23, 0x3b, 0xed, 0x8a, 0x5c, 0x84, 0x52, 0x35, 0xe3, 0xfb, 0x2d, 0x4a, 0x9c, 0x7a, 0xac, 0xcb, 0x1d, 0x5, 0xd3, 0xb4, 0x62, 0x65, 0xb3, 0xd4, 0x2, 0x1a, 0xcc, 0xab, 0x7d, 0x9b, 0x4d, 0x2a, 0xfc, 0xe4, 0x32, 0x55, 0x83, 0xf8, 0x2e, 0x49, 0x9f, 0x87, 0x51, 0x36, 0xe0, 0x6, 0xd0, 0xb7, 0x61, 0x79, 0xaf, 0xc8, 0x1e, 0x19, 0xcf, 0xa8, 0x7e, 0x66, 0xb0, 0xd7, 0x1, 0xe7, 0x31, 0x56, 0x80, 0x98, 0x4e, 0x29, 0xff, 0x27, 0xf1, 0x96, 0x40, 0x58, 0x8e, 0xe9, 0x3f, 0xd9, 0xf, 0x68, 0xbe, 0xa6, 0x70, 0x17, 0xc1, 0xc6, 0x10, 0x77, 0xa1, 0xb9, 0x6f, 0x8, 0xde, 0x38, 0xee, 0x89, 0x5f, 0x47, 0x91, 0xf6, 0x20},
- {0x0, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9, 0xf1, 0x26, 0x42, 0x95, 0x8a, 0x5d, 0x39, 0xee, 0x7, 0xd0, 0xb4, 0x63, 0x7c, 0xab, 0xcf, 0x18, 0xff, 0x28, 0x4c, 0x9b, 0x84, 0x53, 0x37, 0xe0, 0x9, 0xde, 0xba, 0x6d, 0x72, 0xa5, 0xc1, 0x16, 0xe, 0xd9, 0xbd, 0x6a, 0x75, 0xa2, 0xc6, 0x11, 0xf8, 0x2f, 0x4b, 0x9c, 0x83, 0x54, 0x30, 0xe7, 0xe3, 0x34, 0x50, 0x87, 0x98, 0x4f, 0x2b, 0xfc, 0x15, 0xc2, 0xa6, 0x71, 0x6e, 0xb9, 0xdd, 0xa, 0x12, 0xc5, 0xa1, 0x76, 0x69, 0xbe, 0xda, 0xd, 0xe4, 0x33, 0x57, 0x80, 0x9f, 0x48, 0x2c, 0xfb, 0x1c, 0xcb, 0xaf, 0x78, 0x67, 0xb0, 0xd4, 0x3, 0xea, 0x3d, 0x59, 0x8e, 0x91, 0x46, 0x22, 0xf5, 0xed, 0x3a, 0x5e, 0x89, 0x96, 0x41, 0x25, 0xf2, 0x1b, 0xcc, 0xa8, 0x7f, 0x60, 0xb7, 0xd3, 0x4, 0xdb, 0xc, 0x68, 0xbf, 0xa0, 0x77, 0x13, 0xc4, 0x2d, 0xfa, 0x9e, 0x49, 0x56, 0x81, 0xe5, 0x32, 0x2a, 0xfd, 0x99, 0x4e, 0x51, 0x86, 0xe2, 0x35, 0xdc, 0xb, 0x6f, 0xb8, 0xa7, 0x70, 0x14, 0xc3, 0x24, 0xf3, 0x97, 0x40, 0x5f, 0x88, 0xec, 0x3b, 0xd2, 0x5, 0x61, 0xb6, 0xa9, 0x7e, 0x1a, 0xcd, 0xd5, 0x2, 0x66, 0xb1, 0xae, 0x79, 0x1d, 0xca, 0x23, 0xf4, 0x90, 0x47, 0x58, 0x8f, 0xeb, 0x3c, 0x38, 0xef, 0x8b, 0x5c, 0x43, 0x94, 0xf0, 0x27, 0xce, 0x19, 0x7d, 0xaa, 0xb5, 0x62, 0x6, 0xd1, 0xc9, 0x1e, 0x7a, 0xad, 0xb2, 0x65, 0x1, 0xd6, 0x3f, 0xe8, 0x8c, 0x5b, 0x44, 0x93, 0xf7, 0x20, 0xc7, 0x10, 0x74, 0xa3, 0xbc, 0x6b, 0xf, 0xd8, 0x31, 0xe6, 0x82, 0x55, 0x4a, 0x9d, 0xf9, 0x2e, 0x36, 0xe1, 0x85, 0x52, 0x4d, 0x9a, 0xfe, 0x29, 0xc0, 0x17, 0x73, 0xa4, 0xbb, 0x6c, 0x8, 0xdf},
- {0x0, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc, 0x1, 0xd9, 0xac, 0x74, 0x46, 0x9e, 0xeb, 0x33, 0x8f, 0x57, 0x22, 0xfa, 0xc8, 0x10, 0x65, 0xbd, 0x2, 0xda, 0xaf, 0x77, 0x45, 0x9d, 0xe8, 0x30, 0x8c, 0x54, 0x21, 0xf9, 0xcb, 0x13, 0x66, 0xbe, 0x3, 0xdb, 0xae, 0x76, 0x44, 0x9c, 0xe9, 0x31, 0x8d, 0x55, 0x20, 0xf8, 0xca, 0x12, 0x67, 0xbf, 0x4, 0xdc, 0xa9, 0x71, 0x43, 0x9b, 0xee, 0x36, 0x8a, 0x52, 0x27, 0xff, 0xcd, 0x15, 0x60, 0xb8, 0x5, 0xdd, 0xa8, 0x70, 0x42, 0x9a, 0xef, 0x37, 0x8b, 0x53, 0x26, 0xfe, 0xcc, 0x14, 0x61, 0xb9, 0x6, 0xde, 0xab, 0x73, 0x41, 0x99, 0xec, 0x34, 0x88, 0x50, 0x25, 0xfd, 0xcf, 0x17, 0x62, 0xba, 0x7, 0xdf, 0xaa, 0x72, 0x40, 0x98, 0xed, 0x35, 0x89, 0x51, 0x24, 0xfc, 0xce, 0x16, 0x63, 0xbb, 0x8, 0xd0, 0xa5, 0x7d, 0x4f, 0x97, 0xe2, 0x3a, 0x86, 0x5e, 0x2b, 0xf3, 0xc1, 0x19, 0x6c, 0xb4, 0x9, 0xd1, 0xa4, 0x7c, 0x4e, 0x96, 0xe3, 0x3b, 0x87, 0x5f, 0x2a, 0xf2, 0xc0, 0x18, 0x6d, 0xb5, 0xa, 0xd2, 0xa7, 0x7f, 0x4d, 0x95, 0xe0, 0x38, 0x84, 0x5c, 0x29, 0xf1, 0xc3, 0x1b, 0x6e, 0xb6, 0xb, 0xd3, 0xa6, 0x7e, 0x4c, 0x94, 0xe1, 0x39, 0x85, 0x5d, 0x28, 0xf0, 0xc2, 0x1a, 0x6f, 0xb7, 0xc, 0xd4, 0xa1, 0x79, 0x4b, 0x93, 0xe6, 0x3e, 0x82, 0x5a, 0x2f, 0xf7, 0xc5, 0x1d, 0x68, 0xb0, 0xd, 0xd5, 0xa0, 0x78, 0x4a, 0x92, 0xe7, 0x3f, 0x83, 0x5b, 0x2e, 0xf6, 0xc4, 0x1c, 0x69, 0xb1, 0xe, 0xd6, 0xa3, 0x7b, 0x49, 0x91, 0xe4, 0x3c, 0x80, 0x58, 0x2d, 0xf5, 0xc7, 0x1f, 0x6a, 0xb2, 0xf, 0xd7, 0xa2, 0x7a, 0x48, 0x90, 0xe5, 0x3d, 0x81, 0x59, 0x2c, 0xf4, 0xc6, 0x1e, 0x6b, 0xb3},
- {0x0, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3, 0x11, 0xc8, 0xbe, 0x67, 0x52, 0x8b, 0xfd, 0x24, 0x97, 0x4e, 0x38, 0xe1, 0xd4, 0xd, 0x7b, 0xa2, 0x22, 0xfb, 0x8d, 0x54, 0x61, 0xb8, 0xce, 0x17, 0xa4, 0x7d, 0xb, 0xd2, 0xe7, 0x3e, 0x48, 0x91, 0x33, 0xea, 0x9c, 0x45, 0x70, 0xa9, 0xdf, 0x6, 0xb5, 0x6c, 0x1a, 0xc3, 0xf6, 0x2f, 0x59, 0x80, 0x44, 0x9d, 0xeb, 0x32, 0x7, 0xde, 0xa8, 0x71, 0xc2, 0x1b, 0x6d, 0xb4, 0x81, 0x58, 0x2e, 0xf7, 0x55, 0x8c, 0xfa, 0x23, 0x16, 0xcf, 0xb9, 0x60, 0xd3, 0xa, 0x7c, 0xa5, 0x90, 0x49, 0x3f, 0xe6, 0x66, 0xbf, 0xc9, 0x10, 0x25, 0xfc, 0x8a, 0x53, 0xe0, 0x39, 0x4f, 0x96, 0xa3, 0x7a, 0xc, 0xd5, 0x77, 0xae, 0xd8, 0x1, 0x34, 0xed, 0x9b, 0x42, 0xf1, 0x28, 0x5e, 0x87, 0xb2, 0x6b, 0x1d, 0xc4, 0x88, 0x51, 0x27, 0xfe, 0xcb, 0x12, 0x64, 0xbd, 0xe, 0xd7, 0xa1, 0x78, 0x4d, 0x94, 0xe2, 0x3b, 0x99, 0x40, 0x36, 0xef, 0xda, 0x3, 0x75, 0xac, 0x1f, 0xc6, 0xb0, 0x69, 0x5c, 0x85, 0xf3, 0x2a, 0xaa, 0x73, 0x5, 0xdc, 0xe9, 0x30, 0x46, 0x9f, 0x2c, 0xf5, 0x83, 0x5a, 0x6f, 0xb6, 0xc0, 0x19, 0xbb, 0x62, 0x14, 0xcd, 0xf8, 0x21, 0x57, 0x8e, 0x3d, 0xe4, 0x92, 0x4b, 0x7e, 0xa7, 0xd1, 0x8, 0xcc, 0x15, 0x63, 0xba, 0x8f, 0x56, 0x20, 0xf9, 0x4a, 0x93, 0xe5, 0x3c, 0x9, 0xd0, 0xa6, 0x7f, 0xdd, 0x4, 0x72, 0xab, 0x9e, 0x47, 0x31, 0xe8, 0x5b, 0x82, 0xf4, 0x2d, 0x18, 0xc1, 0xb7, 0x6e, 0xee, 0x37, 0x41, 0x98, 0xad, 0x74, 0x2, 0xdb, 0x68, 0xb1, 0xc7, 0x1e, 0x2b, 0xf2, 0x84, 0x5d, 0xff, 0x26, 0x50, 0x89, 0xbc, 0x65, 0x13, 0xca, 0x79, 0xa0, 0xd6, 0xf, 0x3a, 0xe3, 0x95, 0x4c},
- {0x0, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0xb, 0x78, 0xa2, 0x21, 0xfb, 0x88, 0x52, 0x6e, 0xb4, 0xc7, 0x1d, 0xbf, 0x65, 0x16, 0xcc, 0xf0, 0x2a, 0x59, 0x83, 0x42, 0x98, 0xeb, 0x31, 0xd, 0xd7, 0xa4, 0x7e, 0xdc, 0x6, 0x75, 0xaf, 0x93, 0x49, 0x3a, 0xe0, 0x63, 0xb9, 0xca, 0x10, 0x2c, 0xf6, 0x85, 0x5f, 0xfd, 0x27, 0x54, 0x8e, 0xb2, 0x68, 0x1b, 0xc1, 0x84, 0x5e, 0x2d, 0xf7, 0xcb, 0x11, 0x62, 0xb8, 0x1a, 0xc0, 0xb3, 0x69, 0x55, 0x8f, 0xfc, 0x26, 0xa5, 0x7f, 0xc, 0xd6, 0xea, 0x30, 0x43, 0x99, 0x3b, 0xe1, 0x92, 0x48, 0x74, 0xae, 0xdd, 0x7, 0xc6, 0x1c, 0x6f, 0xb5, 0x89, 0x53, 0x20, 0xfa, 0x58, 0x82, 0xf1, 0x2b, 0x17, 0xcd, 0xbe, 0x64, 0xe7, 0x3d, 0x4e, 0x94, 0xa8, 0x72, 0x1, 0xdb, 0x79, 0xa3, 0xd0, 0xa, 0x36, 0xec, 0x9f, 0x45, 0x15, 0xcf, 0xbc, 0x66, 0x5a, 0x80, 0xf3, 0x29, 0x8b, 0x51, 0x22, 0xf8, 0xc4, 0x1e, 0x6d, 0xb7, 0x34, 0xee, 0x9d, 0x47, 0x7b, 0xa1, 0xd2, 0x8, 0xaa, 0x70, 0x3, 0xd9, 0xe5, 0x3f, 0x4c, 0x96, 0x57, 0x8d, 0xfe, 0x24, 0x18, 0xc2, 0xb1, 0x6b, 0xc9, 0x13, 0x60, 0xba, 0x86, 0x5c, 0x2f, 0xf5, 0x76, 0xac, 0xdf, 0x5, 0x39, 0xe3, 0x90, 0x4a, 0xe8, 0x32, 0x41, 0x9b, 0xa7, 0x7d, 0xe, 0xd4, 0x91, 0x4b, 0x38, 0xe2, 0xde, 0x4, 0x77, 0xad, 0xf, 0xd5, 0xa6, 0x7c, 0x40, 0x9a, 0xe9, 0x33, 0xb0, 0x6a, 0x19, 0xc3, 0xff, 0x25, 0x56, 0x8c, 0x2e, 0xf4, 0x87, 0x5d, 0x61, 0xbb, 0xc8, 0x12, 0xd3, 0x9, 0x7a, 0xa0, 0x9c, 0x46, 0x35, 0xef, 0x4d, 0x97, 0xe4, 0x3e, 0x2, 0xd8, 0xab, 0x71, 0xf2, 0x28, 0x5b, 0x81, 0xbd, 0x67, 0x14, 0xce, 0x6c, 0xb6, 0xc5, 0x1f, 0x23, 0xf9, 0x8a, 0x50},
- {0x0, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x6, 0x76, 0xad, 0x31, 0xea, 0x9a, 0x41, 0x7a, 0xa1, 0xd1, 0xa, 0xa7, 0x7c, 0xc, 0xd7, 0xec, 0x37, 0x47, 0x9c, 0x62, 0xb9, 0xc9, 0x12, 0x29, 0xf2, 0x82, 0x59, 0xf4, 0x2f, 0x5f, 0x84, 0xbf, 0x64, 0x14, 0xcf, 0x53, 0x88, 0xf8, 0x23, 0x18, 0xc3, 0xb3, 0x68, 0xc5, 0x1e, 0x6e, 0xb5, 0x8e, 0x55, 0x25, 0xfe, 0xc4, 0x1f, 0x6f, 0xb4, 0x8f, 0x54, 0x24, 0xff, 0x52, 0x89, 0xf9, 0x22, 0x19, 0xc2, 0xb2, 0x69, 0xf5, 0x2e, 0x5e, 0x85, 0xbe, 0x65, 0x15, 0xce, 0x63, 0xb8, 0xc8, 0x13, 0x28, 0xf3, 0x83, 0x58, 0xa6, 0x7d, 0xd, 0xd6, 0xed, 0x36, 0x46, 0x9d, 0x30, 0xeb, 0x9b, 0x40, 0x7b, 0xa0, 0xd0, 0xb, 0x97, 0x4c, 0x3c, 0xe7, 0xdc, 0x7, 0x77, 0xac, 0x1, 0xda, 0xaa, 0x71, 0x4a, 0x91, 0xe1, 0x3a, 0x95, 0x4e, 0x3e, 0xe5, 0xde, 0x5, 0x75, 0xae, 0x3, 0xd8, 0xa8, 0x73, 0x48, 0x93, 0xe3, 0x38, 0xa4, 0x7f, 0xf, 0xd4, 0xef, 0x34, 0x44, 0x9f, 0x32, 0xe9, 0x99, 0x42, 0x79, 0xa2, 0xd2, 0x9, 0xf7, 0x2c, 0x5c, 0x87, 0xbc, 0x67, 0x17, 0xcc, 0x61, 0xba, 0xca, 0x11, 0x2a, 0xf1, 0x81, 0x5a, 0xc6, 0x1d, 0x6d, 0xb6, 0x8d, 0x56, 0x26, 0xfd, 0x50, 0x8b, 0xfb, 0x20, 0x1b, 0xc0, 0xb0, 0x6b, 0x51, 0x8a, 0xfa, 0x21, 0x1a, 0xc1, 0xb1, 0x6a, 0xc7, 0x1c, 0x6c, 0xb7, 0x8c, 0x57, 0x27, 0xfc, 0x60, 0xbb, 0xcb, 0x10, 0x2b, 0xf0, 0x80, 0x5b, 0xf6, 0x2d, 0x5d, 0x86, 0xbd, 0x66, 0x16, 0xcd, 0x33, 0xe8, 0x98, 0x43, 0x78, 0xa3, 0xd3, 0x8, 0xa5, 0x7e, 0xe, 0xd5, 0xee, 0x35, 0x45, 0x9e, 0x2, 0xd9, 0xa9, 0x72, 0x49, 0x92, 0xe2, 0x39, 0x94, 0x4f, 0x3f, 0xe4, 0xdf, 0x4, 0x74, 0xaf},
- {0x0, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0xb, 0xd7, 0xf9, 0x25, 0x5c, 0x80, 0x41, 0x9d, 0xe4, 0x38, 0x16, 0xca, 0xb3, 0x6f, 0xef, 0x33, 0x4a, 0x96, 0xb8, 0x64, 0x1d, 0xc1, 0x82, 0x5e, 0x27, 0xfb, 0xd5, 0x9, 0x70, 0xac, 0x2c, 0xf0, 0x89, 0x55, 0x7b, 0xa7, 0xde, 0x2, 0xc3, 0x1f, 0x66, 0xba, 0x94, 0x48, 0x31, 0xed, 0x6d, 0xb1, 0xc8, 0x14, 0x3a, 0xe6, 0x9f, 0x43, 0x19, 0xc5, 0xbc, 0x60, 0x4e, 0x92, 0xeb, 0x37, 0xb7, 0x6b, 0x12, 0xce, 0xe0, 0x3c, 0x45, 0x99, 0x58, 0x84, 0xfd, 0x21, 0xf, 0xd3, 0xaa, 0x76, 0xf6, 0x2a, 0x53, 0x8f, 0xa1, 0x7d, 0x4, 0xd8, 0x9b, 0x47, 0x3e, 0xe2, 0xcc, 0x10, 0x69, 0xb5, 0x35, 0xe9, 0x90, 0x4c, 0x62, 0xbe, 0xc7, 0x1b, 0xda, 0x6, 0x7f, 0xa3, 0x8d, 0x51, 0x28, 0xf4, 0x74, 0xa8, 0xd1, 0xd, 0x23, 0xff, 0x86, 0x5a, 0x32, 0xee, 0x97, 0x4b, 0x65, 0xb9, 0xc0, 0x1c, 0x9c, 0x40, 0x39, 0xe5, 0xcb, 0x17, 0x6e, 0xb2, 0x73, 0xaf, 0xd6, 0xa, 0x24, 0xf8, 0x81, 0x5d, 0xdd, 0x1, 0x78, 0xa4, 0x8a, 0x56, 0x2f, 0xf3, 0xb0, 0x6c, 0x15, 0xc9, 0xe7, 0x3b, 0x42, 0x9e, 0x1e, 0xc2, 0xbb, 0x67, 0x49, 0x95, 0xec, 0x30, 0xf1, 0x2d, 0x54, 0x88, 0xa6, 0x7a, 0x3, 0xdf, 0x5f, 0x83, 0xfa, 0x26, 0x8, 0xd4, 0xad, 0x71, 0x2b, 0xf7, 0x8e, 0x52, 0x7c, 0xa0, 0xd9, 0x5, 0x85, 0x59, 0x20, 0xfc, 0xd2, 0xe, 0x77, 0xab, 0x6a, 0xb6, 0xcf, 0x13, 0x3d, 0xe1, 0x98, 0x44, 0xc4, 0x18, 0x61, 0xbd, 0x93, 0x4f, 0x36, 0xea, 0xa9, 0x75, 0xc, 0xd0, 0xfe, 0x22, 0x5b, 0x87, 0x7, 0xdb, 0xa2, 0x7e, 0x50, 0x8c, 0xf5, 0x29, 0xe8, 0x34, 0x4d, 0x91, 0xbf, 0x63, 0x1a, 0xc6, 0x46, 0x9a, 0xe3, 0x3f, 0x11, 0xcd, 0xb4, 0x68},
- {0x0, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x1, 0xdc, 0xf5, 0x28, 0x52, 0x8f, 0x51, 0x8c, 0xf6, 0x2b, 0x2, 0xdf, 0xa5, 0x78, 0xf7, 0x2a, 0x50, 0x8d, 0xa4, 0x79, 0x3, 0xde, 0xa2, 0x7f, 0x5, 0xd8, 0xf1, 0x2c, 0x56, 0x8b, 0x4, 0xd9, 0xa3, 0x7e, 0x57, 0x8a, 0xf0, 0x2d, 0xf3, 0x2e, 0x54, 0x89, 0xa0, 0x7d, 0x7, 0xda, 0x55, 0x88, 0xf2, 0x2f, 0x6, 0xdb, 0xa1, 0x7c, 0x59, 0x84, 0xfe, 0x23, 0xa, 0xd7, 0xad, 0x70, 0xff, 0x22, 0x58, 0x85, 0xac, 0x71, 0xb, 0xd6, 0x8, 0xd5, 0xaf, 0x72, 0x5b, 0x86, 0xfc, 0x21, 0xae, 0x73, 0x9, 0xd4, 0xfd, 0x20, 0x5a, 0x87, 0xfb, 0x26, 0x5c, 0x81, 0xa8, 0x75, 0xf, 0xd2, 0x5d, 0x80, 0xfa, 0x27, 0xe, 0xd3, 0xa9, 0x74, 0xaa, 0x77, 0xd, 0xd0, 0xf9, 0x24, 0x5e, 0x83, 0xc, 0xd1, 0xab, 0x76, 0x5f, 0x82, 0xf8, 0x25, 0xb2, 0x6f, 0x15, 0xc8, 0xe1, 0x3c, 0x46, 0x9b, 0x14, 0xc9, 0xb3, 0x6e, 0x47, 0x9a, 0xe0, 0x3d, 0xe3, 0x3e, 0x44, 0x99, 0xb0, 0x6d, 0x17, 0xca, 0x45, 0x98, 0xe2, 0x3f, 0x16, 0xcb, 0xb1, 0x6c, 0x10, 0xcd, 0xb7, 0x6a, 0x43, 0x9e, 0xe4, 0x39, 0xb6, 0x6b, 0x11, 0xcc, 0xe5, 0x38, 0x42, 0x9f, 0x41, 0x9c, 0xe6, 0x3b, 0x12, 0xcf, 0xb5, 0x68, 0xe7, 0x3a, 0x40, 0x9d, 0xb4, 0x69, 0x13, 0xce, 0xeb, 0x36, 0x4c, 0x91, 0xb8, 0x65, 0x1f, 0xc2, 0x4d, 0x90, 0xea, 0x37, 0x1e, 0xc3, 0xb9, 0x64, 0xba, 0x67, 0x1d, 0xc0, 0xe9, 0x34, 0x4e, 0x93, 0x1c, 0xc1, 0xbb, 0x66, 0x4f, 0x92, 0xe8, 0x35, 0x49, 0x94, 0xee, 0x33, 0x1a, 0xc7, 0xbd, 0x60, 0xef, 0x32, 0x48, 0x95, 0xbc, 0x61, 0x1b, 0xc6, 0x18, 0xc5, 0xbf, 0x62, 0x4b, 0x96, 0xec, 0x31, 0xbe, 0x63, 0x19, 0xc4, 0xed, 0x30, 0x4a, 0x97},
- {0x0, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e, 0x61, 0xbf, 0xc0, 0x1e, 0x3e, 0xe0, 0x9f, 0x41, 0xdf, 0x1, 0x7e, 0xa0, 0x80, 0x5e, 0x21, 0xff, 0xc2, 0x1c, 0x63, 0xbd, 0x9d, 0x43, 0x3c, 0xe2, 0x7c, 0xa2, 0xdd, 0x3, 0x23, 0xfd, 0x82, 0x5c, 0xa3, 0x7d, 0x2, 0xdc, 0xfc, 0x22, 0x5d, 0x83, 0x1d, 0xc3, 0xbc, 0x62, 0x42, 0x9c, 0xe3, 0x3d, 0x99, 0x47, 0x38, 0xe6, 0xc6, 0x18, 0x67, 0xb9, 0x27, 0xf9, 0x86, 0x58, 0x78, 0xa6, 0xd9, 0x7, 0xf8, 0x26, 0x59, 0x87, 0xa7, 0x79, 0x6, 0xd8, 0x46, 0x98, 0xe7, 0x39, 0x19, 0xc7, 0xb8, 0x66, 0x5b, 0x85, 0xfa, 0x24, 0x4, 0xda, 0xa5, 0x7b, 0xe5, 0x3b, 0x44, 0x9a, 0xba, 0x64, 0x1b, 0xc5, 0x3a, 0xe4, 0x9b, 0x45, 0x65, 0xbb, 0xc4, 0x1a, 0x84, 0x5a, 0x25, 0xfb, 0xdb, 0x5, 0x7a, 0xa4, 0x2f, 0xf1, 0x8e, 0x50, 0x70, 0xae, 0xd1, 0xf, 0x91, 0x4f, 0x30, 0xee, 0xce, 0x10, 0x6f, 0xb1, 0x4e, 0x90, 0xef, 0x31, 0x11, 0xcf, 0xb0, 0x6e, 0xf0, 0x2e, 0x51, 0x8f, 0xaf, 0x71, 0xe, 0xd0, 0xed, 0x33, 0x4c, 0x92, 0xb2, 0x6c, 0x13, 0xcd, 0x53, 0x8d, 0xf2, 0x2c, 0xc, 0xd2, 0xad, 0x73, 0x8c, 0x52, 0x2d, 0xf3, 0xd3, 0xd, 0x72, 0xac, 0x32, 0xec, 0x93, 0x4d, 0x6d, 0xb3, 0xcc, 0x12, 0xb6, 0x68, 0x17, 0xc9, 0xe9, 0x37, 0x48, 0x96, 0x8, 0xd6, 0xa9, 0x77, 0x57, 0x89, 0xf6, 0x28, 0xd7, 0x9, 0x76, 0xa8, 0x88, 0x56, 0x29, 0xf7, 0x69, 0xb7, 0xc8, 0x16, 0x36, 0xe8, 0x97, 0x49, 0x74, 0xaa, 0xd5, 0xb, 0x2b, 0xf5, 0x8a, 0x54, 0xca, 0x14, 0x6b, 0xb5, 0x95, 0x4b, 0x34, 0xea, 0x15, 0xcb, 0xb4, 0x6a, 0x4a, 0x94, 0xeb, 0x35, 0xab, 0x75, 0xa, 0xd4, 0xf4, 0x2a, 0x55, 0x8b},
- {0x0, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91, 0x71, 0xae, 0xd2, 0xd, 0x2a, 0xf5, 0x89, 0x56, 0xc7, 0x18, 0x64, 0xbb, 0x9c, 0x43, 0x3f, 0xe0, 0xe2, 0x3d, 0x41, 0x9e, 0xb9, 0x66, 0x1a, 0xc5, 0x54, 0x8b, 0xf7, 0x28, 0xf, 0xd0, 0xac, 0x73, 0x93, 0x4c, 0x30, 0xef, 0xc8, 0x17, 0x6b, 0xb4, 0x25, 0xfa, 0x86, 0x59, 0x7e, 0xa1, 0xdd, 0x2, 0xd9, 0x6, 0x7a, 0xa5, 0x82, 0x5d, 0x21, 0xfe, 0x6f, 0xb0, 0xcc, 0x13, 0x34, 0xeb, 0x97, 0x48, 0xa8, 0x77, 0xb, 0xd4, 0xf3, 0x2c, 0x50, 0x8f, 0x1e, 0xc1, 0xbd, 0x62, 0x45, 0x9a, 0xe6, 0x39, 0x3b, 0xe4, 0x98, 0x47, 0x60, 0xbf, 0xc3, 0x1c, 0x8d, 0x52, 0x2e, 0xf1, 0xd6, 0x9, 0x75, 0xaa, 0x4a, 0x95, 0xe9, 0x36, 0x11, 0xce, 0xb2, 0x6d, 0xfc, 0x23, 0x5f, 0x80, 0xa7, 0x78, 0x4, 0xdb, 0xaf, 0x70, 0xc, 0xd3, 0xf4, 0x2b, 0x57, 0x88, 0x19, 0xc6, 0xba, 0x65, 0x42, 0x9d, 0xe1, 0x3e, 0xde, 0x1, 0x7d, 0xa2, 0x85, 0x5a, 0x26, 0xf9, 0x68, 0xb7, 0xcb, 0x14, 0x33, 0xec, 0x90, 0x4f, 0x4d, 0x92, 0xee, 0x31, 0x16, 0xc9, 0xb5, 0x6a, 0xfb, 0x24, 0x58, 0x87, 0xa0, 0x7f, 0x3, 0xdc, 0x3c, 0xe3, 0x9f, 0x40, 0x67, 0xb8, 0xc4, 0x1b, 0x8a, 0x55, 0x29, 0xf6, 0xd1, 0xe, 0x72, 0xad, 0x76, 0xa9, 0xd5, 0xa, 0x2d, 0xf2, 0x8e, 0x51, 0xc0, 0x1f, 0x63, 0xbc, 0x9b, 0x44, 0x38, 0xe7, 0x7, 0xd8, 0xa4, 0x7b, 0x5c, 0x83, 0xff, 0x20, 0xb1, 0x6e, 0x12, 0xcd, 0xea, 0x35, 0x49, 0x96, 0x94, 0x4b, 0x37, 0xe8, 0xcf, 0x10, 0x6c, 0xb3, 0x22, 0xfd, 0x81, 0x5e, 0x79, 0xa6, 0xda, 0x5, 0xe5, 0x3a, 0x46, 0x99, 0xbe, 0x61, 0x1d, 0xc2, 0x53, 0x8c, 0xf0, 0x2f, 0x8, 0xd7, 0xab, 0x74},
- {0x0, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9, 0xa6, 0x46, 0x7b, 0x9b, 0x1, 0xe1, 0xdc, 0x3c, 0xf5, 0x15, 0x28, 0xc8, 0x52, 0xb2, 0x8f, 0x6f, 0x51, 0xb1, 0x8c, 0x6c, 0xf6, 0x16, 0x2b, 0xcb, 0x2, 0xe2, 0xdf, 0x3f, 0xa5, 0x45, 0x78, 0x98, 0xf7, 0x17, 0x2a, 0xca, 0x50, 0xb0, 0x8d, 0x6d, 0xa4, 0x44, 0x79, 0x99, 0x3, 0xe3, 0xde, 0x3e, 0xa2, 0x42, 0x7f, 0x9f, 0x5, 0xe5, 0xd8, 0x38, 0xf1, 0x11, 0x2c, 0xcc, 0x56, 0xb6, 0x8b, 0x6b, 0x4, 0xe4, 0xd9, 0x39, 0xa3, 0x43, 0x7e, 0x9e, 0x57, 0xb7, 0x8a, 0x6a, 0xf0, 0x10, 0x2d, 0xcd, 0xf3, 0x13, 0x2e, 0xce, 0x54, 0xb4, 0x89, 0x69, 0xa0, 0x40, 0x7d, 0x9d, 0x7, 0xe7, 0xda, 0x3a, 0x55, 0xb5, 0x88, 0x68, 0xf2, 0x12, 0x2f, 0xcf, 0x6, 0xe6, 0xdb, 0x3b, 0xa1, 0x41, 0x7c, 0x9c, 0x59, 0xb9, 0x84, 0x64, 0xfe, 0x1e, 0x23, 0xc3, 0xa, 0xea, 0xd7, 0x37, 0xad, 0x4d, 0x70, 0x90, 0xff, 0x1f, 0x22, 0xc2, 0x58, 0xb8, 0x85, 0x65, 0xac, 0x4c, 0x71, 0x91, 0xb, 0xeb, 0xd6, 0x36, 0x8, 0xe8, 0xd5, 0x35, 0xaf, 0x4f, 0x72, 0x92, 0x5b, 0xbb, 0x86, 0x66, 0xfc, 0x1c, 0x21, 0xc1, 0xae, 0x4e, 0x73, 0x93, 0x9, 0xe9, 0xd4, 0x34, 0xfd, 0x1d, 0x20, 0xc0, 0x5a, 0xba, 0x87, 0x67, 0xfb, 0x1b, 0x26, 0xc6, 0x5c, 0xbc, 0x81, 0x61, 0xa8, 0x48, 0x75, 0x95, 0xf, 0xef, 0xd2, 0x32, 0x5d, 0xbd, 0x80, 0x60, 0xfa, 0x1a, 0x27, 0xc7, 0xe, 0xee, 0xd3, 0x33, 0xa9, 0x49, 0x74, 0x94, 0xaa, 0x4a, 0x77, 0x97, 0xd, 0xed, 0xd0, 0x30, 0xf9, 0x19, 0x24, 0xc4, 0x5e, 0xbe, 0x83, 0x63, 0xc, 0xec, 0xd1, 0x31, 0xab, 0x4b, 0x76, 0x96, 0x5f, 0xbf, 0x82, 0x62, 0xf8, 0x18, 0x25, 0xc5},
- {0x0, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6, 0xb6, 0x57, 0x69, 0x88, 0x15, 0xf4, 0xca, 0x2b, 0xed, 0xc, 0x32, 0xd3, 0x4e, 0xaf, 0x91, 0x70, 0x71, 0x90, 0xae, 0x4f, 0xd2, 0x33, 0xd, 0xec, 0x2a, 0xcb, 0xf5, 0x14, 0x89, 0x68, 0x56, 0xb7, 0xc7, 0x26, 0x18, 0xf9, 0x64, 0x85, 0xbb, 0x5a, 0x9c, 0x7d, 0x43, 0xa2, 0x3f, 0xde, 0xe0, 0x1, 0xe2, 0x3, 0x3d, 0xdc, 0x41, 0xa0, 0x9e, 0x7f, 0xb9, 0x58, 0x66, 0x87, 0x1a, 0xfb, 0xc5, 0x24, 0x54, 0xb5, 0x8b, 0x6a, 0xf7, 0x16, 0x28, 0xc9, 0xf, 0xee, 0xd0, 0x31, 0xac, 0x4d, 0x73, 0x92, 0x93, 0x72, 0x4c, 0xad, 0x30, 0xd1, 0xef, 0xe, 0xc8, 0x29, 0x17, 0xf6, 0x6b, 0x8a, 0xb4, 0x55, 0x25, 0xc4, 0xfa, 0x1b, 0x86, 0x67, 0x59, 0xb8, 0x7e, 0x9f, 0xa1, 0x40, 0xdd, 0x3c, 0x2, 0xe3, 0xd9, 0x38, 0x6, 0xe7, 0x7a, 0x9b, 0xa5, 0x44, 0x82, 0x63, 0x5d, 0xbc, 0x21, 0xc0, 0xfe, 0x1f, 0x6f, 0x8e, 0xb0, 0x51, 0xcc, 0x2d, 0x13, 0xf2, 0x34, 0xd5, 0xeb, 0xa, 0x97, 0x76, 0x48, 0xa9, 0xa8, 0x49, 0x77, 0x96, 0xb, 0xea, 0xd4, 0x35, 0xf3, 0x12, 0x2c, 0xcd, 0x50, 0xb1, 0x8f, 0x6e, 0x1e, 0xff, 0xc1, 0x20, 0xbd, 0x5c, 0x62, 0x83, 0x45, 0xa4, 0x9a, 0x7b, 0xe6, 0x7, 0x39, 0xd8, 0x3b, 0xda, 0xe4, 0x5, 0x98, 0x79, 0x47, 0xa6, 0x60, 0x81, 0xbf, 0x5e, 0xc3, 0x22, 0x1c, 0xfd, 0x8d, 0x6c, 0x52, 0xb3, 0x2e, 0xcf, 0xf1, 0x10, 0xd6, 0x37, 0x9, 0xe8, 0x75, 0x94, 0xaa, 0x4b, 0x4a, 0xab, 0x95, 0x74, 0xe9, 0x8, 0x36, 0xd7, 0x11, 0xf0, 0xce, 0x2f, 0xb2, 0x53, 0x6d, 0x8c, 0xfc, 0x1d, 0x23, 0xc2, 0x5f, 0xbe, 0x80, 0x61, 0xa7, 0x46, 0x78, 0x99, 0x4, 0xe5, 0xdb, 0x3a},
- {0x0, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0xe, 0x35, 0xd7, 0x86, 0x64, 0x5f, 0xbd, 0x29, 0xcb, 0xf0, 0x12, 0xc5, 0x27, 0x1c, 0xfe, 0x6a, 0x88, 0xb3, 0x51, 0x11, 0xf3, 0xc8, 0x2a, 0xbe, 0x5c, 0x67, 0x85, 0x52, 0xb0, 0x8b, 0x69, 0xfd, 0x1f, 0x24, 0xc6, 0x97, 0x75, 0x4e, 0xac, 0x38, 0xda, 0xe1, 0x3, 0xd4, 0x36, 0xd, 0xef, 0x7b, 0x99, 0xa2, 0x40, 0x22, 0xc0, 0xfb, 0x19, 0x8d, 0x6f, 0x54, 0xb6, 0x61, 0x83, 0xb8, 0x5a, 0xce, 0x2c, 0x17, 0xf5, 0xa4, 0x46, 0x7d, 0x9f, 0xb, 0xe9, 0xd2, 0x30, 0xe7, 0x5, 0x3e, 0xdc, 0x48, 0xaa, 0x91, 0x73, 0x33, 0xd1, 0xea, 0x8, 0x9c, 0x7e, 0x45, 0xa7, 0x70, 0x92, 0xa9, 0x4b, 0xdf, 0x3d, 0x6, 0xe4, 0xb5, 0x57, 0x6c, 0x8e, 0x1a, 0xf8, 0xc3, 0x21, 0xf6, 0x14, 0x2f, 0xcd, 0x59, 0xbb, 0x80, 0x62, 0x44, 0xa6, 0x9d, 0x7f, 0xeb, 0x9, 0x32, 0xd0, 0x7, 0xe5, 0xde, 0x3c, 0xa8, 0x4a, 0x71, 0x93, 0xc2, 0x20, 0x1b, 0xf9, 0x6d, 0x8f, 0xb4, 0x56, 0x81, 0x63, 0x58, 0xba, 0x2e, 0xcc, 0xf7, 0x15, 0x55, 0xb7, 0x8c, 0x6e, 0xfa, 0x18, 0x23, 0xc1, 0x16, 0xf4, 0xcf, 0x2d, 0xb9, 0x5b, 0x60, 0x82, 0xd3, 0x31, 0xa, 0xe8, 0x7c, 0x9e, 0xa5, 0x47, 0x90, 0x72, 0x49, 0xab, 0x3f, 0xdd, 0xe6, 0x4, 0x66, 0x84, 0xbf, 0x5d, 0xc9, 0x2b, 0x10, 0xf2, 0x25, 0xc7, 0xfc, 0x1e, 0x8a, 0x68, 0x53, 0xb1, 0xe0, 0x2, 0x39, 0xdb, 0x4f, 0xad, 0x96, 0x74, 0xa3, 0x41, 0x7a, 0x98, 0xc, 0xee, 0xd5, 0x37, 0x77, 0x95, 0xae, 0x4c, 0xd8, 0x3a, 0x1, 0xe3, 0x34, 0xd6, 0xed, 0xf, 0x9b, 0x79, 0x42, 0xa0, 0xf1, 0x13, 0x28, 0xca, 0x5e, 0xbc, 0x87, 0x65, 0xb2, 0x50, 0x6b, 0x89, 0x1d, 0xff, 0xc4, 0x26},
- {0x0, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x3, 0x3b, 0xd8, 0x96, 0x75, 0x4d, 0xae, 0x3d, 0xde, 0xe6, 0x5, 0xdd, 0x3e, 0x6, 0xe5, 0x76, 0x95, 0xad, 0x4e, 0x31, 0xd2, 0xea, 0x9, 0x9a, 0x79, 0x41, 0xa2, 0x7a, 0x99, 0xa1, 0x42, 0xd1, 0x32, 0xa, 0xe9, 0xa7, 0x44, 0x7c, 0x9f, 0xc, 0xef, 0xd7, 0x34, 0xec, 0xf, 0x37, 0xd4, 0x47, 0xa4, 0x9c, 0x7f, 0x62, 0x81, 0xb9, 0x5a, 0xc9, 0x2a, 0x12, 0xf1, 0x29, 0xca, 0xf2, 0x11, 0x82, 0x61, 0x59, 0xba, 0xf4, 0x17, 0x2f, 0xcc, 0x5f, 0xbc, 0x84, 0x67, 0xbf, 0x5c, 0x64, 0x87, 0x14, 0xf7, 0xcf, 0x2c, 0x53, 0xb0, 0x88, 0x6b, 0xf8, 0x1b, 0x23, 0xc0, 0x18, 0xfb, 0xc3, 0x20, 0xb3, 0x50, 0x68, 0x8b, 0xc5, 0x26, 0x1e, 0xfd, 0x6e, 0x8d, 0xb5, 0x56, 0x8e, 0x6d, 0x55, 0xb6, 0x25, 0xc6, 0xfe, 0x1d, 0xc4, 0x27, 0x1f, 0xfc, 0x6f, 0x8c, 0xb4, 0x57, 0x8f, 0x6c, 0x54, 0xb7, 0x24, 0xc7, 0xff, 0x1c, 0x52, 0xb1, 0x89, 0x6a, 0xf9, 0x1a, 0x22, 0xc1, 0x19, 0xfa, 0xc2, 0x21, 0xb2, 0x51, 0x69, 0x8a, 0xf5, 0x16, 0x2e, 0xcd, 0x5e, 0xbd, 0x85, 0x66, 0xbe, 0x5d, 0x65, 0x86, 0x15, 0xf6, 0xce, 0x2d, 0x63, 0x80, 0xb8, 0x5b, 0xc8, 0x2b, 0x13, 0xf0, 0x28, 0xcb, 0xf3, 0x10, 0x83, 0x60, 0x58, 0xbb, 0xa6, 0x45, 0x7d, 0x9e, 0xd, 0xee, 0xd6, 0x35, 0xed, 0xe, 0x36, 0xd5, 0x46, 0xa5, 0x9d, 0x7e, 0x30, 0xd3, 0xeb, 0x8, 0x9b, 0x78, 0x40, 0xa3, 0x7b, 0x98, 0xa0, 0x43, 0xd0, 0x33, 0xb, 0xe8, 0x97, 0x74, 0x4c, 0xaf, 0x3c, 0xdf, 0xe7, 0x4, 0xdc, 0x3f, 0x7, 0xe4, 0x77, 0x94, 0xac, 0x4f, 0x1, 0xe2, 0xda, 0x39, 0xaa, 0x49, 0x71, 0x92, 0x4a, 0xa9, 0x91, 0x72, 0xe1, 0x2, 0x3a, 0xd9},
- {0x0, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5, 0xe6, 0x2, 0x33, 0xd7, 0x51, 0xb5, 0x84, 0x60, 0x95, 0x71, 0x40, 0xa4, 0x22, 0xc6, 0xf7, 0x13, 0xd1, 0x35, 0x4, 0xe0, 0x66, 0x82, 0xb3, 0x57, 0xa2, 0x46, 0x77, 0x93, 0x15, 0xf1, 0xc0, 0x24, 0x37, 0xd3, 0xe2, 0x6, 0x80, 0x64, 0x55, 0xb1, 0x44, 0xa0, 0x91, 0x75, 0xf3, 0x17, 0x26, 0xc2, 0xbf, 0x5b, 0x6a, 0x8e, 0x8, 0xec, 0xdd, 0x39, 0xcc, 0x28, 0x19, 0xfd, 0x7b, 0x9f, 0xae, 0x4a, 0x59, 0xbd, 0x8c, 0x68, 0xee, 0xa, 0x3b, 0xdf, 0x2a, 0xce, 0xff, 0x1b, 0x9d, 0x79, 0x48, 0xac, 0x6e, 0x8a, 0xbb, 0x5f, 0xd9, 0x3d, 0xc, 0xe8, 0x1d, 0xf9, 0xc8, 0x2c, 0xaa, 0x4e, 0x7f, 0x9b, 0x88, 0x6c, 0x5d, 0xb9, 0x3f, 0xdb, 0xea, 0xe, 0xfb, 0x1f, 0x2e, 0xca, 0x4c, 0xa8, 0x99, 0x7d, 0x63, 0x87, 0xb6, 0x52, 0xd4, 0x30, 0x1, 0xe5, 0x10, 0xf4, 0xc5, 0x21, 0xa7, 0x43, 0x72, 0x96, 0x85, 0x61, 0x50, 0xb4, 0x32, 0xd6, 0xe7, 0x3, 0xf6, 0x12, 0x23, 0xc7, 0x41, 0xa5, 0x94, 0x70, 0xb2, 0x56, 0x67, 0x83, 0x5, 0xe1, 0xd0, 0x34, 0xc1, 0x25, 0x14, 0xf0, 0x76, 0x92, 0xa3, 0x47, 0x54, 0xb0, 0x81, 0x65, 0xe3, 0x7, 0x36, 0xd2, 0x27, 0xc3, 0xf2, 0x16, 0x90, 0x74, 0x45, 0xa1, 0xdc, 0x38, 0x9, 0xed, 0x6b, 0x8f, 0xbe, 0x5a, 0xaf, 0x4b, 0x7a, 0x9e, 0x18, 0xfc, 0xcd, 0x29, 0x3a, 0xde, 0xef, 0xb, 0x8d, 0x69, 0x58, 0xbc, 0x49, 0xad, 0x9c, 0x78, 0xfe, 0x1a, 0x2b, 0xcf, 0xd, 0xe9, 0xd8, 0x3c, 0xba, 0x5e, 0x6f, 0x8b, 0x7e, 0x9a, 0xab, 0x4f, 0xc9, 0x2d, 0x1c, 0xf8, 0xeb, 0xf, 0x3e, 0xda, 0x5c, 0xb8, 0x89, 0x6d, 0x98, 0x7c, 0x4d, 0xa9, 0x2f, 0xcb, 0xfa, 0x1e},
- {0x0, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa, 0xf6, 0x13, 0x21, 0xc4, 0x45, 0xa0, 0x92, 0x77, 0x8d, 0x68, 0x5a, 0xbf, 0x3e, 0xdb, 0xe9, 0xc, 0xf1, 0x14, 0x26, 0xc3, 0x42, 0xa7, 0x95, 0x70, 0x8a, 0x6f, 0x5d, 0xb8, 0x39, 0xdc, 0xee, 0xb, 0x7, 0xe2, 0xd0, 0x35, 0xb4, 0x51, 0x63, 0x86, 0x7c, 0x99, 0xab, 0x4e, 0xcf, 0x2a, 0x18, 0xfd, 0xff, 0x1a, 0x28, 0xcd, 0x4c, 0xa9, 0x9b, 0x7e, 0x84, 0x61, 0x53, 0xb6, 0x37, 0xd2, 0xe0, 0x5, 0x9, 0xec, 0xde, 0x3b, 0xba, 0x5f, 0x6d, 0x88, 0x72, 0x97, 0xa5, 0x40, 0xc1, 0x24, 0x16, 0xf3, 0xe, 0xeb, 0xd9, 0x3c, 0xbd, 0x58, 0x6a, 0x8f, 0x75, 0x90, 0xa2, 0x47, 0xc6, 0x23, 0x11, 0xf4, 0xf8, 0x1d, 0x2f, 0xca, 0x4b, 0xae, 0x9c, 0x79, 0x83, 0x66, 0x54, 0xb1, 0x30, 0xd5, 0xe7, 0x2, 0xe3, 0x6, 0x34, 0xd1, 0x50, 0xb5, 0x87, 0x62, 0x98, 0x7d, 0x4f, 0xaa, 0x2b, 0xce, 0xfc, 0x19, 0x15, 0xf0, 0xc2, 0x27, 0xa6, 0x43, 0x71, 0x94, 0x6e, 0x8b, 0xb9, 0x5c, 0xdd, 0x38, 0xa, 0xef, 0x12, 0xf7, 0xc5, 0x20, 0xa1, 0x44, 0x76, 0x93, 0x69, 0x8c, 0xbe, 0x5b, 0xda, 0x3f, 0xd, 0xe8, 0xe4, 0x1, 0x33, 0xd6, 0x57, 0xb2, 0x80, 0x65, 0x9f, 0x7a, 0x48, 0xad, 0x2c, 0xc9, 0xfb, 0x1e, 0x1c, 0xf9, 0xcb, 0x2e, 0xaf, 0x4a, 0x78, 0x9d, 0x67, 0x82, 0xb0, 0x55, 0xd4, 0x31, 0x3, 0xe6, 0xea, 0xf, 0x3d, 0xd8, 0x59, 0xbc, 0x8e, 0x6b, 0x91, 0x74, 0x46, 0xa3, 0x22, 0xc7, 0xf5, 0x10, 0xed, 0x8, 0x3a, 0xdf, 0x5e, 0xbb, 0x89, 0x6c, 0x96, 0x73, 0x41, 0xa4, 0x25, 0xc0, 0xf2, 0x17, 0x1b, 0xfe, 0xcc, 0x29, 0xa8, 0x4d, 0x7f, 0x9a, 0x60, 0x85, 0xb7, 0x52, 0xd3, 0x36, 0x4, 0xe1},
- {0x0, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0xd, 0xeb, 0xc6, 0x20, 0x17, 0xf1, 0x79, 0x9f, 0xa8, 0x4e, 0xa5, 0x43, 0x74, 0x92, 0x1a, 0xfc, 0xcb, 0x2d, 0x91, 0x77, 0x40, 0xa6, 0x2e, 0xc8, 0xff, 0x19, 0xf2, 0x14, 0x23, 0xc5, 0x4d, 0xab, 0x9c, 0x7a, 0x57, 0xb1, 0x86, 0x60, 0xe8, 0xe, 0x39, 0xdf, 0x34, 0xd2, 0xe5, 0x3, 0x8b, 0x6d, 0x5a, 0xbc, 0x3f, 0xd9, 0xee, 0x8, 0x80, 0x66, 0x51, 0xb7, 0x5c, 0xba, 0x8d, 0x6b, 0xe3, 0x5, 0x32, 0xd4, 0xf9, 0x1f, 0x28, 0xce, 0x46, 0xa0, 0x97, 0x71, 0x9a, 0x7c, 0x4b, 0xad, 0x25, 0xc3, 0xf4, 0x12, 0xae, 0x48, 0x7f, 0x99, 0x11, 0xf7, 0xc0, 0x26, 0xcd, 0x2b, 0x1c, 0xfa, 0x72, 0x94, 0xa3, 0x45, 0x68, 0x8e, 0xb9, 0x5f, 0xd7, 0x31, 0x6, 0xe0, 0xb, 0xed, 0xda, 0x3c, 0xb4, 0x52, 0x65, 0x83, 0x7e, 0x98, 0xaf, 0x49, 0xc1, 0x27, 0x10, 0xf6, 0x1d, 0xfb, 0xcc, 0x2a, 0xa2, 0x44, 0x73, 0x95, 0xb8, 0x5e, 0x69, 0x8f, 0x7, 0xe1, 0xd6, 0x30, 0xdb, 0x3d, 0xa, 0xec, 0x64, 0x82, 0xb5, 0x53, 0xef, 0x9, 0x3e, 0xd8, 0x50, 0xb6, 0x81, 0x67, 0x8c, 0x6a, 0x5d, 0xbb, 0x33, 0xd5, 0xe2, 0x4, 0x29, 0xcf, 0xf8, 0x1e, 0x96, 0x70, 0x47, 0xa1, 0x4a, 0xac, 0x9b, 0x7d, 0xf5, 0x13, 0x24, 0xc2, 0x41, 0xa7, 0x90, 0x76, 0xfe, 0x18, 0x2f, 0xc9, 0x22, 0xc4, 0xf3, 0x15, 0x9d, 0x7b, 0x4c, 0xaa, 0x87, 0x61, 0x56, 0xb0, 0x38, 0xde, 0xe9, 0xf, 0xe4, 0x2, 0x35, 0xd3, 0x5b, 0xbd, 0x8a, 0x6c, 0xd0, 0x36, 0x1, 0xe7, 0x6f, 0x89, 0xbe, 0x58, 0xb3, 0x55, 0x62, 0x84, 0xc, 0xea, 0xdd, 0x3b, 0x16, 0xf0, 0xc7, 0x21, 0xa9, 0x4f, 0x78, 0x9e, 0x75, 0x93, 0xa4, 0x42, 0xca, 0x2c, 0x1b, 0xfd},
- {0x0, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x3, 0xe4, 0xd6, 0x31, 0x5, 0xe2, 0x6d, 0x8a, 0xbe, 0x59, 0xbd, 0x5a, 0x6e, 0x89, 0x6, 0xe1, 0xd5, 0x32, 0xb1, 0x56, 0x62, 0x85, 0xa, 0xed, 0xd9, 0x3e, 0xda, 0x3d, 0x9, 0xee, 0x61, 0x86, 0xb2, 0x55, 0x67, 0x80, 0xb4, 0x53, 0xdc, 0x3b, 0xf, 0xe8, 0xc, 0xeb, 0xdf, 0x38, 0xb7, 0x50, 0x64, 0x83, 0x7f, 0x98, 0xac, 0x4b, 0xc4, 0x23, 0x17, 0xf0, 0x14, 0xf3, 0xc7, 0x20, 0xaf, 0x48, 0x7c, 0x9b, 0xa9, 0x4e, 0x7a, 0x9d, 0x12, 0xf5, 0xc1, 0x26, 0xc2, 0x25, 0x11, 0xf6, 0x79, 0x9e, 0xaa, 0x4d, 0xce, 0x29, 0x1d, 0xfa, 0x75, 0x92, 0xa6, 0x41, 0xa5, 0x42, 0x76, 0x91, 0x1e, 0xf9, 0xcd, 0x2a, 0x18, 0xff, 0xcb, 0x2c, 0xa3, 0x44, 0x70, 0x97, 0x73, 0x94, 0xa0, 0x47, 0xc8, 0x2f, 0x1b, 0xfc, 0xfe, 0x19, 0x2d, 0xca, 0x45, 0xa2, 0x96, 0x71, 0x95, 0x72, 0x46, 0xa1, 0x2e, 0xc9, 0xfd, 0x1a, 0x28, 0xcf, 0xfb, 0x1c, 0x93, 0x74, 0x40, 0xa7, 0x43, 0xa4, 0x90, 0x77, 0xf8, 0x1f, 0x2b, 0xcc, 0x4f, 0xa8, 0x9c, 0x7b, 0xf4, 0x13, 0x27, 0xc0, 0x24, 0xc3, 0xf7, 0x10, 0x9f, 0x78, 0x4c, 0xab, 0x99, 0x7e, 0x4a, 0xad, 0x22, 0xc5, 0xf1, 0x16, 0xf2, 0x15, 0x21, 0xc6, 0x49, 0xae, 0x9a, 0x7d, 0x81, 0x66, 0x52, 0xb5, 0x3a, 0xdd, 0xe9, 0xe, 0xea, 0xd, 0x39, 0xde, 0x51, 0xb6, 0x82, 0x65, 0x57, 0xb0, 0x84, 0x63, 0xec, 0xb, 0x3f, 0xd8, 0x3c, 0xdb, 0xef, 0x8, 0x87, 0x60, 0x54, 0xb3, 0x30, 0xd7, 0xe3, 0x4, 0x8b, 0x6c, 0x58, 0xbf, 0x5b, 0xbc, 0x88, 0x6f, 0xe0, 0x7, 0x33, 0xd4, 0xe6, 0x1, 0x35, 0xd2, 0x5d, 0xba, 0x8e, 0x69, 0x8d, 0x6a, 0x5e, 0xb9, 0x36, 0xd1, 0xe5, 0x2},
- {0x0, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1, 0x26, 0xce, 0xeb, 0x3, 0xa1, 0x49, 0x6c, 0x84, 0x35, 0xdd, 0xf8, 0x10, 0xb2, 0x5a, 0x7f, 0x97, 0x4c, 0xa4, 0x81, 0x69, 0xcb, 0x23, 0x6, 0xee, 0x5f, 0xb7, 0x92, 0x7a, 0xd8, 0x30, 0x15, 0xfd, 0x6a, 0x82, 0xa7, 0x4f, 0xed, 0x5, 0x20, 0xc8, 0x79, 0x91, 0xb4, 0x5c, 0xfe, 0x16, 0x33, 0xdb, 0x98, 0x70, 0x55, 0xbd, 0x1f, 0xf7, 0xd2, 0x3a, 0x8b, 0x63, 0x46, 0xae, 0xc, 0xe4, 0xc1, 0x29, 0xbe, 0x56, 0x73, 0x9b, 0x39, 0xd1, 0xf4, 0x1c, 0xad, 0x45, 0x60, 0x88, 0x2a, 0xc2, 0xe7, 0xf, 0xd4, 0x3c, 0x19, 0xf1, 0x53, 0xbb, 0x9e, 0x76, 0xc7, 0x2f, 0xa, 0xe2, 0x40, 0xa8, 0x8d, 0x65, 0xf2, 0x1a, 0x3f, 0xd7, 0x75, 0x9d, 0xb8, 0x50, 0xe1, 0x9, 0x2c, 0xc4, 0x66, 0x8e, 0xab, 0x43, 0x2d, 0xc5, 0xe0, 0x8, 0xaa, 0x42, 0x67, 0x8f, 0x3e, 0xd6, 0xf3, 0x1b, 0xb9, 0x51, 0x74, 0x9c, 0xb, 0xe3, 0xc6, 0x2e, 0x8c, 0x64, 0x41, 0xa9, 0x18, 0xf0, 0xd5, 0x3d, 0x9f, 0x77, 0x52, 0xba, 0x61, 0x89, 0xac, 0x44, 0xe6, 0xe, 0x2b, 0xc3, 0x72, 0x9a, 0xbf, 0x57, 0xf5, 0x1d, 0x38, 0xd0, 0x47, 0xaf, 0x8a, 0x62, 0xc0, 0x28, 0xd, 0xe5, 0x54, 0xbc, 0x99, 0x71, 0xd3, 0x3b, 0x1e, 0xf6, 0xb5, 0x5d, 0x78, 0x90, 0x32, 0xda, 0xff, 0x17, 0xa6, 0x4e, 0x6b, 0x83, 0x21, 0xc9, 0xec, 0x4, 0x93, 0x7b, 0x5e, 0xb6, 0x14, 0xfc, 0xd9, 0x31, 0x80, 0x68, 0x4d, 0xa5, 0x7, 0xef, 0xca, 0x22, 0xf9, 0x11, 0x34, 0xdc, 0x7e, 0x96, 0xb3, 0x5b, 0xea, 0x2, 0x27, 0xcf, 0x6d, 0x85, 0xa0, 0x48, 0xdf, 0x37, 0x12, 0xfa, 0x58, 0xb0, 0x95, 0x7d, 0xcc, 0x24, 0x1, 0xe9, 0x4b, 0xa3, 0x86, 0x6e},
- {0x0, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe, 0x36, 0xdf, 0xf9, 0x10, 0xb5, 0x5c, 0x7a, 0x93, 0x2d, 0xc4, 0xe2, 0xb, 0xae, 0x47, 0x61, 0x88, 0x6c, 0x85, 0xa3, 0x4a, 0xef, 0x6, 0x20, 0xc9, 0x77, 0x9e, 0xb8, 0x51, 0xf4, 0x1d, 0x3b, 0xd2, 0x5a, 0xb3, 0x95, 0x7c, 0xd9, 0x30, 0x16, 0xff, 0x41, 0xa8, 0x8e, 0x67, 0xc2, 0x2b, 0xd, 0xe4, 0xd8, 0x31, 0x17, 0xfe, 0x5b, 0xb2, 0x94, 0x7d, 0xc3, 0x2a, 0xc, 0xe5, 0x40, 0xa9, 0x8f, 0x66, 0xee, 0x7, 0x21, 0xc8, 0x6d, 0x84, 0xa2, 0x4b, 0xf5, 0x1c, 0x3a, 0xd3, 0x76, 0x9f, 0xb9, 0x50, 0xb4, 0x5d, 0x7b, 0x92, 0x37, 0xde, 0xf8, 0x11, 0xaf, 0x46, 0x60, 0x89, 0x2c, 0xc5, 0xe3, 0xa, 0x82, 0x6b, 0x4d, 0xa4, 0x1, 0xe8, 0xce, 0x27, 0x99, 0x70, 0x56, 0xbf, 0x1a, 0xf3, 0xd5, 0x3c, 0xad, 0x44, 0x62, 0x8b, 0x2e, 0xc7, 0xe1, 0x8, 0xb6, 0x5f, 0x79, 0x90, 0x35, 0xdc, 0xfa, 0x13, 0x9b, 0x72, 0x54, 0xbd, 0x18, 0xf1, 0xd7, 0x3e, 0x80, 0x69, 0x4f, 0xa6, 0x3, 0xea, 0xcc, 0x25, 0xc1, 0x28, 0xe, 0xe7, 0x42, 0xab, 0x8d, 0x64, 0xda, 0x33, 0x15, 0xfc, 0x59, 0xb0, 0x96, 0x7f, 0xf7, 0x1e, 0x38, 0xd1, 0x74, 0x9d, 0xbb, 0x52, 0xec, 0x5, 0x23, 0xca, 0x6f, 0x86, 0xa0, 0x49, 0x75, 0x9c, 0xba, 0x53, 0xf6, 0x1f, 0x39, 0xd0, 0x6e, 0x87, 0xa1, 0x48, 0xed, 0x4, 0x22, 0xcb, 0x43, 0xaa, 0x8c, 0x65, 0xc0, 0x29, 0xf, 0xe6, 0x58, 0xb1, 0x97, 0x7e, 0xdb, 0x32, 0x14, 0xfd, 0x19, 0xf0, 0xd6, 0x3f, 0x9a, 0x73, 0x55, 0xbc, 0x2, 0xeb, 0xcd, 0x24, 0x81, 0x68, 0x4e, 0xa7, 0x2f, 0xc6, 0xe0, 0x9, 0xac, 0x45, 0x63, 0x8a, 0x34, 0xdd, 0xfb, 0x12, 0xb7, 0x5e, 0x78, 0x91},
- {0x0, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x3, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf, 0x6, 0xec, 0xcf, 0x25, 0x89, 0x63, 0x40, 0xaa, 0x5, 0xef, 0xcc, 0x26, 0x8a, 0x60, 0x43, 0xa9, 0xc, 0xe6, 0xc5, 0x2f, 0x83, 0x69, 0x4a, 0xa0, 0xf, 0xe5, 0xc6, 0x2c, 0x80, 0x6a, 0x49, 0xa3, 0xa, 0xe0, 0xc3, 0x29, 0x85, 0x6f, 0x4c, 0xa6, 0x9, 0xe3, 0xc0, 0x2a, 0x86, 0x6c, 0x4f, 0xa5, 0x18, 0xf2, 0xd1, 0x3b, 0x97, 0x7d, 0x5e, 0xb4, 0x1b, 0xf1, 0xd2, 0x38, 0x94, 0x7e, 0x5d, 0xb7, 0x1e, 0xf4, 0xd7, 0x3d, 0x91, 0x7b, 0x58, 0xb2, 0x1d, 0xf7, 0xd4, 0x3e, 0x92, 0x78, 0x5b, 0xb1, 0x14, 0xfe, 0xdd, 0x37, 0x9b, 0x71, 0x52, 0xb8, 0x17, 0xfd, 0xde, 0x34, 0x98, 0x72, 0x51, 0xbb, 0x12, 0xf8, 0xdb, 0x31, 0x9d, 0x77, 0x54, 0xbe, 0x11, 0xfb, 0xd8, 0x32, 0x9e, 0x74, 0x57, 0xbd, 0x30, 0xda, 0xf9, 0x13, 0xbf, 0x55, 0x76, 0x9c, 0x33, 0xd9, 0xfa, 0x10, 0xbc, 0x56, 0x75, 0x9f, 0x36, 0xdc, 0xff, 0x15, 0xb9, 0x53, 0x70, 0x9a, 0x35, 0xdf, 0xfc, 0x16, 0xba, 0x50, 0x73, 0x99, 0x3c, 0xd6, 0xf5, 0x1f, 0xb3, 0x59, 0x7a, 0x90, 0x3f, 0xd5, 0xf6, 0x1c, 0xb0, 0x5a, 0x79, 0x93, 0x3a, 0xd0, 0xf3, 0x19, 0xb5, 0x5f, 0x7c, 0x96, 0x39, 0xd3, 0xf0, 0x1a, 0xb6, 0x5c, 0x7f, 0x95, 0x28, 0xc2, 0xe1, 0xb, 0xa7, 0x4d, 0x6e, 0x84, 0x2b, 0xc1, 0xe2, 0x8, 0xa4, 0x4e, 0x6d, 0x87, 0x2e, 0xc4, 0xe7, 0xd, 0xa1, 0x4b, 0x68, 0x82, 0x2d, 0xc7, 0xe4, 0xe, 0xa2, 0x48, 0x6b, 0x81, 0x24, 0xce, 0xed, 0x7, 0xab, 0x41, 0x62, 0x88, 0x27, 0xcd, 0xee, 0x4, 0xa8, 0x42, 0x61, 0x8b, 0x22, 0xc8, 0xeb, 0x1, 0xad, 0x47, 0x64, 0x8e, 0x21, 0xcb, 0xe8, 0x2, 0xae, 0x44, 0x67, 0x8d},
- {0x0, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0xb, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0, 0x16, 0xfd, 0xdd, 0x36, 0x9d, 0x76, 0x56, 0xbd, 0x1d, 0xf6, 0xd6, 0x3d, 0x96, 0x7d, 0x5d, 0xb6, 0x2c, 0xc7, 0xe7, 0xc, 0xa7, 0x4c, 0x6c, 0x87, 0x27, 0xcc, 0xec, 0x7, 0xac, 0x47, 0x67, 0x8c, 0x3a, 0xd1, 0xf1, 0x1a, 0xb1, 0x5a, 0x7a, 0x91, 0x31, 0xda, 0xfa, 0x11, 0xba, 0x51, 0x71, 0x9a, 0x58, 0xb3, 0x93, 0x78, 0xd3, 0x38, 0x18, 0xf3, 0x53, 0xb8, 0x98, 0x73, 0xd8, 0x33, 0x13, 0xf8, 0x4e, 0xa5, 0x85, 0x6e, 0xc5, 0x2e, 0xe, 0xe5, 0x45, 0xae, 0x8e, 0x65, 0xce, 0x25, 0x5, 0xee, 0x74, 0x9f, 0xbf, 0x54, 0xff, 0x14, 0x34, 0xdf, 0x7f, 0x94, 0xb4, 0x5f, 0xf4, 0x1f, 0x3f, 0xd4, 0x62, 0x89, 0xa9, 0x42, 0xe9, 0x2, 0x22, 0xc9, 0x69, 0x82, 0xa2, 0x49, 0xe2, 0x9, 0x29, 0xc2, 0xb0, 0x5b, 0x7b, 0x90, 0x3b, 0xd0, 0xf0, 0x1b, 0xbb, 0x50, 0x70, 0x9b, 0x30, 0xdb, 0xfb, 0x10, 0xa6, 0x4d, 0x6d, 0x86, 0x2d, 0xc6, 0xe6, 0xd, 0xad, 0x46, 0x66, 0x8d, 0x26, 0xcd, 0xed, 0x6, 0x9c, 0x77, 0x57, 0xbc, 0x17, 0xfc, 0xdc, 0x37, 0x97, 0x7c, 0x5c, 0xb7, 0x1c, 0xf7, 0xd7, 0x3c, 0x8a, 0x61, 0x41, 0xaa, 0x1, 0xea, 0xca, 0x21, 0x81, 0x6a, 0x4a, 0xa1, 0xa, 0xe1, 0xc1, 0x2a, 0xe8, 0x3, 0x23, 0xc8, 0x63, 0x88, 0xa8, 0x43, 0xe3, 0x8, 0x28, 0xc3, 0x68, 0x83, 0xa3, 0x48, 0xfe, 0x15, 0x35, 0xde, 0x75, 0x9e, 0xbe, 0x55, 0xf5, 0x1e, 0x3e, 0xd5, 0x7e, 0x95, 0xb5, 0x5e, 0xc4, 0x2f, 0xf, 0xe4, 0x4f, 0xa4, 0x84, 0x6f, 0xcf, 0x24, 0x4, 0xef, 0x44, 0xaf, 0x8f, 0x64, 0xd2, 0x39, 0x19, 0xf2, 0x59, 0xb2, 0x92, 0x79, 0xd9, 0x32, 0x12, 0xf9, 0x52, 0xb9, 0x99, 0x72},
- {0x0, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d, 0x66, 0x8a, 0xa3, 0x4f, 0xf1, 0x1d, 0x34, 0xd8, 0x55, 0xb9, 0x90, 0x7c, 0xc2, 0x2e, 0x7, 0xeb, 0xcc, 0x20, 0x9, 0xe5, 0x5b, 0xb7, 0x9e, 0x72, 0xff, 0x13, 0x3a, 0xd6, 0x68, 0x84, 0xad, 0x41, 0xaa, 0x46, 0x6f, 0x83, 0x3d, 0xd1, 0xf8, 0x14, 0x99, 0x75, 0x5c, 0xb0, 0xe, 0xe2, 0xcb, 0x27, 0x85, 0x69, 0x40, 0xac, 0x12, 0xfe, 0xd7, 0x3b, 0xb6, 0x5a, 0x73, 0x9f, 0x21, 0xcd, 0xe4, 0x8, 0xe3, 0xf, 0x26, 0xca, 0x74, 0x98, 0xb1, 0x5d, 0xd0, 0x3c, 0x15, 0xf9, 0x47, 0xab, 0x82, 0x6e, 0x49, 0xa5, 0x8c, 0x60, 0xde, 0x32, 0x1b, 0xf7, 0x7a, 0x96, 0xbf, 0x53, 0xed, 0x1, 0x28, 0xc4, 0x2f, 0xc3, 0xea, 0x6, 0xb8, 0x54, 0x7d, 0x91, 0x1c, 0xf0, 0xd9, 0x35, 0x8b, 0x67, 0x4e, 0xa2, 0x17, 0xfb, 0xd2, 0x3e, 0x80, 0x6c, 0x45, 0xa9, 0x24, 0xc8, 0xe1, 0xd, 0xb3, 0x5f, 0x76, 0x9a, 0x71, 0x9d, 0xb4, 0x58, 0xe6, 0xa, 0x23, 0xcf, 0x42, 0xae, 0x87, 0x6b, 0xd5, 0x39, 0x10, 0xfc, 0xdb, 0x37, 0x1e, 0xf2, 0x4c, 0xa0, 0x89, 0x65, 0xe8, 0x4, 0x2d, 0xc1, 0x7f, 0x93, 0xba, 0x56, 0xbd, 0x51, 0x78, 0x94, 0x2a, 0xc6, 0xef, 0x3, 0x8e, 0x62, 0x4b, 0xa7, 0x19, 0xf5, 0xdc, 0x30, 0x92, 0x7e, 0x57, 0xbb, 0x5, 0xe9, 0xc0, 0x2c, 0xa1, 0x4d, 0x64, 0x88, 0x36, 0xda, 0xf3, 0x1f, 0xf4, 0x18, 0x31, 0xdd, 0x63, 0x8f, 0xa6, 0x4a, 0xc7, 0x2b, 0x2, 0xee, 0x50, 0xbc, 0x95, 0x79, 0x5e, 0xb2, 0x9b, 0x77, 0xc9, 0x25, 0xc, 0xe0, 0x6d, 0x81, 0xa8, 0x44, 0xfa, 0x16, 0x3f, 0xd3, 0x38, 0xd4, 0xfd, 0x11, 0xaf, 0x43, 0x6a, 0x86, 0xb, 0xe7, 0xce, 0x22, 0x9c, 0x70, 0x59, 0xb5},
- {0x0, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82, 0x76, 0x9b, 0xb1, 0x5c, 0xe5, 0x8, 0x22, 0xcf, 0x4d, 0xa0, 0x8a, 0x67, 0xde, 0x33, 0x19, 0xf4, 0xec, 0x1, 0x2b, 0xc6, 0x7f, 0x92, 0xb8, 0x55, 0xd7, 0x3a, 0x10, 0xfd, 0x44, 0xa9, 0x83, 0x6e, 0x9a, 0x77, 0x5d, 0xb0, 0x9, 0xe4, 0xce, 0x23, 0xa1, 0x4c, 0x66, 0x8b, 0x32, 0xdf, 0xf5, 0x18, 0xc5, 0x28, 0x2, 0xef, 0x56, 0xbb, 0x91, 0x7c, 0xfe, 0x13, 0x39, 0xd4, 0x6d, 0x80, 0xaa, 0x47, 0xb3, 0x5e, 0x74, 0x99, 0x20, 0xcd, 0xe7, 0xa, 0x88, 0x65, 0x4f, 0xa2, 0x1b, 0xf6, 0xdc, 0x31, 0x29, 0xc4, 0xee, 0x3, 0xba, 0x57, 0x7d, 0x90, 0x12, 0xff, 0xd5, 0x38, 0x81, 0x6c, 0x46, 0xab, 0x5f, 0xb2, 0x98, 0x75, 0xcc, 0x21, 0xb, 0xe6, 0x64, 0x89, 0xa3, 0x4e, 0xf7, 0x1a, 0x30, 0xdd, 0x97, 0x7a, 0x50, 0xbd, 0x4, 0xe9, 0xc3, 0x2e, 0xac, 0x41, 0x6b, 0x86, 0x3f, 0xd2, 0xf8, 0x15, 0xe1, 0xc, 0x26, 0xcb, 0x72, 0x9f, 0xb5, 0x58, 0xda, 0x37, 0x1d, 0xf0, 0x49, 0xa4, 0x8e, 0x63, 0x7b, 0x96, 0xbc, 0x51, 0xe8, 0x5, 0x2f, 0xc2, 0x40, 0xad, 0x87, 0x6a, 0xd3, 0x3e, 0x14, 0xf9, 0xd, 0xe0, 0xca, 0x27, 0x9e, 0x73, 0x59, 0xb4, 0x36, 0xdb, 0xf1, 0x1c, 0xa5, 0x48, 0x62, 0x8f, 0x52, 0xbf, 0x95, 0x78, 0xc1, 0x2c, 0x6, 0xeb, 0x69, 0x84, 0xae, 0x43, 0xfa, 0x17, 0x3d, 0xd0, 0x24, 0xc9, 0xe3, 0xe, 0xb7, 0x5a, 0x70, 0x9d, 0x1f, 0xf2, 0xd8, 0x35, 0x8c, 0x61, 0x4b, 0xa6, 0xbe, 0x53, 0x79, 0x94, 0x2d, 0xc0, 0xea, 0x7, 0x85, 0x68, 0x42, 0xaf, 0x16, 0xfb, 0xd1, 0x3c, 0xc8, 0x25, 0xf, 0xe2, 0x5b, 0xb6, 0x9c, 0x71, 0xf3, 0x1e, 0x34, 0xd9, 0x60, 0x8d, 0xa7, 0x4a},
- {0x0, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0xc, 0xbc, 0x52, 0x7d, 0x93, 0x46, 0xa8, 0x87, 0x69, 0xd9, 0x37, 0x18, 0xf6, 0x65, 0x8b, 0xa4, 0x4a, 0xfa, 0x14, 0x3b, 0xd5, 0x8c, 0x62, 0x4d, 0xa3, 0x13, 0xfd, 0xd2, 0x3c, 0xaf, 0x41, 0x6e, 0x80, 0x30, 0xde, 0xf1, 0x1f, 0xca, 0x24, 0xb, 0xe5, 0x55, 0xbb, 0x94, 0x7a, 0xe9, 0x7, 0x28, 0xc6, 0x76, 0x98, 0xb7, 0x59, 0x5, 0xeb, 0xc4, 0x2a, 0x9a, 0x74, 0x5b, 0xb5, 0x26, 0xc8, 0xe7, 0x9, 0xb9, 0x57, 0x78, 0x96, 0x43, 0xad, 0x82, 0x6c, 0xdc, 0x32, 0x1d, 0xf3, 0x60, 0x8e, 0xa1, 0x4f, 0xff, 0x11, 0x3e, 0xd0, 0x89, 0x67, 0x48, 0xa6, 0x16, 0xf8, 0xd7, 0x39, 0xaa, 0x44, 0x6b, 0x85, 0x35, 0xdb, 0xf4, 0x1a, 0xcf, 0x21, 0xe, 0xe0, 0x50, 0xbe, 0x91, 0x7f, 0xec, 0x2, 0x2d, 0xc3, 0x73, 0x9d, 0xb2, 0x5c, 0xa, 0xe4, 0xcb, 0x25, 0x95, 0x7b, 0x54, 0xba, 0x29, 0xc7, 0xe8, 0x6, 0xb6, 0x58, 0x77, 0x99, 0x4c, 0xa2, 0x8d, 0x63, 0xd3, 0x3d, 0x12, 0xfc, 0x6f, 0x81, 0xae, 0x40, 0xf0, 0x1e, 0x31, 0xdf, 0x86, 0x68, 0x47, 0xa9, 0x19, 0xf7, 0xd8, 0x36, 0xa5, 0x4b, 0x64, 0x8a, 0x3a, 0xd4, 0xfb, 0x15, 0xc0, 0x2e, 0x1, 0xef, 0x5f, 0xb1, 0x9e, 0x70, 0xe3, 0xd, 0x22, 0xcc, 0x7c, 0x92, 0xbd, 0x53, 0xf, 0xe1, 0xce, 0x20, 0x90, 0x7e, 0x51, 0xbf, 0x2c, 0xc2, 0xed, 0x3, 0xb3, 0x5d, 0x72, 0x9c, 0x49, 0xa7, 0x88, 0x66, 0xd6, 0x38, 0x17, 0xf9, 0x6a, 0x84, 0xab, 0x45, 0xf5, 0x1b, 0x34, 0xda, 0x83, 0x6d, 0x42, 0xac, 0x1c, 0xf2, 0xdd, 0x33, 0xa0, 0x4e, 0x61, 0x8f, 0x3f, 0xd1, 0xfe, 0x10, 0xc5, 0x2b, 0x4, 0xea, 0x5a, 0xb4, 0x9b, 0x75, 0xe6, 0x8, 0x27, 0xc9, 0x79, 0x97, 0xb8, 0x56},
- {0x0, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x7, 0xb0, 0x5f, 0x73, 0x9c, 0x56, 0xb9, 0x95, 0x7a, 0xcd, 0x22, 0xe, 0xe1, 0x7d, 0x92, 0xbe, 0x51, 0xe6, 0x9, 0x25, 0xca, 0xac, 0x43, 0x6f, 0x80, 0x37, 0xd8, 0xf4, 0x1b, 0x87, 0x68, 0x44, 0xab, 0x1c, 0xf3, 0xdf, 0x30, 0xfa, 0x15, 0x39, 0xd6, 0x61, 0x8e, 0xa2, 0x4d, 0xd1, 0x3e, 0x12, 0xfd, 0x4a, 0xa5, 0x89, 0x66, 0x45, 0xaa, 0x86, 0x69, 0xde, 0x31, 0x1d, 0xf2, 0x6e, 0x81, 0xad, 0x42, 0xf5, 0x1a, 0x36, 0xd9, 0x13, 0xfc, 0xd0, 0x3f, 0x88, 0x67, 0x4b, 0xa4, 0x38, 0xd7, 0xfb, 0x14, 0xa3, 0x4c, 0x60, 0x8f, 0xe9, 0x6, 0x2a, 0xc5, 0x72, 0x9d, 0xb1, 0x5e, 0xc2, 0x2d, 0x1, 0xee, 0x59, 0xb6, 0x9a, 0x75, 0xbf, 0x50, 0x7c, 0x93, 0x24, 0xcb, 0xe7, 0x8, 0x94, 0x7b, 0x57, 0xb8, 0xf, 0xe0, 0xcc, 0x23, 0x8a, 0x65, 0x49, 0xa6, 0x11, 0xfe, 0xd2, 0x3d, 0xa1, 0x4e, 0x62, 0x8d, 0x3a, 0xd5, 0xf9, 0x16, 0xdc, 0x33, 0x1f, 0xf0, 0x47, 0xa8, 0x84, 0x6b, 0xf7, 0x18, 0x34, 0xdb, 0x6c, 0x83, 0xaf, 0x40, 0x26, 0xc9, 0xe5, 0xa, 0xbd, 0x52, 0x7e, 0x91, 0xd, 0xe2, 0xce, 0x21, 0x96, 0x79, 0x55, 0xba, 0x70, 0x9f, 0xb3, 0x5c, 0xeb, 0x4, 0x28, 0xc7, 0x5b, 0xb4, 0x98, 0x77, 0xc0, 0x2f, 0x3, 0xec, 0xcf, 0x20, 0xc, 0xe3, 0x54, 0xbb, 0x97, 0x78, 0xe4, 0xb, 0x27, 0xc8, 0x7f, 0x90, 0xbc, 0x53, 0x99, 0x76, 0x5a, 0xb5, 0x2, 0xed, 0xc1, 0x2e, 0xb2, 0x5d, 0x71, 0x9e, 0x29, 0xc6, 0xea, 0x5, 0x63, 0x8c, 0xa0, 0x4f, 0xf8, 0x17, 0x3b, 0xd4, 0x48, 0xa7, 0x8b, 0x64, 0xd3, 0x3c, 0x10, 0xff, 0x35, 0xda, 0xf6, 0x19, 0xae, 0x41, 0x6d, 0x82, 0x1e, 0xf1, 0xdd, 0x32, 0x85, 0x6a, 0x46, 0xa9},
- {0x0, 0xf0, 0xfd, 0xd, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39, 0xbb, 0x4b, 0x46, 0xb6, 0x5c, 0xac, 0xa1, 0x51, 0x68, 0x98, 0x95, 0x65, 0x8f, 0x7f, 0x72, 0x82, 0x6b, 0x9b, 0x96, 0x66, 0x8c, 0x7c, 0x71, 0x81, 0xb8, 0x48, 0x45, 0xb5, 0x5f, 0xaf, 0xa2, 0x52, 0xd0, 0x20, 0x2d, 0xdd, 0x37, 0xc7, 0xca, 0x3a, 0x3, 0xf3, 0xfe, 0xe, 0xe4, 0x14, 0x19, 0xe9, 0xd6, 0x26, 0x2b, 0xdb, 0x31, 0xc1, 0xcc, 0x3c, 0x5, 0xf5, 0xf8, 0x8, 0xe2, 0x12, 0x1f, 0xef, 0x6d, 0x9d, 0x90, 0x60, 0x8a, 0x7a, 0x77, 0x87, 0xbe, 0x4e, 0x43, 0xb3, 0x59, 0xa9, 0xa4, 0x54, 0xbd, 0x4d, 0x40, 0xb0, 0x5a, 0xaa, 0xa7, 0x57, 0x6e, 0x9e, 0x93, 0x63, 0x89, 0x79, 0x74, 0x84, 0x6, 0xf6, 0xfb, 0xb, 0xe1, 0x11, 0x1c, 0xec, 0xd5, 0x25, 0x28, 0xd8, 0x32, 0xc2, 0xcf, 0x3f, 0xb1, 0x41, 0x4c, 0xbc, 0x56, 0xa6, 0xab, 0x5b, 0x62, 0x92, 0x9f, 0x6f, 0x85, 0x75, 0x78, 0x88, 0xa, 0xfa, 0xf7, 0x7, 0xed, 0x1d, 0x10, 0xe0, 0xd9, 0x29, 0x24, 0xd4, 0x3e, 0xce, 0xc3, 0x33, 0xda, 0x2a, 0x27, 0xd7, 0x3d, 0xcd, 0xc0, 0x30, 0x9, 0xf9, 0xf4, 0x4, 0xee, 0x1e, 0x13, 0xe3, 0x61, 0x91, 0x9c, 0x6c, 0x86, 0x76, 0x7b, 0x8b, 0xb2, 0x42, 0x4f, 0xbf, 0x55, 0xa5, 0xa8, 0x58, 0x67, 0x97, 0x9a, 0x6a, 0x80, 0x70, 0x7d, 0x8d, 0xb4, 0x44, 0x49, 0xb9, 0x53, 0xa3, 0xae, 0x5e, 0xdc, 0x2c, 0x21, 0xd1, 0x3b, 0xcb, 0xc6, 0x36, 0xf, 0xff, 0xf2, 0x2, 0xe8, 0x18, 0x15, 0xe5, 0xc, 0xfc, 0xf1, 0x1, 0xeb, 0x1b, 0x16, 0xe6, 0xdf, 0x2f, 0x22, 0xd2, 0x38, 0xc8, 0xc5, 0x35, 0xb7, 0x47, 0x4a, 0xba, 0x50, 0xa0, 0xad, 0x5d, 0x64, 0x94, 0x99, 0x69, 0x83, 0x73, 0x7e, 0x8e},
- {0x0, 0xf1, 0xff, 0xe, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36, 0xab, 0x5a, 0x54, 0xa5, 0x48, 0xb9, 0xb7, 0x46, 0x70, 0x81, 0x8f, 0x7e, 0x93, 0x62, 0x6c, 0x9d, 0x4b, 0xba, 0xb4, 0x45, 0xa8, 0x59, 0x57, 0xa6, 0x90, 0x61, 0x6f, 0x9e, 0x73, 0x82, 0x8c, 0x7d, 0xe0, 0x11, 0x1f, 0xee, 0x3, 0xf2, 0xfc, 0xd, 0x3b, 0xca, 0xc4, 0x35, 0xd8, 0x29, 0x27, 0xd6, 0x96, 0x67, 0x69, 0x98, 0x75, 0x84, 0x8a, 0x7b, 0x4d, 0xbc, 0xb2, 0x43, 0xae, 0x5f, 0x51, 0xa0, 0x3d, 0xcc, 0xc2, 0x33, 0xde, 0x2f, 0x21, 0xd0, 0xe6, 0x17, 0x19, 0xe8, 0x5, 0xf4, 0xfa, 0xb, 0xdd, 0x2c, 0x22, 0xd3, 0x3e, 0xcf, 0xc1, 0x30, 0x6, 0xf7, 0xf9, 0x8, 0xe5, 0x14, 0x1a, 0xeb, 0x76, 0x87, 0x89, 0x78, 0x95, 0x64, 0x6a, 0x9b, 0xad, 0x5c, 0x52, 0xa3, 0x4e, 0xbf, 0xb1, 0x40, 0x31, 0xc0, 0xce, 0x3f, 0xd2, 0x23, 0x2d, 0xdc, 0xea, 0x1b, 0x15, 0xe4, 0x9, 0xf8, 0xf6, 0x7, 0x9a, 0x6b, 0x65, 0x94, 0x79, 0x88, 0x86, 0x77, 0x41, 0xb0, 0xbe, 0x4f, 0xa2, 0x53, 0x5d, 0xac, 0x7a, 0x8b, 0x85, 0x74, 0x99, 0x68, 0x66, 0x97, 0xa1, 0x50, 0x5e, 0xaf, 0x42, 0xb3, 0xbd, 0x4c, 0xd1, 0x20, 0x2e, 0xdf, 0x32, 0xc3, 0xcd, 0x3c, 0xa, 0xfb, 0xf5, 0x4, 0xe9, 0x18, 0x16, 0xe7, 0xa7, 0x56, 0x58, 0xa9, 0x44, 0xb5, 0xbb, 0x4a, 0x7c, 0x8d, 0x83, 0x72, 0x9f, 0x6e, 0x60, 0x91, 0xc, 0xfd, 0xf3, 0x2, 0xef, 0x1e, 0x10, 0xe1, 0xd7, 0x26, 0x28, 0xd9, 0x34, 0xc5, 0xcb, 0x3a, 0xec, 0x1d, 0x13, 0xe2, 0xf, 0xfe, 0xf0, 0x1, 0x37, 0xc6, 0xc8, 0x39, 0xd4, 0x25, 0x2b, 0xda, 0x47, 0xb6, 0xb8, 0x49, 0xa4, 0x55, 0x5b, 0xaa, 0x9c, 0x6d, 0x63, 0x92, 0x7f, 0x8e, 0x80, 0x71},
- {0x0, 0xf2, 0xf9, 0xb, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27, 0x9b, 0x69, 0x62, 0x90, 0x74, 0x86, 0x8d, 0x7f, 0x58, 0xaa, 0xa1, 0x53, 0xb7, 0x45, 0x4e, 0xbc, 0x2b, 0xd9, 0xd2, 0x20, 0xc4, 0x36, 0x3d, 0xcf, 0xe8, 0x1a, 0x11, 0xe3, 0x7, 0xf5, 0xfe, 0xc, 0xb0, 0x42, 0x49, 0xbb, 0x5f, 0xad, 0xa6, 0x54, 0x73, 0x81, 0x8a, 0x78, 0x9c, 0x6e, 0x65, 0x97, 0x56, 0xa4, 0xaf, 0x5d, 0xb9, 0x4b, 0x40, 0xb2, 0x95, 0x67, 0x6c, 0x9e, 0x7a, 0x88, 0x83, 0x71, 0xcd, 0x3f, 0x34, 0xc6, 0x22, 0xd0, 0xdb, 0x29, 0xe, 0xfc, 0xf7, 0x5, 0xe1, 0x13, 0x18, 0xea, 0x7d, 0x8f, 0x84, 0x76, 0x92, 0x60, 0x6b, 0x99, 0xbe, 0x4c, 0x47, 0xb5, 0x51, 0xa3, 0xa8, 0x5a, 0xe6, 0x14, 0x1f, 0xed, 0x9, 0xfb, 0xf0, 0x2, 0x25, 0xd7, 0xdc, 0x2e, 0xca, 0x38, 0x33, 0xc1, 0xac, 0x5e, 0x55, 0xa7, 0x43, 0xb1, 0xba, 0x48, 0x6f, 0x9d, 0x96, 0x64, 0x80, 0x72, 0x79, 0x8b, 0x37, 0xc5, 0xce, 0x3c, 0xd8, 0x2a, 0x21, 0xd3, 0xf4, 0x6, 0xd, 0xff, 0x1b, 0xe9, 0xe2, 0x10, 0x87, 0x75, 0x7e, 0x8c, 0x68, 0x9a, 0x91, 0x63, 0x44, 0xb6, 0xbd, 0x4f, 0xab, 0x59, 0x52, 0xa0, 0x1c, 0xee, 0xe5, 0x17, 0xf3, 0x1, 0xa, 0xf8, 0xdf, 0x2d, 0x26, 0xd4, 0x30, 0xc2, 0xc9, 0x3b, 0xfa, 0x8, 0x3, 0xf1, 0x15, 0xe7, 0xec, 0x1e, 0x39, 0xcb, 0xc0, 0x32, 0xd6, 0x24, 0x2f, 0xdd, 0x61, 0x93, 0x98, 0x6a, 0x8e, 0x7c, 0x77, 0x85, 0xa2, 0x50, 0x5b, 0xa9, 0x4d, 0xbf, 0xb4, 0x46, 0xd1, 0x23, 0x28, 0xda, 0x3e, 0xcc, 0xc7, 0x35, 0x12, 0xe0, 0xeb, 0x19, 0xfd, 0xf, 0x4, 0xf6, 0x4a, 0xb8, 0xb3, 0x41, 0xa5, 0x57, 0x5c, 0xae, 0x89, 0x7b, 0x70, 0x82, 0x66, 0x94, 0x9f, 0x6d},
- {0x0, 0xf3, 0xfb, 0x8, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28, 0x8b, 0x78, 0x70, 0x83, 0x60, 0x93, 0x9b, 0x68, 0x40, 0xb3, 0xbb, 0x48, 0xab, 0x58, 0x50, 0xa3, 0xb, 0xf8, 0xf0, 0x3, 0xe0, 0x13, 0x1b, 0xe8, 0xc0, 0x33, 0x3b, 0xc8, 0x2b, 0xd8, 0xd0, 0x23, 0x80, 0x73, 0x7b, 0x88, 0x6b, 0x98, 0x90, 0x63, 0x4b, 0xb8, 0xb0, 0x43, 0xa0, 0x53, 0x5b, 0xa8, 0x16, 0xe5, 0xed, 0x1e, 0xfd, 0xe, 0x6, 0xf5, 0xdd, 0x2e, 0x26, 0xd5, 0x36, 0xc5, 0xcd, 0x3e, 0x9d, 0x6e, 0x66, 0x95, 0x76, 0x85, 0x8d, 0x7e, 0x56, 0xa5, 0xad, 0x5e, 0xbd, 0x4e, 0x46, 0xb5, 0x1d, 0xee, 0xe6, 0x15, 0xf6, 0x5, 0xd, 0xfe, 0xd6, 0x25, 0x2d, 0xde, 0x3d, 0xce, 0xc6, 0x35, 0x96, 0x65, 0x6d, 0x9e, 0x7d, 0x8e, 0x86, 0x75, 0x5d, 0xae, 0xa6, 0x55, 0xb6, 0x45, 0x4d, 0xbe, 0x2c, 0xdf, 0xd7, 0x24, 0xc7, 0x34, 0x3c, 0xcf, 0xe7, 0x14, 0x1c, 0xef, 0xc, 0xff, 0xf7, 0x4, 0xa7, 0x54, 0x5c, 0xaf, 0x4c, 0xbf, 0xb7, 0x44, 0x6c, 0x9f, 0x97, 0x64, 0x87, 0x74, 0x7c, 0x8f, 0x27, 0xd4, 0xdc, 0x2f, 0xcc, 0x3f, 0x37, 0xc4, 0xec, 0x1f, 0x17, 0xe4, 0x7, 0xf4, 0xfc, 0xf, 0xac, 0x5f, 0x57, 0xa4, 0x47, 0xb4, 0xbc, 0x4f, 0x67, 0x94, 0x9c, 0x6f, 0x8c, 0x7f, 0x77, 0x84, 0x3a, 0xc9, 0xc1, 0x32, 0xd1, 0x22, 0x2a, 0xd9, 0xf1, 0x2, 0xa, 0xf9, 0x1a, 0xe9, 0xe1, 0x12, 0xb1, 0x42, 0x4a, 0xb9, 0x5a, 0xa9, 0xa1, 0x52, 0x7a, 0x89, 0x81, 0x72, 0x91, 0x62, 0x6a, 0x99, 0x31, 0xc2, 0xca, 0x39, 0xda, 0x29, 0x21, 0xd2, 0xfa, 0x9, 0x1, 0xf2, 0x11, 0xe2, 0xea, 0x19, 0xba, 0x49, 0x41, 0xb2, 0x51, 0xa2, 0xaa, 0x59, 0x71, 0x82, 0x8a, 0x79, 0x9a, 0x69, 0x61, 0x92},
- {0x0, 0xf4, 0xf5, 0x1, 0xf7, 0x3, 0x2, 0xf6, 0xf3, 0x7, 0x6, 0xf2, 0x4, 0xf0, 0xf1, 0x5, 0xfb, 0xf, 0xe, 0xfa, 0xc, 0xf8, 0xf9, 0xd, 0x8, 0xfc, 0xfd, 0x9, 0xff, 0xb, 0xa, 0xfe, 0xeb, 0x1f, 0x1e, 0xea, 0x1c, 0xe8, 0xe9, 0x1d, 0x18, 0xec, 0xed, 0x19, 0xef, 0x1b, 0x1a, 0xee, 0x10, 0xe4, 0xe5, 0x11, 0xe7, 0x13, 0x12, 0xe6, 0xe3, 0x17, 0x16, 0xe2, 0x14, 0xe0, 0xe1, 0x15, 0xcb, 0x3f, 0x3e, 0xca, 0x3c, 0xc8, 0xc9, 0x3d, 0x38, 0xcc, 0xcd, 0x39, 0xcf, 0x3b, 0x3a, 0xce, 0x30, 0xc4, 0xc5, 0x31, 0xc7, 0x33, 0x32, 0xc6, 0xc3, 0x37, 0x36, 0xc2, 0x34, 0xc0, 0xc1, 0x35, 0x20, 0xd4, 0xd5, 0x21, 0xd7, 0x23, 0x22, 0xd6, 0xd3, 0x27, 0x26, 0xd2, 0x24, 0xd0, 0xd1, 0x25, 0xdb, 0x2f, 0x2e, 0xda, 0x2c, 0xd8, 0xd9, 0x2d, 0x28, 0xdc, 0xdd, 0x29, 0xdf, 0x2b, 0x2a, 0xde, 0x8b, 0x7f, 0x7e, 0x8a, 0x7c, 0x88, 0x89, 0x7d, 0x78, 0x8c, 0x8d, 0x79, 0x8f, 0x7b, 0x7a, 0x8e, 0x70, 0x84, 0x85, 0x71, 0x87, 0x73, 0x72, 0x86, 0x83, 0x77, 0x76, 0x82, 0x74, 0x80, 0x81, 0x75, 0x60, 0x94, 0x95, 0x61, 0x97, 0x63, 0x62, 0x96, 0x93, 0x67, 0x66, 0x92, 0x64, 0x90, 0x91, 0x65, 0x9b, 0x6f, 0x6e, 0x9a, 0x6c, 0x98, 0x99, 0x6d, 0x68, 0x9c, 0x9d, 0x69, 0x9f, 0x6b, 0x6a, 0x9e, 0x40, 0xb4, 0xb5, 0x41, 0xb7, 0x43, 0x42, 0xb6, 0xb3, 0x47, 0x46, 0xb2, 0x44, 0xb0, 0xb1, 0x45, 0xbb, 0x4f, 0x4e, 0xba, 0x4c, 0xb8, 0xb9, 0x4d, 0x48, 0xbc, 0xbd, 0x49, 0xbf, 0x4b, 0x4a, 0xbe, 0xab, 0x5f, 0x5e, 0xaa, 0x5c, 0xa8, 0xa9, 0x5d, 0x58, 0xac, 0xad, 0x59, 0xaf, 0x5b, 0x5a, 0xae, 0x50, 0xa4, 0xa5, 0x51, 0xa7, 0x53, 0x52, 0xa6, 0xa3, 0x57, 0x56, 0xa2, 0x54, 0xa0, 0xa1, 0x55},
- {0x0, 0xf5, 0xf7, 0x2, 0xf3, 0x6, 0x4, 0xf1, 0xfb, 0xe, 0xc, 0xf9, 0x8, 0xfd, 0xff, 0xa, 0xeb, 0x1e, 0x1c, 0xe9, 0x18, 0xed, 0xef, 0x1a, 0x10, 0xe5, 0xe7, 0x12, 0xe3, 0x16, 0x14, 0xe1, 0xcb, 0x3e, 0x3c, 0xc9, 0x38, 0xcd, 0xcf, 0x3a, 0x30, 0xc5, 0xc7, 0x32, 0xc3, 0x36, 0x34, 0xc1, 0x20, 0xd5, 0xd7, 0x22, 0xd3, 0x26, 0x24, 0xd1, 0xdb, 0x2e, 0x2c, 0xd9, 0x28, 0xdd, 0xdf, 0x2a, 0x8b, 0x7e, 0x7c, 0x89, 0x78, 0x8d, 0x8f, 0x7a, 0x70, 0x85, 0x87, 0x72, 0x83, 0x76, 0x74, 0x81, 0x60, 0x95, 0x97, 0x62, 0x93, 0x66, 0x64, 0x91, 0x9b, 0x6e, 0x6c, 0x99, 0x68, 0x9d, 0x9f, 0x6a, 0x40, 0xb5, 0xb7, 0x42, 0xb3, 0x46, 0x44, 0xb1, 0xbb, 0x4e, 0x4c, 0xb9, 0x48, 0xbd, 0xbf, 0x4a, 0xab, 0x5e, 0x5c, 0xa9, 0x58, 0xad, 0xaf, 0x5a, 0x50, 0xa5, 0xa7, 0x52, 0xa3, 0x56, 0x54, 0xa1, 0xb, 0xfe, 0xfc, 0x9, 0xf8, 0xd, 0xf, 0xfa, 0xf0, 0x5, 0x7, 0xf2, 0x3, 0xf6, 0xf4, 0x1, 0xe0, 0x15, 0x17, 0xe2, 0x13, 0xe6, 0xe4, 0x11, 0x1b, 0xee, 0xec, 0x19, 0xe8, 0x1d, 0x1f, 0xea, 0xc0, 0x35, 0x37, 0xc2, 0x33, 0xc6, 0xc4, 0x31, 0x3b, 0xce, 0xcc, 0x39, 0xc8, 0x3d, 0x3f, 0xca, 0x2b, 0xde, 0xdc, 0x29, 0xd8, 0x2d, 0x2f, 0xda, 0xd0, 0x25, 0x27, 0xd2, 0x23, 0xd6, 0xd4, 0x21, 0x80, 0x75, 0x77, 0x82, 0x73, 0x86, 0x84, 0x71, 0x7b, 0x8e, 0x8c, 0x79, 0x88, 0x7d, 0x7f, 0x8a, 0x6b, 0x9e, 0x9c, 0x69, 0x98, 0x6d, 0x6f, 0x9a, 0x90, 0x65, 0x67, 0x92, 0x63, 0x96, 0x94, 0x61, 0x4b, 0xbe, 0xbc, 0x49, 0xb8, 0x4d, 0x4f, 0xba, 0xb0, 0x45, 0x47, 0xb2, 0x43, 0xb6, 0xb4, 0x41, 0xa0, 0x55, 0x57, 0xa2, 0x53, 0xa6, 0xa4, 0x51, 0x5b, 0xae, 0xac, 0x59, 0xa8, 0x5d, 0x5f, 0xaa},
- {0x0, 0xf6, 0xf1, 0x7, 0xff, 0x9, 0xe, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b, 0xdb, 0x2d, 0x2a, 0xdc, 0x24, 0xd2, 0xd5, 0x23, 0x38, 0xce, 0xc9, 0x3f, 0xc7, 0x31, 0x36, 0xc0, 0xab, 0x5d, 0x5a, 0xac, 0x54, 0xa2, 0xa5, 0x53, 0x48, 0xbe, 0xb9, 0x4f, 0xb7, 0x41, 0x46, 0xb0, 0x70, 0x86, 0x81, 0x77, 0x8f, 0x79, 0x7e, 0x88, 0x93, 0x65, 0x62, 0x94, 0x6c, 0x9a, 0x9d, 0x6b, 0x4b, 0xbd, 0xba, 0x4c, 0xb4, 0x42, 0x45, 0xb3, 0xa8, 0x5e, 0x59, 0xaf, 0x57, 0xa1, 0xa6, 0x50, 0x90, 0x66, 0x61, 0x97, 0x6f, 0x99, 0x9e, 0x68, 0x73, 0x85, 0x82, 0x74, 0x8c, 0x7a, 0x7d, 0x8b, 0xe0, 0x16, 0x11, 0xe7, 0x1f, 0xe9, 0xee, 0x18, 0x3, 0xf5, 0xf2, 0x4, 0xfc, 0xa, 0xd, 0xfb, 0x3b, 0xcd, 0xca, 0x3c, 0xc4, 0x32, 0x35, 0xc3, 0xd8, 0x2e, 0x29, 0xdf, 0x27, 0xd1, 0xd6, 0x20, 0x96, 0x60, 0x67, 0x91, 0x69, 0x9f, 0x98, 0x6e, 0x75, 0x83, 0x84, 0x72, 0x8a, 0x7c, 0x7b, 0x8d, 0x4d, 0xbb, 0xbc, 0x4a, 0xb2, 0x44, 0x43, 0xb5, 0xae, 0x58, 0x5f, 0xa9, 0x51, 0xa7, 0xa0, 0x56, 0x3d, 0xcb, 0xcc, 0x3a, 0xc2, 0x34, 0x33, 0xc5, 0xde, 0x28, 0x2f, 0xd9, 0x21, 0xd7, 0xd0, 0x26, 0xe6, 0x10, 0x17, 0xe1, 0x19, 0xef, 0xe8, 0x1e, 0x5, 0xf3, 0xf4, 0x2, 0xfa, 0xc, 0xb, 0xfd, 0xdd, 0x2b, 0x2c, 0xda, 0x22, 0xd4, 0xd3, 0x25, 0x3e, 0xc8, 0xcf, 0x39, 0xc1, 0x37, 0x30, 0xc6, 0x6, 0xf0, 0xf7, 0x1, 0xf9, 0xf, 0x8, 0xfe, 0xe5, 0x13, 0x14, 0xe2, 0x1a, 0xec, 0xeb, 0x1d, 0x76, 0x80, 0x87, 0x71, 0x89, 0x7f, 0x78, 0x8e, 0x95, 0x63, 0x64, 0x92, 0x6a, 0x9c, 0x9b, 0x6d, 0xad, 0x5b, 0x5c, 0xaa, 0x52, 0xa4, 0xa3, 0x55, 0x4e, 0xb8, 0xbf, 0x49, 0xb1, 0x47, 0x40, 0xb6},
- {0x0, 0xf7, 0xf3, 0x4, 0xfb, 0xc, 0x8, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14, 0xcb, 0x3c, 0x38, 0xcf, 0x30, 0xc7, 0xc3, 0x34, 0x20, 0xd7, 0xd3, 0x24, 0xdb, 0x2c, 0x28, 0xdf, 0x8b, 0x7c, 0x78, 0x8f, 0x70, 0x87, 0x83, 0x74, 0x60, 0x97, 0x93, 0x64, 0x9b, 0x6c, 0x68, 0x9f, 0x40, 0xb7, 0xb3, 0x44, 0xbb, 0x4c, 0x48, 0xbf, 0xab, 0x5c, 0x58, 0xaf, 0x50, 0xa7, 0xa3, 0x54, 0xb, 0xfc, 0xf8, 0xf, 0xf0, 0x7, 0x3, 0xf4, 0xe0, 0x17, 0x13, 0xe4, 0x1b, 0xec, 0xe8, 0x1f, 0xc0, 0x37, 0x33, 0xc4, 0x3b, 0xcc, 0xc8, 0x3f, 0x2b, 0xdc, 0xd8, 0x2f, 0xd0, 0x27, 0x23, 0xd4, 0x80, 0x77, 0x73, 0x84, 0x7b, 0x8c, 0x88, 0x7f, 0x6b, 0x9c, 0x98, 0x6f, 0x90, 0x67, 0x63, 0x94, 0x4b, 0xbc, 0xb8, 0x4f, 0xb0, 0x47, 0x43, 0xb4, 0xa0, 0x57, 0x53, 0xa4, 0x5b, 0xac, 0xa8, 0x5f, 0x16, 0xe1, 0xe5, 0x12, 0xed, 0x1a, 0x1e, 0xe9, 0xfd, 0xa, 0xe, 0xf9, 0x6, 0xf1, 0xf5, 0x2, 0xdd, 0x2a, 0x2e, 0xd9, 0x26, 0xd1, 0xd5, 0x22, 0x36, 0xc1, 0xc5, 0x32, 0xcd, 0x3a, 0x3e, 0xc9, 0x9d, 0x6a, 0x6e, 0x99, 0x66, 0x91, 0x95, 0x62, 0x76, 0x81, 0x85, 0x72, 0x8d, 0x7a, 0x7e, 0x89, 0x56, 0xa1, 0xa5, 0x52, 0xad, 0x5a, 0x5e, 0xa9, 0xbd, 0x4a, 0x4e, 0xb9, 0x46, 0xb1, 0xb5, 0x42, 0x1d, 0xea, 0xee, 0x19, 0xe6, 0x11, 0x15, 0xe2, 0xf6, 0x1, 0x5, 0xf2, 0xd, 0xfa, 0xfe, 0x9, 0xd6, 0x21, 0x25, 0xd2, 0x2d, 0xda, 0xde, 0x29, 0x3d, 0xca, 0xce, 0x39, 0xc6, 0x31, 0x35, 0xc2, 0x96, 0x61, 0x65, 0x92, 0x6d, 0x9a, 0x9e, 0x69, 0x7d, 0x8a, 0x8e, 0x79, 0x86, 0x71, 0x75, 0x82, 0x5d, 0xaa, 0xae, 0x59, 0xa6, 0x51, 0x55, 0xa2, 0xb6, 0x41, 0x45, 0xb2, 0x4d, 0xba, 0xbe, 0x49},
- {0x0, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41, 0x3b, 0xc3, 0xd6, 0x2e, 0xfc, 0x4, 0x11, 0xe9, 0xa8, 0x50, 0x45, 0xbd, 0x6f, 0x97, 0x82, 0x7a, 0x76, 0x8e, 0x9b, 0x63, 0xb1, 0x49, 0x5c, 0xa4, 0xe5, 0x1d, 0x8, 0xf0, 0x22, 0xda, 0xcf, 0x37, 0x4d, 0xb5, 0xa0, 0x58, 0x8a, 0x72, 0x67, 0x9f, 0xde, 0x26, 0x33, 0xcb, 0x19, 0xe1, 0xf4, 0xc, 0xec, 0x14, 0x1, 0xf9, 0x2b, 0xd3, 0xc6, 0x3e, 0x7f, 0x87, 0x92, 0x6a, 0xb8, 0x40, 0x55, 0xad, 0xd7, 0x2f, 0x3a, 0xc2, 0x10, 0xe8, 0xfd, 0x5, 0x44, 0xbc, 0xa9, 0x51, 0x83, 0x7b, 0x6e, 0x96, 0x9a, 0x62, 0x77, 0x8f, 0x5d, 0xa5, 0xb0, 0x48, 0x9, 0xf1, 0xe4, 0x1c, 0xce, 0x36, 0x23, 0xdb, 0xa1, 0x59, 0x4c, 0xb4, 0x66, 0x9e, 0x8b, 0x73, 0x32, 0xca, 0xdf, 0x27, 0xf5, 0xd, 0x18, 0xe0, 0xc5, 0x3d, 0x28, 0xd0, 0x2, 0xfa, 0xef, 0x17, 0x56, 0xae, 0xbb, 0x43, 0x91, 0x69, 0x7c, 0x84, 0xfe, 0x6, 0x13, 0xeb, 0x39, 0xc1, 0xd4, 0x2c, 0x6d, 0x95, 0x80, 0x78, 0xaa, 0x52, 0x47, 0xbf, 0xb3, 0x4b, 0x5e, 0xa6, 0x74, 0x8c, 0x99, 0x61, 0x20, 0xd8, 0xcd, 0x35, 0xe7, 0x1f, 0xa, 0xf2, 0x88, 0x70, 0x65, 0x9d, 0x4f, 0xb7, 0xa2, 0x5a, 0x1b, 0xe3, 0xf6, 0xe, 0xdc, 0x24, 0x31, 0xc9, 0x29, 0xd1, 0xc4, 0x3c, 0xee, 0x16, 0x3, 0xfb, 0xba, 0x42, 0x57, 0xaf, 0x7d, 0x85, 0x90, 0x68, 0x12, 0xea, 0xff, 0x7, 0xd5, 0x2d, 0x38, 0xc0, 0x81, 0x79, 0x6c, 0x94, 0x46, 0xbe, 0xab, 0x53, 0x5f, 0xa7, 0xb2, 0x4a, 0x98, 0x60, 0x75, 0x8d, 0xcc, 0x34, 0x21, 0xd9, 0xb, 0xf3, 0xe6, 0x1e, 0x64, 0x9c, 0x89, 0x71, 0xa3, 0x5b, 0x4e, 0xb6, 0xf7, 0xf, 0x1a, 0xe2, 0x30, 0xc8, 0xdd, 0x25},
- {0x0, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e, 0x2b, 0xd2, 0xc4, 0x3d, 0xe8, 0x11, 0x7, 0xfe, 0xb0, 0x49, 0x5f, 0xa6, 0x73, 0x8a, 0x9c, 0x65, 0x56, 0xaf, 0xb9, 0x40, 0x95, 0x6c, 0x7a, 0x83, 0xcd, 0x34, 0x22, 0xdb, 0xe, 0xf7, 0xe1, 0x18, 0x7d, 0x84, 0x92, 0x6b, 0xbe, 0x47, 0x51, 0xa8, 0xe6, 0x1f, 0x9, 0xf0, 0x25, 0xdc, 0xca, 0x33, 0xac, 0x55, 0x43, 0xba, 0x6f, 0x96, 0x80, 0x79, 0x37, 0xce, 0xd8, 0x21, 0xf4, 0xd, 0x1b, 0xe2, 0x87, 0x7e, 0x68, 0x91, 0x44, 0xbd, 0xab, 0x52, 0x1c, 0xe5, 0xf3, 0xa, 0xdf, 0x26, 0x30, 0xc9, 0xfa, 0x3, 0x15, 0xec, 0x39, 0xc0, 0xd6, 0x2f, 0x61, 0x98, 0x8e, 0x77, 0xa2, 0x5b, 0x4d, 0xb4, 0xd1, 0x28, 0x3e, 0xc7, 0x12, 0xeb, 0xfd, 0x4, 0x4a, 0xb3, 0xa5, 0x5c, 0x89, 0x70, 0x66, 0x9f, 0x45, 0xbc, 0xaa, 0x53, 0x86, 0x7f, 0x69, 0x90, 0xde, 0x27, 0x31, 0xc8, 0x1d, 0xe4, 0xf2, 0xb, 0x6e, 0x97, 0x81, 0x78, 0xad, 0x54, 0x42, 0xbb, 0xf5, 0xc, 0x1a, 0xe3, 0x36, 0xcf, 0xd9, 0x20, 0x13, 0xea, 0xfc, 0x5, 0xd0, 0x29, 0x3f, 0xc6, 0x88, 0x71, 0x67, 0x9e, 0x4b, 0xb2, 0xa4, 0x5d, 0x38, 0xc1, 0xd7, 0x2e, 0xfb, 0x2, 0x14, 0xed, 0xa3, 0x5a, 0x4c, 0xb5, 0x60, 0x99, 0x8f, 0x76, 0xe9, 0x10, 0x6, 0xff, 0x2a, 0xd3, 0xc5, 0x3c, 0x72, 0x8b, 0x9d, 0x64, 0xb1, 0x48, 0x5e, 0xa7, 0xc2, 0x3b, 0x2d, 0xd4, 0x1, 0xf8, 0xee, 0x17, 0x59, 0xa0, 0xb6, 0x4f, 0x9a, 0x63, 0x75, 0x8c, 0xbf, 0x46, 0x50, 0xa9, 0x7c, 0x85, 0x93, 0x6a, 0x24, 0xdd, 0xcb, 0x32, 0xe7, 0x1e, 0x8, 0xf1, 0x94, 0x6d, 0x7b, 0x82, 0x57, 0xae, 0xb8, 0x41, 0xf, 0xf6, 0xe0, 0x19, 0xcc, 0x35, 0x23, 0xda},
- {0x0, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f, 0x1b, 0xe1, 0xf2, 0x8, 0xd4, 0x2e, 0x3d, 0xc7, 0x98, 0x62, 0x71, 0x8b, 0x57, 0xad, 0xbe, 0x44, 0x36, 0xcc, 0xdf, 0x25, 0xf9, 0x3, 0x10, 0xea, 0xb5, 0x4f, 0x5c, 0xa6, 0x7a, 0x80, 0x93, 0x69, 0x2d, 0xd7, 0xc4, 0x3e, 0xe2, 0x18, 0xb, 0xf1, 0xae, 0x54, 0x47, 0xbd, 0x61, 0x9b, 0x88, 0x72, 0x6c, 0x96, 0x85, 0x7f, 0xa3, 0x59, 0x4a, 0xb0, 0xef, 0x15, 0x6, 0xfc, 0x20, 0xda, 0xc9, 0x33, 0x77, 0x8d, 0x9e, 0x64, 0xb8, 0x42, 0x51, 0xab, 0xf4, 0xe, 0x1d, 0xe7, 0x3b, 0xc1, 0xd2, 0x28, 0x5a, 0xa0, 0xb3, 0x49, 0x95, 0x6f, 0x7c, 0x86, 0xd9, 0x23, 0x30, 0xca, 0x16, 0xec, 0xff, 0x5, 0x41, 0xbb, 0xa8, 0x52, 0x8e, 0x74, 0x67, 0x9d, 0xc2, 0x38, 0x2b, 0xd1, 0xd, 0xf7, 0xe4, 0x1e, 0xd8, 0x22, 0x31, 0xcb, 0x17, 0xed, 0xfe, 0x4, 0x5b, 0xa1, 0xb2, 0x48, 0x94, 0x6e, 0x7d, 0x87, 0xc3, 0x39, 0x2a, 0xd0, 0xc, 0xf6, 0xe5, 0x1f, 0x40, 0xba, 0xa9, 0x53, 0x8f, 0x75, 0x66, 0x9c, 0xee, 0x14, 0x7, 0xfd, 0x21, 0xdb, 0xc8, 0x32, 0x6d, 0x97, 0x84, 0x7e, 0xa2, 0x58, 0x4b, 0xb1, 0xf5, 0xf, 0x1c, 0xe6, 0x3a, 0xc0, 0xd3, 0x29, 0x76, 0x8c, 0x9f, 0x65, 0xb9, 0x43, 0x50, 0xaa, 0xb4, 0x4e, 0x5d, 0xa7, 0x7b, 0x81, 0x92, 0x68, 0x37, 0xcd, 0xde, 0x24, 0xf8, 0x2, 0x11, 0xeb, 0xaf, 0x55, 0x46, 0xbc, 0x60, 0x9a, 0x89, 0x73, 0x2c, 0xd6, 0xc5, 0x3f, 0xe3, 0x19, 0xa, 0xf0, 0x82, 0x78, 0x6b, 0x91, 0x4d, 0xb7, 0xa4, 0x5e, 0x1, 0xfb, 0xe8, 0x12, 0xce, 0x34, 0x27, 0xdd, 0x99, 0x63, 0x70, 0x8a, 0x56, 0xac, 0xbf, 0x45, 0x1a, 0xe0, 0xf3, 0x9, 0xd5, 0x2f, 0x3c, 0xc6},
- {0x0, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50, 0xb, 0xf0, 0xe0, 0x1b, 0xc0, 0x3b, 0x2b, 0xd0, 0x80, 0x7b, 0x6b, 0x90, 0x4b, 0xb0, 0xa0, 0x5b, 0x16, 0xed, 0xfd, 0x6, 0xdd, 0x26, 0x36, 0xcd, 0x9d, 0x66, 0x76, 0x8d, 0x56, 0xad, 0xbd, 0x46, 0x1d, 0xe6, 0xf6, 0xd, 0xd6, 0x2d, 0x3d, 0xc6, 0x96, 0x6d, 0x7d, 0x86, 0x5d, 0xa6, 0xb6, 0x4d, 0x2c, 0xd7, 0xc7, 0x3c, 0xe7, 0x1c, 0xc, 0xf7, 0xa7, 0x5c, 0x4c, 0xb7, 0x6c, 0x97, 0x87, 0x7c, 0x27, 0xdc, 0xcc, 0x37, 0xec, 0x17, 0x7, 0xfc, 0xac, 0x57, 0x47, 0xbc, 0x67, 0x9c, 0x8c, 0x77, 0x3a, 0xc1, 0xd1, 0x2a, 0xf1, 0xa, 0x1a, 0xe1, 0xb1, 0x4a, 0x5a, 0xa1, 0x7a, 0x81, 0x91, 0x6a, 0x31, 0xca, 0xda, 0x21, 0xfa, 0x1, 0x11, 0xea, 0xba, 0x41, 0x51, 0xaa, 0x71, 0x8a, 0x9a, 0x61, 0x58, 0xa3, 0xb3, 0x48, 0x93, 0x68, 0x78, 0x83, 0xd3, 0x28, 0x38, 0xc3, 0x18, 0xe3, 0xf3, 0x8, 0x53, 0xa8, 0xb8, 0x43, 0x98, 0x63, 0x73, 0x88, 0xd8, 0x23, 0x33, 0xc8, 0x13, 0xe8, 0xf8, 0x3, 0x4e, 0xb5, 0xa5, 0x5e, 0x85, 0x7e, 0x6e, 0x95, 0xc5, 0x3e, 0x2e, 0xd5, 0xe, 0xf5, 0xe5, 0x1e, 0x45, 0xbe, 0xae, 0x55, 0x8e, 0x75, 0x65, 0x9e, 0xce, 0x35, 0x25, 0xde, 0x5, 0xfe, 0xee, 0x15, 0x74, 0x8f, 0x9f, 0x64, 0xbf, 0x44, 0x54, 0xaf, 0xff, 0x4, 0x14, 0xef, 0x34, 0xcf, 0xdf, 0x24, 0x7f, 0x84, 0x94, 0x6f, 0xb4, 0x4f, 0x5f, 0xa4, 0xf4, 0xf, 0x1f, 0xe4, 0x3f, 0xc4, 0xd4, 0x2f, 0x62, 0x99, 0x89, 0x72, 0xa9, 0x52, 0x42, 0xb9, 0xe9, 0x12, 0x2, 0xf9, 0x22, 0xd9, 0xc9, 0x32, 0x69, 0x92, 0x82, 0x79, 0xa2, 0x59, 0x49, 0xb2, 0xe2, 0x19, 0x9, 0xf2, 0x29, 0xd2, 0xc2, 0x39},
- {0x0, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d, 0x7b, 0x87, 0x9e, 0x62, 0xac, 0x50, 0x49, 0xb5, 0xc8, 0x34, 0x2d, 0xd1, 0x1f, 0xe3, 0xfa, 0x6, 0xf6, 0xa, 0x13, 0xef, 0x21, 0xdd, 0xc4, 0x38, 0x45, 0xb9, 0xa0, 0x5c, 0x92, 0x6e, 0x77, 0x8b, 0x8d, 0x71, 0x68, 0x94, 0x5a, 0xa6, 0xbf, 0x43, 0x3e, 0xc2, 0xdb, 0x27, 0xe9, 0x15, 0xc, 0xf0, 0xf1, 0xd, 0x14, 0xe8, 0x26, 0xda, 0xc3, 0x3f, 0x42, 0xbe, 0xa7, 0x5b, 0x95, 0x69, 0x70, 0x8c, 0x8a, 0x76, 0x6f, 0x93, 0x5d, 0xa1, 0xb8, 0x44, 0x39, 0xc5, 0xdc, 0x20, 0xee, 0x12, 0xb, 0xf7, 0x7, 0xfb, 0xe2, 0x1e, 0xd0, 0x2c, 0x35, 0xc9, 0xb4, 0x48, 0x51, 0xad, 0x63, 0x9f, 0x86, 0x7a, 0x7c, 0x80, 0x99, 0x65, 0xab, 0x57, 0x4e, 0xb2, 0xcf, 0x33, 0x2a, 0xd6, 0x18, 0xe4, 0xfd, 0x1, 0xff, 0x3, 0x1a, 0xe6, 0x28, 0xd4, 0xcd, 0x31, 0x4c, 0xb0, 0xa9, 0x55, 0x9b, 0x67, 0x7e, 0x82, 0x84, 0x78, 0x61, 0x9d, 0x53, 0xaf, 0xb6, 0x4a, 0x37, 0xcb, 0xd2, 0x2e, 0xe0, 0x1c, 0x5, 0xf9, 0x9, 0xf5, 0xec, 0x10, 0xde, 0x22, 0x3b, 0xc7, 0xba, 0x46, 0x5f, 0xa3, 0x6d, 0x91, 0x88, 0x74, 0x72, 0x8e, 0x97, 0x6b, 0xa5, 0x59, 0x40, 0xbc, 0xc1, 0x3d, 0x24, 0xd8, 0x16, 0xea, 0xf3, 0xf, 0xe, 0xf2, 0xeb, 0x17, 0xd9, 0x25, 0x3c, 0xc0, 0xbd, 0x41, 0x58, 0xa4, 0x6a, 0x96, 0x8f, 0x73, 0x75, 0x89, 0x90, 0x6c, 0xa2, 0x5e, 0x47, 0xbb, 0xc6, 0x3a, 0x23, 0xdf, 0x11, 0xed, 0xf4, 0x8, 0xf8, 0x4, 0x1d, 0xe1, 0x2f, 0xd3, 0xca, 0x36, 0x4b, 0xb7, 0xae, 0x52, 0x9c, 0x60, 0x79, 0x85, 0x83, 0x7f, 0x66, 0x9a, 0x54, 0xa8, 0xb1, 0x4d, 0x30, 0xcc, 0xd5, 0x29, 0xe7, 0x1b, 0x2, 0xfe},
- {0x0, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72, 0x6b, 0x96, 0x8c, 0x71, 0xb8, 0x45, 0x5f, 0xa2, 0xd0, 0x2d, 0x37, 0xca, 0x3, 0xfe, 0xe4, 0x19, 0xd6, 0x2b, 0x31, 0xcc, 0x5, 0xf8, 0xe2, 0x1f, 0x6d, 0x90, 0x8a, 0x77, 0xbe, 0x43, 0x59, 0xa4, 0xbd, 0x40, 0x5a, 0xa7, 0x6e, 0x93, 0x89, 0x74, 0x6, 0xfb, 0xe1, 0x1c, 0xd5, 0x28, 0x32, 0xcf, 0xb1, 0x4c, 0x56, 0xab, 0x62, 0x9f, 0x85, 0x78, 0xa, 0xf7, 0xed, 0x10, 0xd9, 0x24, 0x3e, 0xc3, 0xda, 0x27, 0x3d, 0xc0, 0x9, 0xf4, 0xee, 0x13, 0x61, 0x9c, 0x86, 0x7b, 0xb2, 0x4f, 0x55, 0xa8, 0x67, 0x9a, 0x80, 0x7d, 0xb4, 0x49, 0x53, 0xae, 0xdc, 0x21, 0x3b, 0xc6, 0xf, 0xf2, 0xe8, 0x15, 0xc, 0xf1, 0xeb, 0x16, 0xdf, 0x22, 0x38, 0xc5, 0xb7, 0x4a, 0x50, 0xad, 0x64, 0x99, 0x83, 0x7e, 0x7f, 0x82, 0x98, 0x65, 0xac, 0x51, 0x4b, 0xb6, 0xc4, 0x39, 0x23, 0xde, 0x17, 0xea, 0xf0, 0xd, 0x14, 0xe9, 0xf3, 0xe, 0xc7, 0x3a, 0x20, 0xdd, 0xaf, 0x52, 0x48, 0xb5, 0x7c, 0x81, 0x9b, 0x66, 0xa9, 0x54, 0x4e, 0xb3, 0x7a, 0x87, 0x9d, 0x60, 0x12, 0xef, 0xf5, 0x8, 0xc1, 0x3c, 0x26, 0xdb, 0xc2, 0x3f, 0x25, 0xd8, 0x11, 0xec, 0xf6, 0xb, 0x79, 0x84, 0x9e, 0x63, 0xaa, 0x57, 0x4d, 0xb0, 0xce, 0x33, 0x29, 0xd4, 0x1d, 0xe0, 0xfa, 0x7, 0x75, 0x88, 0x92, 0x6f, 0xa6, 0x5b, 0x41, 0xbc, 0xa5, 0x58, 0x42, 0xbf, 0x76, 0x8b, 0x91, 0x6c, 0x1e, 0xe3, 0xf9, 0x4, 0xcd, 0x30, 0x2a, 0xd7, 0x18, 0xe5, 0xff, 0x2, 0xcb, 0x36, 0x2c, 0xd1, 0xa3, 0x5e, 0x44, 0xb9, 0x70, 0x8d, 0x97, 0x6a, 0x73, 0x8e, 0x94, 0x69, 0xa0, 0x5d, 0x47, 0xba, 0xc8, 0x35, 0x2f, 0xd2, 0x1b, 0xe6, 0xfc, 0x1},
- {0x0, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63, 0x5b, 0xa5, 0xba, 0x44, 0x84, 0x7a, 0x65, 0x9b, 0xf8, 0x6, 0x19, 0xe7, 0x27, 0xd9, 0xc6, 0x38, 0xb6, 0x48, 0x57, 0xa9, 0x69, 0x97, 0x88, 0x76, 0x15, 0xeb, 0xf4, 0xa, 0xca, 0x34, 0x2b, 0xd5, 0xed, 0x13, 0xc, 0xf2, 0x32, 0xcc, 0xd3, 0x2d, 0x4e, 0xb0, 0xaf, 0x51, 0x91, 0x6f, 0x70, 0x8e, 0x71, 0x8f, 0x90, 0x6e, 0xae, 0x50, 0x4f, 0xb1, 0xd2, 0x2c, 0x33, 0xcd, 0xd, 0xf3, 0xec, 0x12, 0x2a, 0xd4, 0xcb, 0x35, 0xf5, 0xb, 0x14, 0xea, 0x89, 0x77, 0x68, 0x96, 0x56, 0xa8, 0xb7, 0x49, 0xc7, 0x39, 0x26, 0xd8, 0x18, 0xe6, 0xf9, 0x7, 0x64, 0x9a, 0x85, 0x7b, 0xbb, 0x45, 0x5a, 0xa4, 0x9c, 0x62, 0x7d, 0x83, 0x43, 0xbd, 0xa2, 0x5c, 0x3f, 0xc1, 0xde, 0x20, 0xe0, 0x1e, 0x1, 0xff, 0xe2, 0x1c, 0x3, 0xfd, 0x3d, 0xc3, 0xdc, 0x22, 0x41, 0xbf, 0xa0, 0x5e, 0x9e, 0x60, 0x7f, 0x81, 0xb9, 0x47, 0x58, 0xa6, 0x66, 0x98, 0x87, 0x79, 0x1a, 0xe4, 0xfb, 0x5, 0xc5, 0x3b, 0x24, 0xda, 0x54, 0xaa, 0xb5, 0x4b, 0x8b, 0x75, 0x6a, 0x94, 0xf7, 0x9, 0x16, 0xe8, 0x28, 0xd6, 0xc9, 0x37, 0xf, 0xf1, 0xee, 0x10, 0xd0, 0x2e, 0x31, 0xcf, 0xac, 0x52, 0x4d, 0xb3, 0x73, 0x8d, 0x92, 0x6c, 0x93, 0x6d, 0x72, 0x8c, 0x4c, 0xb2, 0xad, 0x53, 0x30, 0xce, 0xd1, 0x2f, 0xef, 0x11, 0xe, 0xf0, 0xc8, 0x36, 0x29, 0xd7, 0x17, 0xe9, 0xf6, 0x8, 0x6b, 0x95, 0x8a, 0x74, 0xb4, 0x4a, 0x55, 0xab, 0x25, 0xdb, 0xc4, 0x3a, 0xfa, 0x4, 0x1b, 0xe5, 0x86, 0x78, 0x67, 0x99, 0x59, 0xa7, 0xb8, 0x46, 0x7e, 0x80, 0x9f, 0x61, 0xa1, 0x5f, 0x40, 0xbe, 0xdd, 0x23, 0x3c, 0xc2, 0x2, 0xfc, 0xe3, 0x1d},
- {0x0, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c, 0x4b, 0xb4, 0xa8, 0x57, 0x90, 0x6f, 0x73, 0x8c, 0xe0, 0x1f, 0x3, 0xfc, 0x3b, 0xc4, 0xd8, 0x27, 0x96, 0x69, 0x75, 0x8a, 0x4d, 0xb2, 0xae, 0x51, 0x3d, 0xc2, 0xde, 0x21, 0xe6, 0x19, 0x5, 0xfa, 0xdd, 0x22, 0x3e, 0xc1, 0x6, 0xf9, 0xe5, 0x1a, 0x76, 0x89, 0x95, 0x6a, 0xad, 0x52, 0x4e, 0xb1, 0x31, 0xce, 0xd2, 0x2d, 0xea, 0x15, 0x9, 0xf6, 0x9a, 0x65, 0x79, 0x86, 0x41, 0xbe, 0xa2, 0x5d, 0x7a, 0x85, 0x99, 0x66, 0xa1, 0x5e, 0x42, 0xbd, 0xd1, 0x2e, 0x32, 0xcd, 0xa, 0xf5, 0xe9, 0x16, 0xa7, 0x58, 0x44, 0xbb, 0x7c, 0x83, 0x9f, 0x60, 0xc, 0xf3, 0xef, 0x10, 0xd7, 0x28, 0x34, 0xcb, 0xec, 0x13, 0xf, 0xf0, 0x37, 0xc8, 0xd4, 0x2b, 0x47, 0xb8, 0xa4, 0x5b, 0x9c, 0x63, 0x7f, 0x80, 0x62, 0x9d, 0x81, 0x7e, 0xb9, 0x46, 0x5a, 0xa5, 0xc9, 0x36, 0x2a, 0xd5, 0x12, 0xed, 0xf1, 0xe, 0x29, 0xd6, 0xca, 0x35, 0xf2, 0xd, 0x11, 0xee, 0x82, 0x7d, 0x61, 0x9e, 0x59, 0xa6, 0xba, 0x45, 0xf4, 0xb, 0x17, 0xe8, 0x2f, 0xd0, 0xcc, 0x33, 0x5f, 0xa0, 0xbc, 0x43, 0x84, 0x7b, 0x67, 0x98, 0xbf, 0x40, 0x5c, 0xa3, 0x64, 0x9b, 0x87, 0x78, 0x14, 0xeb, 0xf7, 0x8, 0xcf, 0x30, 0x2c, 0xd3, 0x53, 0xac, 0xb0, 0x4f, 0x88, 0x77, 0x6b, 0x94, 0xf8, 0x7, 0x1b, 0xe4, 0x23, 0xdc, 0xc0, 0x3f, 0x18, 0xe7, 0xfb, 0x4, 0xc3, 0x3c, 0x20, 0xdf, 0xb3, 0x4c, 0x50, 0xaf, 0x68, 0x97, 0x8b, 0x74, 0xc5, 0x3a, 0x26, 0xd9, 0x1e, 0xe1, 0xfd, 0x2, 0x6e, 0x91, 0x8d, 0x72, 0xb5, 0x4a, 0x56, 0xa9, 0x8e, 0x71, 0x6d, 0x92, 0x55, 0xaa, 0xb6, 0x49, 0x25, 0xda, 0xc6, 0x39, 0xfe, 0x1, 0x1d, 0xe2}}
-
-var mulTableLow = [256][16]uint8{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
- {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf},
- {0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e},
- {0x0, 0x3, 0x6, 0x5, 0xc, 0xf, 0xa, 0x9, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11},
- {0x0, 0x4, 0x8, 0xc, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c},
- {0x0, 0x5, 0xa, 0xf, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33},
- {0x0, 0x6, 0xc, 0xa, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22},
- {0x0, 0x7, 0xe, 0x9, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d},
- {0x0, 0x8, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78},
- {0x0, 0x9, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77},
- {0x0, 0xa, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66},
- {0x0, 0xb, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69},
- {0x0, 0xc, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44},
- {0x0, 0xd, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b},
- {0x0, 0xe, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a},
- {0x0, 0xf, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55},
- {0x0, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0},
- {0x0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff},
- {0x0, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee},
- {0x0, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1},
- {0x0, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc},
- {0x0, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3},
- {0x0, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2},
- {0x0, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd},
- {0x0, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88},
- {0x0, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87},
- {0x0, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96},
- {0x0, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99},
- {0x0, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4},
- {0x0, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb},
- {0x0, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa},
- {0x0, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5},
- {0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd},
- {0x0, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2},
- {0x0, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0xd, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3},
- {0x0, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x5, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec},
- {0x0, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1},
- {0x0, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce},
- {0x0, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0xb, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf},
- {0x0, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x2, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0},
- {0x0, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0xd, 0x25, 0xfd, 0xd5, 0xad, 0x85},
- {0x0, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x7, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a},
- {0x0, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b},
- {0x0, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94},
- {0x0, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x9, 0xcd, 0xe1, 0x95, 0xb9},
- {0x0, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x2, 0xc1, 0xec, 0x9b, 0xb6},
- {0x0, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7},
- {0x0, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8},
- {0x0, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0xd},
- {0x0, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x2},
- {0x0, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13},
- {0x0, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c},
- {0x0, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x5, 0x31},
- {0x0, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0xb, 0x3e},
- {0x0, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f},
- {0x0, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20},
- {0x0, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x5, 0x4d, 0x75},
- {0x0, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x8, 0x43, 0x7a},
- {0x0, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b},
- {0x0, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64},
- {0x0, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0xd, 0x31, 0x75, 0x49},
- {0x0, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x1, 0x3c, 0x7b, 0x46},
- {0x0, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57},
- {0x0, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58},
- {0x0, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7},
- {0x0, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8},
- {0x0, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9},
- {0x0, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6},
- {0x0, 0x44, 0x88, 0xcc, 0xd, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb},
- {0x0, 0x45, 0x8a, 0xcf, 0x9, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4},
- {0x0, 0x46, 0x8c, 0xca, 0x5, 0x43, 0x89, 0xcf, 0xa, 0x4c, 0x86, 0xc0, 0xf, 0x49, 0x83, 0xc5},
- {0x0, 0x47, 0x8e, 0xc9, 0x1, 0x46, 0x8f, 0xc8, 0x2, 0x45, 0x8c, 0xcb, 0x3, 0x44, 0x8d, 0xca},
- {0x0, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0xf, 0xd7, 0x9f},
- {0x0, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x2, 0xd9, 0x90},
- {0x0, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81},
- {0x0, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e},
- {0x0, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3},
- {0x0, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac},
- {0x0, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x4, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd},
- {0x0, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0xd, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2},
- {0x0, 0x50, 0xa0, 0xf0, 0x5d, 0xd, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17},
- {0x0, 0x51, 0xa2, 0xf3, 0x59, 0x8, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18},
- {0x0, 0x52, 0xa4, 0xf6, 0x55, 0x7, 0xf1, 0xa3, 0xaa, 0xf8, 0xe, 0x5c, 0xff, 0xad, 0x5b, 0x9},
- {0x0, 0x53, 0xa6, 0xf5, 0x51, 0x2, 0xf7, 0xa4, 0xa2, 0xf1, 0x4, 0x57, 0xf3, 0xa0, 0x55, 0x6},
- {0x0, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b},
- {0x0, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24},
- {0x0, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35},
- {0x0, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a},
- {0x0, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f},
- {0x0, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60},
- {0x0, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x4, 0x9f, 0xc5, 0x2b, 0x71},
- {0x0, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0xf, 0x93, 0xc8, 0x25, 0x7e},
- {0x0, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0xf, 0x53},
- {0x0, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x1, 0x5c},
- {0x0, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d},
- {0x0, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42},
- {0x0, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a},
- {0x0, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15},
- {0x0, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x4},
- {0x0, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0xb},
- {0x0, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x7, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26},
- {0x0, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0xf, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29},
- {0x0, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38},
- {0x0, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37},
- {0x0, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x5, 0x67, 0xf, 0xb7, 0xdf, 0xda, 0xb2, 0xa, 0x62},
- {0x0, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x2, 0x6f, 0x6, 0xbd, 0xd4, 0xd6, 0xbf, 0x4, 0x6d},
- {0x0, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0xb, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c},
- {0x0, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0xc, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73},
- {0x0, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e},
- {0x0, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51},
- {0x0, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40},
- {0x0, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f},
- {0x0, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0xa, 0x9a, 0xea},
- {0x0, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x7, 0x94, 0xe5},
- {0x0, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4},
- {0x0, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb},
- {0x0, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6},
- {0x0, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9},
- {0x0, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0xd, 0x52, 0x24, 0xbe, 0xc8},
- {0x0, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x6, 0x5e, 0x29, 0xb0, 0xc7},
- {0x0, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0xd, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92},
- {0x0, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0xb, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d},
- {0x0, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x1, 0x7b, 0xf7, 0x8d, 0x3, 0x79, 0x2, 0x78, 0xf6, 0x8c},
- {0x0, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x7, 0x7c, 0xff, 0x84, 0x9, 0x72, 0xe, 0x75, 0xf8, 0x83},
- {0x0, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae},
- {0x0, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1},
- {0x0, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0},
- {0x0, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf},
- {0x0, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3},
- {0x0, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc},
- {0x0, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd},
- {0x0, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2},
- {0x0, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef},
- {0x0, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0},
- {0x0, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1},
- {0x0, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe},
- {0x0, 0x88, 0xd, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab},
- {0x0, 0x89, 0xf, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4},
- {0x0, 0x8a, 0x9, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5},
- {0x0, 0x8b, 0xb, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba},
- {0x0, 0x8c, 0x5, 0x89, 0xa, 0x86, 0xf, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97},
- {0x0, 0x8d, 0x7, 0x8a, 0xe, 0x83, 0x9, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98},
- {0x0, 0x8e, 0x1, 0x8f, 0x2, 0x8c, 0x3, 0x8d, 0x4, 0x8a, 0x5, 0x8b, 0x6, 0x88, 0x7, 0x89},
- {0x0, 0x8f, 0x3, 0x8c, 0x6, 0x89, 0x5, 0x8a, 0xc, 0x83, 0xf, 0x80, 0xa, 0x85, 0x9, 0x86},
- {0x0, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23},
- {0x0, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c},
- {0x0, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x4, 0xaf, 0x3d},
- {0x0, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x9, 0xa1, 0x32},
- {0x0, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f},
- {0x0, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10},
- {0x0, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x1},
- {0x0, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0xe},
- {0x0, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x1, 0xee, 0x76, 0xc3, 0x5b},
- {0x0, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0xa, 0xe2, 0x7b, 0xcd, 0x54},
- {0x0, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45},
- {0x0, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a},
- {0x0, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x8, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67},
- {0x0, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x1, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68},
- {0x0, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79},
- {0x0, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76},
- {0x0, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e},
- {0x0, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21},
- {0x0, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30},
- {0x0, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f},
- {0x0, 0xa4, 0x55, 0xf1, 0xaa, 0xe, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12},
- {0x0, 0xa5, 0x57, 0xf2, 0xae, 0xb, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d},
- {0x0, 0xa6, 0x51, 0xf7, 0xa2, 0x4, 0xf3, 0x55, 0x59, 0xff, 0x8, 0xae, 0xfb, 0x5d, 0xaa, 0xc},
- {0x0, 0xa7, 0x53, 0xf4, 0xa6, 0x1, 0xf5, 0x52, 0x51, 0xf6, 0x2, 0xa5, 0xf7, 0x50, 0xa4, 0x3},
- {0x0, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56},
- {0x0, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59},
- {0x0, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x1, 0xe2, 0x48},
- {0x0, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0xc, 0xec, 0x47},
- {0x0, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x9, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a},
- {0x0, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x1, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65},
- {0x0, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74},
- {0x0, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b},
- {0x0, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde},
- {0x0, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1},
- {0x0, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0xb, 0xb9, 0x72, 0xc0},
- {0x0, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x7, 0xb4, 0x7c, 0xcf},
- {0x0, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x8, 0x23, 0x97, 0x56, 0xe2},
- {0x0, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x3, 0x2f, 0x9a, 0x58, 0xed},
- {0x0, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc},
- {0x0, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3},
- {0x0, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0xf, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6},
- {0x0, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x8, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9},
- {0x0, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x1, 0xb9, 0x3, 0xd0, 0x6a, 0x6b, 0xd1, 0x2, 0xb8},
- {0x0, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x6, 0xb1, 0xa, 0xda, 0x61, 0x67, 0xdc, 0xc, 0xb7},
- {0x0, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a},
- {0x0, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95},
- {0x0, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84},
- {0x0, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b},
- {0x0, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34},
- {0x0, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b},
- {0x0, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x5, 0x71, 0xb3, 0xe8, 0x2a},
- {0x0, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0xe, 0x7d, 0xbe, 0xe6, 0x25},
- {0x0, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x8},
- {0x0, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x7},
- {0x0, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16},
- {0x0, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19},
- {0x0, 0xc8, 0x8d, 0x45, 0x7, 0xcf, 0x8a, 0x42, 0xe, 0xc6, 0x83, 0x4b, 0x9, 0xc1, 0x84, 0x4c},
- {0x0, 0xc9, 0x8f, 0x46, 0x3, 0xca, 0x8c, 0x45, 0x6, 0xcf, 0x89, 0x40, 0x5, 0xcc, 0x8a, 0x43},
- {0x0, 0xca, 0x89, 0x43, 0xf, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52},
- {0x0, 0xcb, 0x8b, 0x40, 0xb, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d},
- {0x0, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70},
- {0x0, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f},
- {0x0, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e},
- {0x0, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61},
- {0x0, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0xa, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4},
- {0x0, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0xd, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb},
- {0x0, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x4, 0xde, 0xc, 0x67, 0xb5, 0xb1, 0x63, 0x8, 0xda},
- {0x0, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x3, 0xd6, 0x5, 0x6d, 0xbe, 0xbd, 0x6e, 0x6, 0xd5},
- {0x0, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8},
- {0x0, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7},
- {0x0, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6},
- {0x0, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9},
- {0x0, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc},
- {0x0, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3},
- {0x0, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0xb, 0x78, 0xa2},
- {0x0, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x6, 0x76, 0xad},
- {0x0, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0xb, 0xd7, 0xf9, 0x25, 0x5c, 0x80},
- {0x0, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x1, 0xdc, 0xf5, 0x28, 0x52, 0x8f},
- {0x0, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e},
- {0x0, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91},
- {0x0, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9},
- {0x0, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6},
- {0x0, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0xe, 0x35, 0xd7},
- {0x0, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x3, 0x3b, 0xd8},
- {0x0, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5},
- {0x0, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa},
- {0x0, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0xd, 0xeb},
- {0x0, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x3, 0xe4},
- {0x0, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1},
- {0x0, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe},
- {0x0, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x3, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf},
- {0x0, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0xb, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0},
- {0x0, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d},
- {0x0, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82},
- {0x0, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0xc, 0xbc, 0x52, 0x7d, 0x93},
- {0x0, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x7, 0xb0, 0x5f, 0x73, 0x9c},
- {0x0, 0xf0, 0xfd, 0xd, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39},
- {0x0, 0xf1, 0xff, 0xe, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36},
- {0x0, 0xf2, 0xf9, 0xb, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27},
- {0x0, 0xf3, 0xfb, 0x8, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28},
- {0x0, 0xf4, 0xf5, 0x1, 0xf7, 0x3, 0x2, 0xf6, 0xf3, 0x7, 0x6, 0xf2, 0x4, 0xf0, 0xf1, 0x5},
- {0x0, 0xf5, 0xf7, 0x2, 0xf3, 0x6, 0x4, 0xf1, 0xfb, 0xe, 0xc, 0xf9, 0x8, 0xfd, 0xff, 0xa},
- {0x0, 0xf6, 0xf1, 0x7, 0xff, 0x9, 0xe, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b},
- {0x0, 0xf7, 0xf3, 0x4, 0xfb, 0xc, 0x8, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14},
- {0x0, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41},
- {0x0, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e},
- {0x0, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f},
- {0x0, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50},
- {0x0, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d},
- {0x0, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72},
- {0x0, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63},
- {0x0, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c}}
-var mulTableHigh = [256][16]uint8{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
- {0x0, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0},
- {0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd},
- {0x0, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0xd},
- {0x0, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7},
- {0x0, 0x50, 0xa0, 0xf0, 0x5d, 0xd, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17},
- {0x0, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a},
- {0x0, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0xa, 0x9a, 0xea},
- {0x0, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3},
- {0x0, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23},
- {0x0, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e},
- {0x0, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde},
- {0x0, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34},
- {0x0, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0xa, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4},
- {0x0, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9},
- {0x0, 0xf0, 0xfd, 0xd, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39},
- {0x0, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb},
- {0x0, 0xd, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b},
- {0x0, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x1, 0x3c, 0x7b, 0x46},
- {0x0, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x2, 0xc1, 0xec, 0x9b, 0xb6},
- {0x0, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x1, 0x5c},
- {0x0, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac},
- {0x0, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1},
- {0x0, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51},
- {0x0, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x1, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68},
- {0x0, 0x8d, 0x7, 0x8a, 0xe, 0x83, 0x9, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98},
- {0x0, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95},
- {0x0, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x1, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65},
- {0x0, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x1, 0xdc, 0xf5, 0x28, 0x52, 0x8f},
- {0x0, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f},
- {0x0, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72},
- {0x0, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82},
- {0x0, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b},
- {0x0, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b},
- {0x0, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96},
- {0x0, 0xa, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66},
- {0x0, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x1, 0x7b, 0xf7, 0x8d, 0x3, 0x79, 0x2, 0x78, 0xf6, 0x8c},
- {0x0, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0xb, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c},
- {0x0, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x4, 0x9f, 0xc5, 0x2b, 0x71},
- {0x0, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81},
- {0x0, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x1, 0xb9, 0x3, 0xd0, 0x6a, 0x6b, 0xd1, 0x2, 0xb8},
- {0x0, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x1, 0xe2, 0x48},
- {0x0, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45},
- {0x0, 0x8a, 0x9, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5},
- {0x0, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f},
- {0x0, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x3, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf},
- {0x0, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0xb, 0x78, 0xa2},
- {0x0, 0xca, 0x89, 0x43, 0xf, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52},
- {0x0, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x2, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0},
- {0x0, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20},
- {0x0, 0x7, 0xe, 0x9, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d},
- {0x0, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd},
- {0x0, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37},
- {0x0, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x6, 0x5e, 0x29, 0xb0, 0xc7},
- {0x0, 0x47, 0x8e, 0xc9, 0x1, 0x46, 0x8f, 0xc8, 0x2, 0x45, 0x8c, 0xcb, 0x3, 0x44, 0x8d, 0xca},
- {0x0, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a},
- {0x0, 0xa7, 0x53, 0xf4, 0xa6, 0x1, 0xf5, 0x52, 0x51, 0xf6, 0x2, 0xa5, 0xf7, 0x50, 0xa4, 0x3},
- {0x0, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3},
- {0x0, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe},
- {0x0, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0xe},
- {0x0, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x3, 0xe4},
- {0x0, 0xf7, 0xf3, 0x4, 0xfb, 0xc, 0x8, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14},
- {0x0, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19},
- {0x0, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9},
- {0x0, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6},
- {0x0, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x7, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26},
- {0x0, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b},
- {0x0, 0x44, 0x88, 0xcc, 0xd, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb},
- {0x0, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x5, 0x31},
- {0x0, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1},
- {0x0, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc},
- {0x0, 0x4, 0x8, 0xc, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c},
- {0x0, 0xf4, 0xf5, 0x1, 0xf7, 0x3, 0x2, 0xf6, 0xf3, 0x7, 0x6, 0xf2, 0x4, 0xf0, 0xf1, 0x5},
- {0x0, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5},
- {0x0, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8},
- {0x0, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x8},
- {0x0, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x8, 0x23, 0x97, 0x56, 0xe2},
- {0x0, 0xa4, 0x55, 0xf1, 0xaa, 0xe, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12},
- {0x0, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f},
- {0x0, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef},
- {0x0, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x2, 0x6f, 0x6, 0xbd, 0xd4, 0xd6, 0xbf, 0x4, 0x6d},
- {0x0, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0xb, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d},
- {0x0, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x2, 0xd9, 0x90},
- {0x0, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60},
- {0x0, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x7, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a},
- {0x0, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x8, 0x43, 0x7a},
- {0x0, 0x9, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77},
- {0x0, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87},
- {0x0, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe},
- {0x0, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e},
- {0x0, 0xc9, 0x8f, 0x46, 0x3, 0xca, 0x8c, 0x45, 0x6, 0xcf, 0x89, 0x40, 0x5, 0xcc, 0x8a, 0x43},
- {0x0, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3},
- {0x0, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59},
- {0x0, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x8, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9},
- {0x0, 0x89, 0xf, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4},
- {0x0, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0xa, 0xe2, 0x7b, 0xcd, 0x54},
- {0x0, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x4, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd},
- {0x0, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d},
- {0x0, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40},
- {0x0, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0},
- {0x0, 0xe, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a},
- {0x0, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa},
- {0x0, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7},
- {0x0, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57},
- {0x0, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e},
- {0x0, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e},
- {0x0, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0xc, 0xbc, 0x52, 0x7d, 0x93},
- {0x0, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63},
- {0x0, 0x8e, 0x1, 0x8f, 0x2, 0x8c, 0x3, 0x8d, 0x4, 0x8a, 0x5, 0x8b, 0x6, 0x88, 0x7, 0x89},
- {0x0, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79},
- {0x0, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74},
- {0x0, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84},
- {0x0, 0x53, 0xa6, 0xf5, 0x51, 0x2, 0xf7, 0xa4, 0xa2, 0xf1, 0x4, 0x57, 0xf3, 0xa0, 0x55, 0x6},
- {0x0, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6},
- {0x0, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb},
- {0x0, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0xb},
- {0x0, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1},
- {0x0, 0x3, 0x6, 0x5, 0xc, 0xf, 0xa, 0x9, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11},
- {0x0, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c},
- {0x0, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x5, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec},
- {0x0, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x3, 0xd6, 0x5, 0x6d, 0xbe, 0xbd, 0x6e, 0x6, 0xd5},
- {0x0, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0xe, 0x7d, 0xbe, 0xe6, 0x25},
- {0x0, 0xf3, 0xfb, 0x8, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28},
- {0x0, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x3, 0x3b, 0xd8},
- {0x0, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x9, 0xa1, 0x32},
- {0x0, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2},
- {0x0, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x7, 0xb4, 0x7c, 0xcf},
- {0x0, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f},
- {0x0, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1},
- {0x0, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41},
- {0x0, 0xc8, 0x8d, 0x45, 0x7, 0xcf, 0x8a, 0x42, 0xe, 0xc6, 0x83, 0x4b, 0x9, 0xc1, 0x84, 0x4c},
- {0x0, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc},
- {0x0, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56},
- {0x0, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0xf, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6},
- {0x0, 0x88, 0xd, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab},
- {0x0, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x1, 0xee, 0x76, 0xc3, 0x5b},
- {0x0, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x5, 0x67, 0xf, 0xb7, 0xdf, 0xda, 0xb2, 0xa, 0x62},
- {0x0, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0xd, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92},
- {0x0, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0xf, 0xd7, 0x9f},
- {0x0, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f},
- {0x0, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0xd, 0x25, 0xfd, 0xd5, 0xad, 0x85},
- {0x0, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x5, 0x4d, 0x75},
- {0x0, 0x8, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78},
- {0x0, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88},
- {0x0, 0xf5, 0xf7, 0x2, 0xf3, 0x6, 0x4, 0xf1, 0xfb, 0xe, 0xc, 0xf9, 0x8, 0xfd, 0xff, 0xa},
- {0x0, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa},
- {0x0, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7},
- {0x0, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x7},
- {0x0, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x3, 0x2f, 0x9a, 0x58, 0xed},
- {0x0, 0xa5, 0x57, 0xf2, 0xae, 0xb, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d},
- {0x0, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10},
- {0x0, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0},
- {0x0, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9},
- {0x0, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0xf, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29},
- {0x0, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24},
- {0x0, 0x45, 0x8a, 0xcf, 0x9, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4},
- {0x0, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0xb, 0x3e},
- {0x0, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce},
- {0x0, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3},
- {0x0, 0x5, 0xa, 0xf, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33},
- {0x0, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x4, 0xde, 0xc, 0x67, 0xb5, 0xb1, 0x63, 0x8, 0xda},
- {0x0, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x5, 0x71, 0xb3, 0xe8, 0x2a},
- {0x0, 0xf2, 0xf9, 0xb, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27},
- {0x0, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0xe, 0x35, 0xd7},
- {0x0, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x4, 0xaf, 0x3d},
- {0x0, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd},
- {0x0, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0xb, 0xb9, 0x72, 0xc0},
- {0x0, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30},
- {0x0, 0x52, 0xa4, 0xf6, 0x55, 0x7, 0xf1, 0xa3, 0xaa, 0xf8, 0xe, 0x5c, 0xff, 0xad, 0x5b, 0x9},
- {0x0, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9},
- {0x0, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4},
- {0x0, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x4},
- {0x0, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee},
- {0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e},
- {0x0, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13},
- {0x0, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0xd, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3},
- {0x0, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61},
- {0x0, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91},
- {0x0, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x7, 0xb0, 0x5f, 0x73, 0x9c},
- {0x0, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c},
- {0x0, 0x8f, 0x3, 0x8c, 0x6, 0x89, 0x5, 0x8a, 0xc, 0x83, 0xf, 0x80, 0xa, 0x85, 0x9, 0x86},
- {0x0, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76},
- {0x0, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b},
- {0x0, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b},
- {0x0, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0xd, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2},
- {0x0, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42},
- {0x0, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f},
- {0x0, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf},
- {0x0, 0xf, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55},
- {0x0, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5},
- {0x0, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8},
- {0x0, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58},
- {0x0, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x8, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67},
- {0x0, 0x8c, 0x5, 0x89, 0xa, 0x86, 0xf, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97},
- {0x0, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a},
- {0x0, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x9, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a},
- {0x0, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0xb, 0xd7, 0xf9, 0x25, 0x5c, 0x80},
- {0x0, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70},
- {0x0, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d},
- {0x0, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d},
- {0x0, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4},
- {0x0, 0xc, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44},
- {0x0, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0xd, 0x31, 0x75, 0x49},
- {0x0, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x9, 0xcd, 0xe1, 0x95, 0xb9},
- {0x0, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0xf, 0x53},
- {0x0, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3},
- {0x0, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae},
- {0x0, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e},
- {0x0, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc},
- {0x0, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c},
- {0x0, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21},
- {0x0, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1},
- {0x0, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b},
- {0x0, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0xd, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb},
- {0x0, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6},
- {0x0, 0xf1, 0xff, 0xe, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36},
- {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf},
- {0x0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff},
- {0x0, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2},
- {0x0, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x2},
- {0x0, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8},
- {0x0, 0x51, 0xa2, 0xf3, 0x59, 0x8, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18},
- {0x0, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15},
- {0x0, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x7, 0x94, 0xe5},
- {0x0, 0xa6, 0x51, 0xf7, 0xa2, 0x4, 0xf3, 0x55, 0x59, 0xff, 0x8, 0xae, 0xfb, 0x5d, 0xaa, 0xc},
- {0x0, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc},
- {0x0, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1},
- {0x0, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x1},
- {0x0, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0xd, 0xeb},
- {0x0, 0xf6, 0xf1, 0x7, 0xff, 0x9, 0xe, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b},
- {0x0, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16},
- {0x0, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6},
- {0x0, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0xb, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf},
- {0x0, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f},
- {0x0, 0x6, 0xc, 0xa, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22},
- {0x0, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2},
- {0x0, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38},
- {0x0, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0xd, 0x52, 0x24, 0xbe, 0xc8},
- {0x0, 0x46, 0x8c, 0xca, 0x5, 0x43, 0x89, 0xcf, 0xa, 0x4c, 0x86, 0xc0, 0xf, 0x49, 0x83, 0xc5},
- {0x0, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35},
- {0x0, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x6, 0xb1, 0xa, 0xda, 0x61, 0x67, 0xdc, 0xc, 0xb7},
- {0x0, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0xc, 0xec, 0x47},
- {0x0, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a},
- {0x0, 0x8b, 0xb, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba},
- {0x0, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50},
- {0x0, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0xb, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0},
- {0x0, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x6, 0x76, 0xad},
- {0x0, 0xcb, 0x8b, 0x40, 0xb, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d},
- {0x0, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64},
- {0x0, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94},
- {0x0, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99},
- {0x0, 0xb, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69},
- {0x0, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x7, 0x7c, 0xff, 0x84, 0x9, 0x72, 0xe, 0x75, 0xf8, 0x83},
- {0x0, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0xc, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73},
- {0x0, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0xf, 0x93, 0xc8, 0x25, 0x7e},
- {0x0, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e}}
-
-// galMultiply multiplies to elements of the field.
-// Uses lookup table ~40% faster
-func galMultiply(a, b byte) byte {
- return mulTable[a][b]
-}
-
-// Original function:
-/*
-// galMultiply multiplies to elements of the field.
-func galMultiply(a, b byte) byte {
- if a == 0 || b == 0 {
- return 0
- }
- logA := int(logTable[a])
- logB := int(logTable[b])
- return expTable[logA+logB]
-}
-*/
-
-// galDivide is inverse of galMultiply.
-func galDivide(a, b byte) byte {
- if a == 0 {
- return 0
- }
- if b == 0 {
- panic("Argument 'divisor' is 0")
- }
- logA := int(logTable[a])
- logB := int(logTable[b])
- logResult := logA - logB
- if logResult < 0 {
- logResult += 255
- }
- return expTable[logResult]
-}
-
-// Computes a**n.
-//
-// The result will be the same as multiplying a times itself n times.
-func galExp(a byte, n int) byte {
- if n == 0 {
- return 1
- }
- if a == 0 {
- return 0
- }
-
- logA := logTable[a]
- logResult := int(logA) * n
- for logResult >= 255 {
- logResult -= 255
- }
- return expTable[logResult]
-}
-
-func genAvx2Matrix(matrixRows [][]byte, inputs, inIdx, outputs int, dst []byte) []byte {
- if !avx2CodeGen {
- panic("codegen not enabled")
- }
- total := inputs * outputs
-
- // Duplicated in+out
- wantBytes := total * 32 * 2
- if cap(dst) < wantBytes {
- dst = make([]byte, wantBytes)
- } else {
- dst = dst[:wantBytes]
- }
- for i, row := range matrixRows[:outputs] {
- for j, idx := range row[inIdx : inIdx+inputs] {
- dstIdx := (j*outputs + i) * 64
- dstPart := dst[dstIdx:]
- dstPart = dstPart[:64]
- lo := mulTableLow[idx][:]
- hi := mulTableHigh[idx][:]
- copy(dstPart[:16], lo)
- copy(dstPart[16:32], lo)
- copy(dstPart[32:48], hi)
- copy(dstPart[48:64], hi)
- }
- }
- return dst
-}
-
-// xor slices writing to out.
-func sliceXorGo(in, out []byte, _ *options) {
- for len(out) >= 32 {
- inS := in[:32]
- v0 := binary.LittleEndian.Uint64(out[:8]) ^ binary.LittleEndian.Uint64(inS[:8])
- v1 := binary.LittleEndian.Uint64(out[8:16]) ^ binary.LittleEndian.Uint64(inS[8:16])
- v2 := binary.LittleEndian.Uint64(out[16:24]) ^ binary.LittleEndian.Uint64(inS[16:24])
- v3 := binary.LittleEndian.Uint64(out[24:32]) ^ binary.LittleEndian.Uint64(inS[24:32])
- binary.LittleEndian.PutUint64(out[:8], v0)
- binary.LittleEndian.PutUint64(out[8:16], v1)
- binary.LittleEndian.PutUint64(out[16:24], v2)
- binary.LittleEndian.PutUint64(out[24:32], v3)
- out = out[32:]
- in = in[32:]
- }
- out = out[:len(in)]
- for n, input := range in {
- out[n] ^= input
- }
-}
diff --git a/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go
deleted file mode 100644
index 9a249d2..0000000
--- a/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go
+++ /dev/null
@@ -1,339 +0,0 @@
-//go:build !noasm && !appengine && !gccgo
-// +build !noasm,!appengine,!gccgo
-
-// Copyright 2015, Klaus Post, see LICENSE for details.
-// Copyright 2019, Minio, Inc.
-
-package reedsolomon
-
-import (
- "sync"
-)
-
-//go:noescape
-func _galMulAVX512Parallel81(in, out [][]byte, matrix *[matrixSize81]byte, addTo bool)
-
-//go:noescape
-func _galMulAVX512Parallel82(in, out [][]byte, matrix *[matrixSize82]byte, addTo bool)
-
-//go:noescape
-func _galMulAVX512Parallel84(in, out [][]byte, matrix *[matrixSize84]byte, addTo bool)
-
-const (
- dimIn = 8 // Number of input rows processed simultaneously
- dimOut81 = 1 // Number of output rows processed simultaneously for x1 routine
- dimOut82 = 2 // Number of output rows processed simultaneously for x2 routine
- dimOut84 = 4 // Number of output rows processed simultaneously for x4 routine
- matrixSize81 = (16 + 16) * dimIn * dimOut81 // Dimension of slice of matrix coefficient passed into x1 routine
- matrixSize82 = (16 + 16) * dimIn * dimOut82 // Dimension of slice of matrix coefficient passed into x2 routine
- matrixSize84 = (16 + 16) * dimIn * dimOut84 // Dimension of slice of matrix coefficient passed into x4 routine
-)
-
-// Construct block of matrix coefficients for single output row in parallel
-func setupMatrix81(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[matrixSize81]byte) {
- offset := 0
- for c := inputOffset; c < inputOffset+dimIn; c++ {
- for iRow := outputOffset; iRow < outputOffset+dimOut81; iRow++ {
- if c < len(matrixRows[iRow]) {
- coeff := matrixRows[iRow][c]
- copy(matrix[offset*32:], mulTableLow[coeff][:])
- copy(matrix[offset*32+16:], mulTableHigh[coeff][:])
- } else {
- // coefficients not used for this input shard (so null out)
- v := matrix[offset*32 : offset*32+32]
- for i := range v {
- v[i] = 0
- }
- }
- offset += dimIn
- if offset >= dimIn*dimOut81 {
- offset -= dimIn*dimOut81 - 1
- }
- }
- }
-}
-
-// Construct block of matrix coefficients for 2 output rows in parallel
-func setupMatrix82(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[matrixSize82]byte) {
- offset := 0
- for c := inputOffset; c < inputOffset+dimIn; c++ {
- for iRow := outputOffset; iRow < outputOffset+dimOut82; iRow++ {
- if c < len(matrixRows[iRow]) {
- coeff := matrixRows[iRow][c]
- copy(matrix[offset*32:], mulTableLow[coeff][:])
- copy(matrix[offset*32+16:], mulTableHigh[coeff][:])
- } else {
- // coefficients not used for this input shard (so null out)
- v := matrix[offset*32 : offset*32+32]
- for i := range v {
- v[i] = 0
- }
- }
- offset += dimIn
- if offset >= dimIn*dimOut82 {
- offset -= dimIn*dimOut82 - 1
- }
- }
- }
-}
-
-// Construct block of matrix coefficients for 4 output rows in parallel
-func setupMatrix84(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[matrixSize84]byte) {
- offset := 0
- for c := inputOffset; c < inputOffset+dimIn; c++ {
- for iRow := outputOffset; iRow < outputOffset+dimOut84; iRow++ {
- if c < len(matrixRows[iRow]) {
- coeff := matrixRows[iRow][c]
- copy(matrix[offset*32:], mulTableLow[coeff][:])
- copy(matrix[offset*32+16:], mulTableHigh[coeff][:])
- } else {
- // coefficients not used for this input shard (so null out)
- v := matrix[offset*32 : offset*32+32]
- for i := range v {
- v[i] = 0
- }
- }
- offset += dimIn
- if offset >= dimIn*dimOut84 {
- offset -= dimIn*dimOut84 - 1
- }
- }
- }
-}
-
-// Invoke AVX512 routine for single output row in parallel
-func galMulAVX512Parallel81(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix81 *[matrixSize81]byte) {
- done := stop - start
- if done <= 0 || len(in) == 0 || len(out) == 0 {
- return
- }
-
- inputEnd := inputOffset + dimIn
- if inputEnd > len(in) {
- inputEnd = len(in)
- }
- outputEnd := outputOffset + dimOut81
- if outputEnd > len(out) {
- outputEnd = len(out)
- }
-
- // We know the max size, alloc temp array.
- var inTmp [dimIn][]byte
- for i, v := range in[inputOffset:inputEnd] {
- inTmp[i] = v[start:stop]
- }
- var outTmp [dimOut81][]byte
- for i, v := range out[outputOffset:outputEnd] {
- outTmp[i] = v[start:stop]
- }
-
- addTo := inputOffset != 0 // Except for the first input column, add to previous results
- _galMulAVX512Parallel81(inTmp[:inputEnd-inputOffset], outTmp[:outputEnd-outputOffset], matrix81, addTo)
-
- done = start + ((done >> 6) << 6)
- if done < stop {
- galMulAVX512LastInput(inputOffset, inputEnd, outputOffset, outputEnd, matrixRows, done, stop, out, in)
- }
-}
-
-// Invoke AVX512 routine for 2 output rows in parallel
-func galMulAVX512Parallel82(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix82 *[matrixSize82]byte) {
- done := stop - start
- if done <= 0 || len(in) == 0 || len(out) == 0 {
- return
- }
-
- inputEnd := inputOffset + dimIn
- if inputEnd > len(in) {
- inputEnd = len(in)
- }
- outputEnd := outputOffset + dimOut82
- if outputEnd > len(out) {
- outputEnd = len(out)
- }
-
- // We know the max size, alloc temp array.
- var inTmp [dimIn][]byte
- for i, v := range in[inputOffset:inputEnd] {
- inTmp[i] = v[start:stop]
- }
- var outTmp [dimOut82][]byte
- for i, v := range out[outputOffset:outputEnd] {
- outTmp[i] = v[start:stop]
- }
-
- addTo := inputOffset != 0 // Except for the first input column, add to previous results
- _galMulAVX512Parallel82(inTmp[:inputEnd-inputOffset], outTmp[:outputEnd-outputOffset], matrix82, addTo)
-
- done = start + ((done >> 6) << 6)
- if done < stop {
- galMulAVX512LastInput(inputOffset, inputEnd, outputOffset, outputEnd, matrixRows, done, stop, out, in)
- }
-}
-
-// Invoke AVX512 routine for 4 output rows in parallel
-func galMulAVX512Parallel84(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix84 *[matrixSize84]byte) {
- done := stop - start
- if done <= 0 || len(in) == 0 || len(out) == 0 {
- return
- }
-
- inputEnd := inputOffset + dimIn
- if inputEnd > len(in) {
- inputEnd = len(in)
- }
- outputEnd := outputOffset + dimOut84
- if outputEnd > len(out) {
- outputEnd = len(out)
- }
-
- // We know the max size, alloc temp array.
- var inTmp [dimIn][]byte
- for i, v := range in[inputOffset:inputEnd] {
- inTmp[i] = v[start:stop]
- }
- var outTmp [dimOut84][]byte
- for i, v := range out[outputOffset:outputEnd] {
- outTmp[i] = v[start:stop]
- }
-
- addTo := inputOffset != 0 // Except for the first input column, add to previous results
- _galMulAVX512Parallel84(inTmp[:inputEnd-inputOffset], outTmp[:outputEnd-outputOffset], matrix84, addTo)
-
- done = start + ((done >> 6) << 6)
- if done < stop {
- galMulAVX512LastInput(inputOffset, inputEnd, outputOffset, outputEnd, matrixRows, done, stop, out, in)
- }
-}
-
-func galMulAVX512LastInput(inputOffset int, inputEnd int, outputOffset int, outputEnd int, matrixRows [][]byte, done int, stop int, out [][]byte, in [][]byte) {
- for c := inputOffset; c < inputEnd; c++ {
- for iRow := outputOffset; iRow < outputEnd; iRow++ {
- if c < len(matrixRows[iRow]) {
- mt := mulTable[matrixRows[iRow][c]][:256]
- for i := done; i < stop; i++ {
- if c == 0 { // only set value for first input column
- out[iRow][i] = mt[in[c][i]]
- } else { // and add for all others
- out[iRow][i] ^= mt[in[c][i]]
- }
- }
- }
- }
- }
-}
-
-// Perform the same as codeSomeShards, but taking advantage of
-// AVX512 parallelism for up to 4x faster execution as compared to AVX2
-func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, byteCount int) {
- // Process using no goroutines
- outputCount := len(outputs)
- start, end := 0, r.o.perRound
- if end > byteCount {
- end = byteCount
- }
- for start < byteCount {
- matrix84 := [matrixSize84]byte{}
- matrix82 := [matrixSize82]byte{}
- matrix81 := [matrixSize81]byte{}
-
- outputRow := 0
- // First process (multiple) batches of 4 output rows in parallel
- if outputRow+dimOut84 <= outputCount {
- for ; outputRow+dimOut84 <= outputCount; outputRow += dimOut84 {
- for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
- setupMatrix84(matrixRows, inputRow, outputRow, &matrix84)
- galMulAVX512Parallel84(inputs, outputs, matrixRows, inputRow, outputRow, start, end, &matrix84)
- }
- }
- }
- // Then process a (single) batch of 2 output rows in parallel
- if outputRow+dimOut82 <= outputCount {
- for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
- setupMatrix82(matrixRows, inputRow, outputRow, &matrix82)
- galMulAVX512Parallel82(inputs, outputs, matrixRows, inputRow, outputRow, start, end, &matrix82)
- }
- outputRow += dimOut82
- }
- // Lastly, we may have a single output row left (for uneven parity)
- if outputRow < outputCount {
- for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
- setupMatrix81(matrixRows, inputRow, outputRow, &matrix81)
- galMulAVX512Parallel81(inputs, outputs, matrixRows, inputRow, outputRow, start, end, &matrix81)
- }
- }
-
- start = end
- end += r.o.perRound
- if end > byteCount {
- end = byteCount
- }
- }
-}
-
-// Perform the same as codeSomeShards, but taking advantage of
-// AVX512 parallelism for up to 4x faster execution as compared to AVX2
-func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, byteCount int) {
- outputCount := len(outputs)
- var wg sync.WaitGroup
- do := byteCount / r.o.maxGoroutines
- if do < r.o.minSplitSize {
- do = r.o.minSplitSize
- }
- // Make sizes divisible by 64
- do = (do + 63) & (^63)
- start := 0
- for start < byteCount {
- if start+do > byteCount {
- do = byteCount - start
- }
- wg.Add(1)
- go func(grStart, grStop int) {
- start, stop := grStart, grStart+r.o.perRound
- if stop > grStop {
- stop = grStop
- }
- // Loop for each round.
- matrix84 := [matrixSize84]byte{}
- matrix82 := [matrixSize82]byte{}
- matrix81 := [matrixSize81]byte{}
- for start < grStop {
- outputRow := 0
- // First process (multiple) batches of 4 output rows in parallel
- if outputRow+dimOut84 <= outputCount {
- // 1K matrix buffer
- for ; outputRow+dimOut84 <= outputCount; outputRow += dimOut84 {
- for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
- setupMatrix84(matrixRows, inputRow, outputRow, &matrix84)
- galMulAVX512Parallel84(inputs, outputs, matrixRows, inputRow, outputRow, start, stop, &matrix84)
- }
- }
- }
- // Then process a (single) batch of 2 output rows in parallel
- if outputRow+dimOut82 <= outputCount {
- // 512B matrix buffer
- for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
- setupMatrix82(matrixRows, inputRow, outputRow, &matrix82)
- galMulAVX512Parallel82(inputs, outputs, matrixRows, inputRow, outputRow, start, stop, &matrix82)
- }
- outputRow += dimOut82
- }
- // Lastly, we may have a single output row left (for uneven parity)
- if outputRow < outputCount {
- for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
- setupMatrix81(matrixRows, inputRow, outputRow, &matrix81)
- galMulAVX512Parallel81(inputs, outputs, matrixRows, inputRow, outputRow, start, stop, &matrix81)
- }
- }
- start = stop
- stop += r.o.perRound
- if stop > grStop {
- stop = grStop
- }
- }
- wg.Done()
- }(start, start+do)
- start += do
- }
- wg.Wait()
-}
diff --git a/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s
deleted file mode 100644
index 09f1d0d..0000000
--- a/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s
+++ /dev/null
@@ -1,402 +0,0 @@
-//+build !noasm
-//+build !appengine
-//+build !gccgo
-
-// Copyright 2015, Klaus Post, see LICENSE for details.
-// Copyright 2019, Minio, Inc.
-
-#define LOAD(OFFSET) \
- MOVQ OFFSET(SI), BX \
- VMOVDQU64 (BX)(R11*1), Z0 \
- VPSRLQ $4, Z0, Z1 \ // high input
- VPANDQ Z2, Z0, Z0 \ // low input
- VPANDQ Z2, Z1, Z1 // high input
-
-#define GALOIS_MUL(MUL_LO, MUL_HI, LO, HI, OUT) \
- VPSHUFB Z0, MUL_LO, LO \ // mul low part
- VPSHUFB Z1, MUL_HI, HI \ // mul high part
- VPTERNLOGD $0x96, LO, HI, OUT
-
-#define GALOIS(C1, C2, IN, LO, HI, OUT) \
- VSHUFI64X2 $C1, IN, IN, LO \
- VSHUFI64X2 $C2, IN, IN, HI \
- GALOIS_MUL(LO, HI, LO, HI, OUT)
-
-//
-// Process single output row from a total of 8 input rows
-//
-// func _galMulAVX512Parallel81(in, out [][]byte, matrix *[matrixSize81]byte, addTo bool)
-TEXT ·_galMulAVX512Parallel81(SB), 7, $0
- MOVQ in+0(FP), SI
- MOVQ 8(SI), R9 // R9: len(in)
- SHRQ $6, R9 // len(in) / 64
- TESTQ R9, R9
- JZ done_avx512_parallel81
-
- MOVQ matrix+48(FP), SI
- VMOVDQU64 0x000(SI), Z16
- VMOVDQU64 0x040(SI), Z17
- VMOVDQU64 0x080(SI), Z18
- VMOVDQU64 0x0c0(SI), Z19
-
- // Initialize multiplication constants
- VSHUFI64X2 $0x55, Z16, Z16, Z20
- VSHUFI64X2 $0xaa, Z16, Z16, Z24
- VSHUFI64X2 $0xff, Z16, Z16, Z28
- VSHUFI64X2 $0x00, Z16, Z16, Z16
-
- VSHUFI64X2 $0x55, Z17, Z17, Z21
- VSHUFI64X2 $0xaa, Z17, Z17, Z25
- VSHUFI64X2 $0xff, Z17, Z17, Z29
- VSHUFI64X2 $0x00, Z17, Z17, Z17
-
- VSHUFI64X2 $0x55, Z18, Z18, Z22
- VSHUFI64X2 $0xaa, Z18, Z18, Z26
- VSHUFI64X2 $0xff, Z18, Z18, Z30
- VSHUFI64X2 $0x00, Z18, Z18, Z18
-
- VSHUFI64X2 $0x55, Z19, Z19, Z23
- VSHUFI64X2 $0xaa, Z19, Z19, Z27
- VSHUFI64X2 $0xff, Z19, Z19, Z31
- VSHUFI64X2 $0x00, Z19, Z19, Z19
-
- MOVQ $15, BX
- VPBROADCASTB BX, Z2
-
- MOVB addTo+56(FP), AX
- IMULQ $-0x1, AX
- KMOVQ AX, K1
- MOVQ in+0(FP), SI // SI: &in
- MOVQ in_len+8(FP), AX // number of inputs
- XORQ R11, R11
- MOVQ out+24(FP), DX
- MOVQ (DX), DX // DX: &out[0][0]
-
-loopback_avx512_parallel81:
- VMOVDQU64.Z (DX), K1, Z4
-
- LOAD(0x00) // &in[0][0]
- GALOIS_MUL(Z16, Z20, Z14, Z15, Z4)
-
- CMPQ AX, $1
- JE skip_avx512_parallel81
-
- LOAD(0x18) // &in[1][0]
- GALOIS_MUL(Z24, Z28, Z14, Z15, Z4)
-
- CMPQ AX, $2
- JE skip_avx512_parallel81
-
- LOAD(0x30) // &in[2][0]
- GALOIS_MUL(Z17, Z21, Z14, Z15, Z4)
-
- CMPQ AX, $3
- JE skip_avx512_parallel81
-
- LOAD(0x48) // &in[3][0]
- GALOIS_MUL(Z25, Z29, Z14, Z15, Z4)
-
- CMPQ AX, $4
- JE skip_avx512_parallel81
-
- LOAD(0x60) // &in[4][0]
- GALOIS_MUL(Z18, Z22, Z14, Z15, Z4)
-
- CMPQ AX, $5
- JE skip_avx512_parallel81
-
- LOAD(0x78) // &in[5][0]
- GALOIS_MUL(Z26, Z30, Z14, Z15, Z4)
-
- CMPQ AX, $6
- JE skip_avx512_parallel81
-
- LOAD(0x90) // &in[6][0]
- GALOIS_MUL(Z19, Z23, Z14, Z15, Z4)
-
- CMPQ AX, $7
- JE skip_avx512_parallel81
-
- LOAD(0xa8) // &in[7][0]
- GALOIS_MUL(Z27, Z31, Z14, Z15, Z4)
-
-skip_avx512_parallel81:
- VMOVDQU64 Z4, (DX)
-
- ADDQ $64, R11 // in4+=64
-
- ADDQ $64, DX // out+=64
-
- SUBQ $1, R9
- JNZ loopback_avx512_parallel81
-
-done_avx512_parallel81:
- VZEROUPPER
- RET
-
-//
-// Process 2 output rows in parallel from a total of 8 input rows
-//
-// func _galMulAVX512Parallel82(in, out [][]byte, matrix *[matrixSize82]byte, addTo bool)
-TEXT ·_galMulAVX512Parallel82(SB), 7, $0
- MOVQ in+0(FP), SI
- MOVQ 8(SI), R9 // R9: len(in)
- SHRQ $6, R9 // len(in) / 64
- TESTQ R9, R9
- JZ done_avx512_parallel82
-
- MOVQ matrix+48(FP), SI
- VMOVDQU64 0x000(SI), Z16
- VMOVDQU64 0x040(SI), Z17
- VMOVDQU64 0x080(SI), Z18
- VMOVDQU64 0x0c0(SI), Z19
- VMOVDQU64 0x100(SI), Z20
- VMOVDQU64 0x140(SI), Z21
- VMOVDQU64 0x180(SI), Z22
- VMOVDQU64 0x1c0(SI), Z23
-
- // Initialize multiplication constants
- VSHUFI64X2 $0x55, Z16, Z16, Z24
- VSHUFI64X2 $0xaa, Z16, Z16, Z25
- VSHUFI64X2 $0xff, Z16, Z16, Z26
- VSHUFI64X2 $0x00, Z16, Z16, Z16
-
- VSHUFI64X2 $0x55, Z20, Z20, Z27
- VSHUFI64X2 $0xaa, Z20, Z20, Z28
- VSHUFI64X2 $0xff, Z20, Z20, Z29
- VSHUFI64X2 $0x00, Z20, Z20, Z20
-
- VSHUFI64X2 $0x55, Z17, Z17, Z30
- VSHUFI64X2 $0xaa, Z17, Z17, Z31
- VSHUFI64X2 $0xff, Z17, Z17, Z11
- VSHUFI64X2 $0x00, Z17, Z17, Z17
-
- VSHUFI64X2 $0x55, Z21, Z21, Z8
- VSHUFI64X2 $0xaa, Z21, Z21, Z9
- VSHUFI64X2 $0xff, Z21, Z21, Z10
- VSHUFI64X2 $0x00, Z21, Z21, Z21
-
- MOVQ $15, BX
- VPBROADCASTB BX, Z2
-
- MOVB addTo+56(FP), AX
- IMULQ $-0x1, AX
- KMOVQ AX, K1
- MOVQ in+0(FP), SI // SI: &in
- MOVQ in_len+8(FP), AX // number of inputs
- XORQ R11, R11
- MOVQ out+24(FP), DX
- MOVQ 24(DX), CX // CX: &out[1][0]
- MOVQ (DX), DX // DX: &out[0][0]
-
-loopback_avx512_parallel82:
- VMOVDQU64.Z (DX), K1, Z4
- VMOVDQU64.Z (CX), K1, Z5
-
- LOAD(0x00) // &in[0][0]
- GALOIS_MUL(Z16, Z24, Z14, Z15, Z4)
- GALOIS_MUL(Z20, Z27, Z12, Z13, Z5)
-
- CMPQ AX, $1
- JE skip_avx512_parallel82
-
- LOAD(0x18) // &in[1][0]
- GALOIS_MUL(Z25, Z26, Z14, Z15, Z4)
- GALOIS_MUL(Z28, Z29, Z12, Z13, Z5)
-
- CMPQ AX, $2
- JE skip_avx512_parallel82
-
- LOAD(0x30) // &in[2][0]
- GALOIS_MUL(Z17, Z30, Z14, Z15, Z4)
- GALOIS_MUL(Z21, Z8, Z12, Z13, Z5)
-
- CMPQ AX, $3
- JE skip_avx512_parallel82
-
- LOAD(0x48) // &in[3][0]
- GALOIS_MUL(Z31, Z11, Z14, Z15, Z4)
- GALOIS_MUL(Z9, Z10, Z12, Z13, Z5)
-
- CMPQ AX, $4
- JE skip_avx512_parallel82
-
- LOAD(0x60) // &in[4][0]
- GALOIS(0x00, 0x55, Z18, Z14, Z15, Z4)
- GALOIS(0x00, 0x55, Z22, Z12, Z13, Z5)
-
- CMPQ AX, $5
- JE skip_avx512_parallel82
-
- LOAD(0x78) // &in[5][0]
- GALOIS(0xaa, 0xff, Z18, Z14, Z15, Z4)
- GALOIS(0xaa, 0xff, Z22, Z12, Z13, Z5)
-
- CMPQ AX, $6
- JE skip_avx512_parallel82
-
- LOAD(0x90) // &in[6][0]
- GALOIS(0x00, 0x55, Z19, Z14, Z15, Z4)
- GALOIS(0x00, 0x55, Z23, Z12, Z13, Z5)
-
- CMPQ AX, $7
- JE skip_avx512_parallel82
-
- LOAD(0xa8) // &in[7][0]
- GALOIS(0xaa, 0xff, Z19, Z14, Z15, Z4)
- GALOIS(0xaa, 0xff, Z23, Z12, Z13, Z5)
-
-skip_avx512_parallel82:
- VMOVDQU64 Z4, (DX)
- VMOVDQU64 Z5, (CX)
-
- ADDQ $64, R11 // in4+=64
-
- ADDQ $64, DX // out+=64
- ADDQ $64, CX // out2+=64
-
- SUBQ $1, R9
- JNZ loopback_avx512_parallel82
-
-done_avx512_parallel82:
- VZEROUPPER
- RET
-
-//
-// Process 4 output rows in parallel from a total of 8 input rows
-//
-// func _galMulAVX512Parallel84(in, out [][]byte, matrix *[matrixSize84]byte, addTo bool)
-TEXT ·_galMulAVX512Parallel84(SB), 7, $0
- MOVQ in+0(FP), SI
- MOVQ 8(SI), R9 // R9: len(in)
- SHRQ $6, R9 // len(in) / 64
- TESTQ R9, R9
- JZ done_avx512_parallel84
-
- MOVQ matrix+48(FP), SI
- VMOVDQU64 0x000(SI), Z16
- VMOVDQU64 0x040(SI), Z17
- VMOVDQU64 0x080(SI), Z18
- VMOVDQU64 0x0c0(SI), Z19
- VMOVDQU64 0x100(SI), Z20
- VMOVDQU64 0x140(SI), Z21
- VMOVDQU64 0x180(SI), Z22
- VMOVDQU64 0x1c0(SI), Z23
- VMOVDQU64 0x200(SI), Z24
- VMOVDQU64 0x240(SI), Z25
- VMOVDQU64 0x280(SI), Z26
- VMOVDQU64 0x2c0(SI), Z27
- VMOVDQU64 0x300(SI), Z28
- VMOVDQU64 0x340(SI), Z29
- VMOVDQU64 0x380(SI), Z30
- VMOVDQU64 0x3c0(SI), Z31
-
- MOVQ $15, BX
- VPBROADCASTB BX, Z2
-
- MOVB addTo+56(FP), AX
- IMULQ $-0x1, AX
- KMOVQ AX, K1
- MOVQ in+0(FP), SI // SI: &in
- MOVQ in_len+8(FP), AX // number of inputs
- XORQ R11, R11
- MOVQ out+24(FP), DX
- MOVQ 24(DX), CX // CX: &out[1][0]
- MOVQ 48(DX), R10 // R10: &out[2][0]
- MOVQ 72(DX), R12 // R12: &out[3][0]
- MOVQ (DX), DX // DX: &out[0][0]
-
-loopback_avx512_parallel84:
- VMOVDQU64.Z (DX), K1, Z4
- VMOVDQU64.Z (CX), K1, Z5
- VMOVDQU64.Z (R10), K1, Z6
- VMOVDQU64.Z (R12), K1, Z7
-
- LOAD(0x00) // &in[0][0]
- GALOIS(0x00, 0x55, Z16, Z14, Z15, Z4)
- GALOIS(0x00, 0x55, Z20, Z12, Z13, Z5)
- GALOIS(0x00, 0x55, Z24, Z10, Z11, Z6)
- GALOIS(0x00, 0x55, Z28, Z8, Z9, Z7)
-
- CMPQ AX, $1
- JE skip_avx512_parallel84
-
- LOAD(0x18) // &in[1][0]
- GALOIS(0xaa, 0xff, Z16, Z14, Z15, Z4)
- GALOIS(0xaa, 0xff, Z20, Z12, Z13, Z5)
- GALOIS(0xaa, 0xff, Z24, Z10, Z11, Z6)
- GALOIS(0xaa, 0xff, Z28, Z8, Z9, Z7)
-
- CMPQ AX, $2
- JE skip_avx512_parallel84
-
- LOAD(0x30) // &in[2][0]
- GALOIS(0x00, 0x55, Z17, Z14, Z15, Z4)
- GALOIS(0x00, 0x55, Z21, Z12, Z13, Z5)
- GALOIS(0x00, 0x55, Z25, Z10, Z11, Z6)
- GALOIS(0x00, 0x55, Z29, Z8, Z9, Z7)
-
- CMPQ AX, $3
- JE skip_avx512_parallel84
-
- LOAD(0x48) // &in[3][0]
- GALOIS(0xaa, 0xff, Z17, Z14, Z15, Z4)
- GALOIS(0xaa, 0xff, Z21, Z12, Z13, Z5)
- GALOIS(0xaa, 0xff, Z25, Z10, Z11, Z6)
- GALOIS(0xaa, 0xff, Z29, Z8, Z9, Z7)
-
- CMPQ AX, $4
- JE skip_avx512_parallel84
-
- LOAD(0x60) // &in[4][0]
- GALOIS(0x00, 0x55, Z18, Z14, Z15, Z4)
- GALOIS(0x00, 0x55, Z22, Z12, Z13, Z5)
- GALOIS(0x00, 0x55, Z26, Z10, Z11, Z6)
- GALOIS(0x00, 0x55, Z30, Z8, Z9, Z7)
-
- CMPQ AX, $5
- JE skip_avx512_parallel84
-
- LOAD(0x78) // &in[5][0]
- GALOIS(0xaa, 0xff, Z18, Z14, Z15, Z4)
- GALOIS(0xaa, 0xff, Z22, Z12, Z13, Z5)
- GALOIS(0xaa, 0xff, Z26, Z10, Z11, Z6)
- GALOIS(0xaa, 0xff, Z30, Z8, Z9, Z7)
-
- CMPQ AX, $6
- JE skip_avx512_parallel84
-
- LOAD(0x90) // &in[6][0]
- GALOIS(0x00, 0x55, Z19, Z14, Z15, Z4)
- GALOIS(0x00, 0x55, Z23, Z12, Z13, Z5)
- GALOIS(0x00, 0x55, Z27, Z10, Z11, Z6)
- GALOIS(0x00, 0x55, Z31, Z8, Z9, Z7)
-
- CMPQ AX, $7
- JE skip_avx512_parallel84
-
- LOAD(0xa8) // &in[7][0]
- GALOIS(0xaa, 0xff, Z19, Z14, Z15, Z4)
- GALOIS(0xaa, 0xff, Z23, Z12, Z13, Z5)
- GALOIS(0xaa, 0xff, Z27, Z10, Z11, Z6)
- GALOIS(0xaa, 0xff, Z31, Z8, Z9, Z7)
-
-skip_avx512_parallel84:
- VMOVDQU64 Z4, (DX)
- VMOVDQU64 Z5, (CX)
- VMOVDQU64 Z6, (R10)
- VMOVDQU64 Z7, (R12)
-
- ADDQ $64, R11 // in4+=64
-
- ADDQ $64, DX // out+=64
- ADDQ $64, CX // out2+=64
- ADDQ $64, R10 // out3+=64
- ADDQ $64, R12 // out4+=64
-
- SUBQ $1, R9
- JNZ loopback_avx512_parallel84
-
-done_avx512_parallel84:
- VZEROUPPER
- RET
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
deleted file mode 100644
index ae6f289..0000000
--- a/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
+++ /dev/null
@@ -1,346 +0,0 @@
-//go:build !noasm && !appengine && !gccgo
-// +build !noasm,!appengine,!gccgo
-
-// Copyright 2015, Klaus Post, see LICENSE for details.
-
-package reedsolomon
-
-//go:noescape
-func galMulSSSE3(low, high, in, out []byte)
-
-//go:noescape
-func galMulSSSE3Xor(low, high, in, out []byte)
-
-//go:noescape
-func galMulAVX2Xor(low, high, in, out []byte)
-
-//go:noescape
-func galMulAVX2(low, high, in, out []byte)
-
-//go:noescape
-func sSE2XorSlice(in, out []byte)
-
-//go:noescape
-func galMulAVX2Xor_64(low, high, in, out []byte)
-
-//go:noescape
-func galMulAVX2_64(low, high, in, out []byte)
-
-//go:noescape
-func sSE2XorSlice_64(in, out []byte)
-
-// This is what the assembler routines do in blocks of 16 bytes:
-/*
-func galMulSSSE3(low, high, in, out []byte) {
- for n, input := range in {
- l := input & 0xf
- h := input >> 4
- out[n] = low[l] ^ high[h]
- }
-}
-
-func galMulSSSE3Xor(low, high, in, out []byte) {
- for n, input := range in {
- l := input & 0xf
- h := input >> 4
- out[n] ^= low[l] ^ high[h]
- }
-}
-*/
-
-// bigSwitchover is the size where 64 bytes are processed per loop.
-const bigSwitchover = 128
-
-func galMulSlice(c byte, in, out []byte, o *options) {
- if c == 1 {
- copy(out, in)
- return
- }
- if o.useAVX2 {
- if len(in) >= bigSwitchover {
- galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
- done := (len(in) >> 6) << 6
- in = in[done:]
- out = out[done:]
- }
- if len(in) > 32 {
- galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
- done := (len(in) >> 5) << 5
- in = in[done:]
- out = out[done:]
- }
- } else if o.useSSSE3 {
- galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
- done := (len(in) >> 4) << 4
- in = in[done:]
- out = out[done:]
- }
- out = out[:len(in)]
- mt := mulTable[c][:256]
- for i := range in {
- out[i] = mt[in[i]]
- }
-}
-
-func galMulSliceXor(c byte, in, out []byte, o *options) {
- if c == 1 {
- sliceXor(in, out, o)
- return
- }
-
- if o.useAVX2 {
- if len(in) >= bigSwitchover {
- galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
- done := (len(in) >> 6) << 6
- in = in[done:]
- out = out[done:]
- }
- if len(in) >= 32 {
- galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
- done := (len(in) >> 5) << 5
- in = in[done:]
- out = out[done:]
- }
- } else if o.useSSSE3 {
- galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
- done := (len(in) >> 4) << 4
- in = in[done:]
- out = out[done:]
- }
- if len(in) == 0 {
- return
- }
- out = out[:len(in)]
- mt := mulTable[c][:256]
- for i := range in {
- out[i] ^= mt[in[i]]
- }
-}
-
-// simple slice xor
-func sliceXor(in, out []byte, o *options) {
- if o.useSSE2 {
- if len(in) >= bigSwitchover {
- sSE2XorSlice_64(in, out)
- done := (len(in) >> 6) << 6
- in = in[done:]
- out = out[done:]
- }
- if len(in) >= 16 {
- sSE2XorSlice(in, out)
- done := (len(in) >> 4) << 4
- in = in[done:]
- out = out[done:]
- }
- } else {
- sliceXorGo(in, out, o)
- return
- }
- out = out[:len(in)]
- for i := range in {
- out[i] ^= in[i]
- }
-}
-
-// 4-way butterfly
-func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
- if len(work[0]) == 0 {
- return
- }
-
- t01 := &multiply256LUT[log_m01]
- t23 := &multiply256LUT[log_m23]
- t02 := &multiply256LUT[log_m02]
- if o.useAVX512 {
- if log_m01 == modulus {
- if log_m23 == modulus {
- if log_m02 == modulus {
- ifftDIT4_avx512_7(work, dist*24, t01, t23, t02)
- } else {
- ifftDIT4_avx512_3(work, dist*24, t01, t23, t02)
- }
- } else {
- if log_m02 == modulus {
- ifftDIT4_avx512_5(work, dist*24, t01, t23, t02)
- } else {
- ifftDIT4_avx512_1(work, dist*24, t01, t23, t02)
- }
- }
- } else {
- if log_m23 == modulus {
- if log_m02 == modulus {
- ifftDIT4_avx512_6(work, dist*24, t01, t23, t02)
- } else {
- ifftDIT4_avx512_2(work, dist*24, t01, t23, t02)
- }
- } else {
- if log_m02 == modulus {
- ifftDIT4_avx512_4(work, dist*24, t01, t23, t02)
- } else {
- ifftDIT4_avx512_0(work, dist*24, t01, t23, t02)
- }
- }
- }
- return
- } else if o.useAVX2 {
- if log_m01 == modulus {
- if log_m23 == modulus {
- if log_m02 == modulus {
- ifftDIT4_avx2_7(work, dist*24, t01, t23, t02)
- } else {
- ifftDIT4_avx2_3(work, dist*24, t01, t23, t02)
- }
- } else {
- if log_m02 == modulus {
- ifftDIT4_avx2_5(work, dist*24, t01, t23, t02)
- } else {
- ifftDIT4_avx2_1(work, dist*24, t01, t23, t02)
- }
- }
- } else {
- if log_m23 == modulus {
- if log_m02 == modulus {
- ifftDIT4_avx2_6(work, dist*24, t01, t23, t02)
- } else {
- ifftDIT4_avx2_2(work, dist*24, t01, t23, t02)
- }
- } else {
- if log_m02 == modulus {
- ifftDIT4_avx2_4(work, dist*24, t01, t23, t02)
- } else {
- ifftDIT4_avx2_0(work, dist*24, t01, t23, t02)
- }
- }
- }
- return
- }
- ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
-}
-
-func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
- if len(work[0]) == 0 {
- return
- }
-
- t01 := &multiply256LUT[log_m01]
- t23 := &multiply256LUT[log_m23]
- t02 := &multiply256LUT[log_m02]
- if o.useAVX512 {
- if log_m02 == modulus {
- if log_m01 == modulus {
- if log_m23 == modulus {
- fftDIT4_avx512_7(work, dist*24, t01, t23, t02)
- } else {
- fftDIT4_avx512_3(work, dist*24, t01, t23, t02)
- }
- } else {
- if log_m23 == modulus {
- fftDIT4_avx512_5(work, dist*24, t01, t23, t02)
- } else {
- fftDIT4_avx512_1(work, dist*24, t01, t23, t02)
- }
- }
- } else {
- if log_m01 == modulus {
- if log_m23 == modulus {
- fftDIT4_avx512_6(work, dist*24, t01, t23, t02)
- } else {
- fftDIT4_avx512_2(work, dist*24, t01, t23, t02)
- }
- } else {
- if log_m23 == modulus {
- fftDIT4_avx512_4(work, dist*24, t01, t23, t02)
- } else {
- fftDIT4_avx512_0(work, dist*24, t01, t23, t02)
- }
- }
- }
- return
- } else if o.useAVX2 {
- if log_m02 == modulus {
- if log_m01 == modulus {
- if log_m23 == modulus {
- fftDIT4_avx2_7(work, dist*24, t01, t23, t02)
- } else {
- fftDIT4_avx2_3(work, dist*24, t01, t23, t02)
- }
- } else {
- if log_m23 == modulus {
- fftDIT4_avx2_5(work, dist*24, t01, t23, t02)
- } else {
- fftDIT4_avx2_1(work, dist*24, t01, t23, t02)
- }
- }
- } else {
- if log_m01 == modulus {
- if log_m23 == modulus {
- fftDIT4_avx2_6(work, dist*24, t01, t23, t02)
- } else {
- fftDIT4_avx2_2(work, dist*24, t01, t23, t02)
- }
- } else {
- if log_m23 == modulus {
- fftDIT4_avx2_4(work, dist*24, t01, t23, t02)
- } else {
- fftDIT4_avx2_0(work, dist*24, t01, t23, t02)
- }
- }
- }
- return
- }
- fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
-}
-
-// 2-way butterfly forward
-func fftDIT2(x, y []byte, log_m ffe, o *options) {
- if o.useAVX2 {
- if len(x) > 0 {
- tmp := &multiply256LUT[log_m]
- fftDIT2_avx2(x, y, tmp)
- }
- } else if o.useSSSE3 {
- if len(x) > 0 {
- tmp := &multiply256LUT[log_m]
- fftDIT2_ssse3(x, y, tmp)
- }
- } else {
- // Reference version:
- refMulAdd(x, y, log_m)
- sliceXor(x, y, o)
- }
-}
-
-// 2-way butterfly
-func ifftDIT2(x, y []byte, log_m ffe, o *options) {
- if o.useAVX2 {
- if len(x) > 0 {
- tmp := &multiply256LUT[log_m]
- ifftDIT2_avx2(x, y, tmp)
- }
- } else if o.useSSSE3 {
- if len(x) > 0 {
- tmp := &multiply256LUT[log_m]
- ifftDIT2_ssse3(x, y, tmp)
- }
- } else {
- // Reference version:
- sliceXor(x, y, o)
- refMulAdd(x, y, log_m)
- }
-}
-
-func mulgf16(x, y []byte, log_m ffe, o *options) {
- if o.useAVX2 {
- if len(x) > 0 {
- tmp := &multiply256LUT[log_m]
- mulgf16_avx2(x, y, tmp)
- }
- } else if o.useSSSE3 {
- if len(x) > 0 {
- tmp := &multiply256LUT[log_m]
- mulgf16_ssse3(x, y, tmp)
- }
- } else {
- refMul(x, y, log_m)
- }
-}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_amd64.s b/vendor/github.com/klauspost/reedsolomon/galois_amd64.s
deleted file mode 100644
index f1dc8d5..0000000
--- a/vendor/github.com/klauspost/reedsolomon/galois_amd64.s
+++ /dev/null
@@ -1,370 +0,0 @@
-//+build !noasm
-//+build !appengine
-//+build !gccgo
-
-// Copyright 2015, Klaus Post, see LICENSE for details.
-
-// Based on http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf
-// and http://jerasure.org/jerasure/gf-complete/tree/master
-
-// func galMulSSSE3Xor(low, high, in, out []byte)
-TEXT ·galMulSSSE3Xor(SB), 7, $0
- MOVQ low+0(FP), SI // SI: &low
- MOVQ high+24(FP), DX // DX: &high
- MOVOU (SI), X6 // X6 low
- MOVOU (DX), X7 // X7: high
- MOVQ $15, BX // BX: low mask
- MOVQ BX, X8
- PXOR X5, X5
- MOVQ in+48(FP), SI // R11: &in
- MOVQ in_len+56(FP), R9 // R9: len(in)
- MOVQ out+72(FP), DX // DX: &out
- PSHUFB X5, X8 // X8: lomask (unpacked)
- SHRQ $4, R9 // len(in) / 16
- MOVQ SI, AX
- MOVQ DX, BX
- ANDQ $15, AX
- ANDQ $15, BX
- CMPQ R9, $0
- JEQ done_xor
- ORQ AX, BX
- CMPQ BX, $0
- JNZ loopback_xor
-
-loopback_xor_aligned:
- MOVOA (SI), X0 // in[x]
- MOVOA (DX), X4 // out[x]
- MOVOA X0, X1 // in[x]
- MOVOA X6, X2 // low copy
- MOVOA X7, X3 // high copy
- PSRLQ $4, X1 // X1: high input
- PAND X8, X0 // X0: low input
- PAND X8, X1 // X0: high input
- PSHUFB X0, X2 // X2: mul low part
- PSHUFB X1, X3 // X3: mul high part
- PXOR X2, X3 // X3: Result
- PXOR X4, X3 // X3: Result xor existing out
- MOVOA X3, (DX) // Store
- ADDQ $16, SI // in+=16
- ADDQ $16, DX // out+=16
- SUBQ $1, R9
- JNZ loopback_xor_aligned
- JMP done_xor
-
-loopback_xor:
- MOVOU (SI), X0 // in[x]
- MOVOU (DX), X4 // out[x]
- MOVOU X0, X1 // in[x]
- MOVOU X6, X2 // low copy
- MOVOU X7, X3 // high copy
- PSRLQ $4, X1 // X1: high input
- PAND X8, X0 // X0: low input
- PAND X8, X1 // X0: high input
- PSHUFB X0, X2 // X2: mul low part
- PSHUFB X1, X3 // X3: mul high part
- PXOR X2, X3 // X3: Result
- PXOR X4, X3 // X3: Result xor existing out
- MOVOU X3, (DX) // Store
- ADDQ $16, SI // in+=16
- ADDQ $16, DX // out+=16
- SUBQ $1, R9
- JNZ loopback_xor
-
-done_xor:
- RET
-
-// func galMulSSSE3(low, high, in, out []byte)
-TEXT ·galMulSSSE3(SB), 7, $0
- MOVQ low+0(FP), SI // SI: &low
- MOVQ high+24(FP), DX // DX: &high
- MOVOU (SI), X6 // X6 low
- MOVOU (DX), X7 // X7: high
- MOVQ $15, BX // BX: low mask
- MOVQ BX, X8
- PXOR X5, X5
- MOVQ in+48(FP), SI // R11: &in
- MOVQ in_len+56(FP), R9 // R9: len(in)
- MOVQ out+72(FP), DX // DX: &out
- PSHUFB X5, X8 // X8: lomask (unpacked)
- MOVQ SI, AX
- MOVQ DX, BX
- SHRQ $4, R9 // len(in) / 16
- ANDQ $15, AX
- ANDQ $15, BX
- CMPQ R9, $0
- JEQ done
- ORQ AX, BX
- CMPQ BX, $0
- JNZ loopback
-
-loopback_aligned:
- MOVOA (SI), X0 // in[x]
- MOVOA X0, X1 // in[x]
- MOVOA X6, X2 // low copy
- MOVOA X7, X3 // high copy
- PSRLQ $4, X1 // X1: high input
- PAND X8, X0 // X0: low input
- PAND X8, X1 // X0: high input
- PSHUFB X0, X2 // X2: mul low part
- PSHUFB X1, X3 // X3: mul high part
- PXOR X2, X3 // X3: Result
- MOVOA X3, (DX) // Store
- ADDQ $16, SI // in+=16
- ADDQ $16, DX // out+=16
- SUBQ $1, R9
- JNZ loopback_aligned
- JMP done
-
-loopback:
- MOVOU (SI), X0 // in[x]
- MOVOU X0, X1 // in[x]
- MOVOA X6, X2 // low copy
- MOVOA X7, X3 // high copy
- PSRLQ $4, X1 // X1: high input
- PAND X8, X0 // X0: low input
- PAND X8, X1 // X0: high input
- PSHUFB X0, X2 // X2: mul low part
- PSHUFB X1, X3 // X3: mul high part
- PXOR X2, X3 // X3: Result
- MOVOU X3, (DX) // Store
- ADDQ $16, SI // in+=16
- ADDQ $16, DX // out+=16
- SUBQ $1, R9
- JNZ loopback
-
-done:
- RET
-
-// func galMulAVX2Xor(low, high, in, out []byte)
-TEXT ·galMulAVX2Xor(SB), 7, $0
- MOVQ low+0(FP), SI // SI: &low
- MOVQ high+24(FP), DX // DX: &high
- MOVQ $15, BX // BX: low mask
- MOVQ BX, X5
- MOVOU (SI), X6 // X6: low
- MOVOU (DX), X7 // X7: high
- MOVQ in_len+56(FP), R9 // R9: len(in)
-
- VINSERTI128 $1, X6, Y6, Y6 // low
- VINSERTI128 $1, X7, Y7, Y7 // high
- VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
-
- SHRQ $5, R9 // len(in) / 32
- MOVQ out+72(FP), DX // DX: &out
- MOVQ in+48(FP), SI // SI: &in
- TESTQ R9, R9
- JZ done_xor_avx2
-
-loopback_xor_avx2:
- VMOVDQU (SI), Y0
- VMOVDQU (DX), Y4
- VPSRLQ $4, Y0, Y1 // Y1: high input
- VPAND Y8, Y0, Y0 // Y0: low input
- VPAND Y8, Y1, Y1 // Y1: high input
- VPSHUFB Y0, Y6, Y2 // Y2: mul low part
- VPSHUFB Y1, Y7, Y3 // Y3: mul high part
- VPXOR Y3, Y2, Y3 // Y3: Result
- VPXOR Y4, Y3, Y4 // Y4: Result
- VMOVDQU Y4, (DX)
-
- ADDQ $32, SI // in+=32
- ADDQ $32, DX // out+=32
- SUBQ $1, R9
- JNZ loopback_xor_avx2
-
-done_xor_avx2:
- VZEROUPPER
- RET
-
-// func galMulAVX2(low, high, in, out []byte)
-TEXT ·galMulAVX2(SB), 7, $0
- MOVQ low+0(FP), SI // SI: &low
- MOVQ high+24(FP), DX // DX: &high
- MOVQ $15, BX // BX: low mask
- MOVQ BX, X5
- MOVOU (SI), X6 // X6: low
- MOVOU (DX), X7 // X7: high
- MOVQ in_len+56(FP), R9 // R9: len(in)
-
- VINSERTI128 $1, X6, Y6, Y6 // low
- VINSERTI128 $1, X7, Y7, Y7 // high
- VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
-
- SHRQ $5, R9 // len(in) / 32
- MOVQ out+72(FP), DX // DX: &out
- MOVQ in+48(FP), SI // SI: &in
- TESTQ R9, R9
- JZ done_avx2
-
-loopback_avx2:
- VMOVDQU (SI), Y0
- VPSRLQ $4, Y0, Y1 // Y1: high input
- VPAND Y8, Y0, Y0 // Y0: low input
- VPAND Y8, Y1, Y1 // Y1: high input
- VPSHUFB Y0, Y6, Y2 // Y2: mul low part
- VPSHUFB Y1, Y7, Y3 // Y3: mul high part
- VPXOR Y3, Y2, Y4 // Y4: Result
- VMOVDQU Y4, (DX)
-
- ADDQ $32, SI // in+=32
- ADDQ $32, DX // out+=32
- SUBQ $1, R9
- JNZ loopback_avx2
-
-done_avx2:
- VZEROUPPER
- RET
-
-// func sSE2XorSlice(in, out []byte)
-TEXT ·sSE2XorSlice(SB), 7, $0
- MOVQ in+0(FP), SI // SI: &in
- MOVQ in_len+8(FP), R9 // R9: len(in)
- MOVQ out+24(FP), DX // DX: &out
- SHRQ $4, R9 // len(in) / 16
- CMPQ R9, $0
- JEQ done_xor_sse2
-
-loopback_xor_sse2:
- MOVOU (SI), X0 // in[x]
- MOVOU (DX), X1 // out[x]
- PXOR X0, X1
- MOVOU X1, (DX)
- ADDQ $16, SI // in+=16
- ADDQ $16, DX // out+=16
- SUBQ $1, R9
- JNZ loopback_xor_sse2
-
-done_xor_sse2:
- RET
-
-// func galMulAVX2Xor_64(low, high, in, out []byte)
-TEXT ·galMulAVX2Xor_64(SB), 7, $0
- MOVQ low+0(FP), SI // SI: &low
- MOVQ high+24(FP), DX // DX: &high
- MOVQ $15, BX // BX: low mask
- MOVQ BX, X5
- MOVOU (SI), X6 // X6: low
- MOVOU (DX), X7 // X7: high
- MOVQ in_len+56(FP), R9 // R9: len(in)
-
- VINSERTI128 $1, X6, Y6, Y6 // low
- VINSERTI128 $1, X7, Y7, Y7 // high
- VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
-
- SHRQ $6, R9 // len(in) / 64
- MOVQ out+72(FP), DX // DX: &out
- MOVQ in+48(FP), SI // SI: &in
- TESTQ R9, R9
- JZ done_xor_avx2_64
-
-loopback_xor_avx2_64:
- VMOVDQU (SI), Y0
- VMOVDQU 32(SI), Y10
- VMOVDQU (DX), Y4
- VMOVDQU 32(DX), Y14
- VPSRLQ $4, Y0, Y1 // Y1: high input
- VPSRLQ $4, Y10, Y11 // Y11: high input 2
- VPAND Y8, Y0, Y0 // Y0: low input
- VPAND Y8, Y10, Y10 // Y10: low input 2
- VPAND Y8, Y1, Y1 // Y11: high input
- VPAND Y8, Y11, Y11 // Y11: high input 2
- VPSHUFB Y0, Y6, Y2 // Y2: mul low part
- VPSHUFB Y10, Y6, Y12 // Y12: mul low part 2
- VPSHUFB Y1, Y7, Y3 // Y3: mul high part
- VPSHUFB Y11, Y7, Y13 // Y13: mul high part 2
- VPXOR Y3, Y2, Y3 // Y3: Result
- VPXOR Y13, Y12, Y13 // Y13: Result 2
- VPXOR Y4, Y3, Y4 // Y4: Result
- VPXOR Y14, Y13, Y14 // Y4: Result 2
- VMOVDQU Y4, (DX)
- VMOVDQU Y14, 32(DX)
-
- ADDQ $64, SI // in+=64
- ADDQ $64, DX // out+=64
- SUBQ $1, R9
- JNZ loopback_xor_avx2_64
-
-done_xor_avx2_64:
- VZEROUPPER
- RET
-
-// func galMulAVX2_64(low, high, in, out []byte)
-TEXT ·galMulAVX2_64(SB), 7, $0
- MOVQ low+0(FP), SI // SI: &low
- MOVQ high+24(FP), DX // DX: &high
- MOVQ $15, BX // BX: low mask
- MOVQ BX, X5
- MOVOU (SI), X6 // X6: low
- MOVOU (DX), X7 // X7: high
- MOVQ in_len+56(FP), R9 // R9: len(in)
-
- VINSERTI128 $1, X6, Y6, Y6 // low
- VINSERTI128 $1, X7, Y7, Y7 // high
- VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
-
- SHRQ $6, R9 // len(in) / 64
- MOVQ out+72(FP), DX // DX: &out
- MOVQ in+48(FP), SI // SI: &in
- TESTQ R9, R9
- JZ done_avx2_64
-
-loopback_avx2_64:
- VMOVDQU (SI), Y0
- VMOVDQU 32(SI), Y10
- VPSRLQ $4, Y0, Y1 // Y1: high input
- VPSRLQ $4, Y10, Y11 // Y11: high input 2
- VPAND Y8, Y0, Y0 // Y0: low input
- VPAND Y8, Y10, Y10 // Y10: low input
- VPAND Y8, Y1, Y1 // Y1: high input
- VPAND Y8, Y11, Y11 // Y11: high input 2
- VPSHUFB Y0, Y6, Y2 // Y2: mul low part
- VPSHUFB Y10, Y6, Y12 // Y12: mul low part 2
- VPSHUFB Y1, Y7, Y3 // Y3: mul high part
- VPSHUFB Y11, Y7, Y13 // Y13: mul high part 2
- VPXOR Y3, Y2, Y4 // Y4: Result
- VPXOR Y13, Y12, Y14 // Y14: Result 2
- VMOVDQU Y4, (DX)
- VMOVDQU Y14, 32(DX)
-
- ADDQ $64, SI // in+=64
- ADDQ $64, DX // out+=64
- SUBQ $1, R9
- JNZ loopback_avx2_64
-
-done_avx2_64:
- VZEROUPPER
- RET
-
-// func sSE2XorSlice_64(in, out []byte)
-TEXT ·sSE2XorSlice_64(SB), 7, $0
- MOVQ in+0(FP), SI // SI: &in
- MOVQ in_len+8(FP), R9 // R9: len(in)
- MOVQ out+24(FP), DX // DX: &out
- SHRQ $6, R9 // len(in) / 64
- CMPQ R9, $0
- JEQ done_xor_sse2_64
-
-loopback_xor_sse2_64:
- MOVOU (SI), X0 // in[x]
- MOVOU 16(SI), X2 // in[x]
- MOVOU 32(SI), X4 // in[x]
- MOVOU 48(SI), X6 // in[x]
- MOVOU (DX), X1 // out[x]
- MOVOU 16(DX), X3 // out[x]
- MOVOU 32(DX), X5 // out[x]
- MOVOU 48(DX), X7 // out[x]
- PXOR X0, X1
- PXOR X2, X3
- PXOR X4, X5
- PXOR X6, X7
- MOVOU X1, (DX)
- MOVOU X3, 16(DX)
- MOVOU X5, 32(DX)
- MOVOU X7, 48(DX)
- ADDQ $64, SI // in+=64
- ADDQ $64, DX // out+=64
- SUBQ $1, R9
- JNZ loopback_xor_sse2_64
-
-done_xor_sse2_64:
- RET
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_arm64.go b/vendor/github.com/klauspost/reedsolomon/galois_arm64.go
deleted file mode 100644
index 92b67b8..0000000
--- a/vendor/github.com/klauspost/reedsolomon/galois_arm64.go
+++ /dev/null
@@ -1,96 +0,0 @@
-//go:build !noasm && !appengine && !gccgo
-// +build !noasm,!appengine,!gccgo
-
-// Copyright 2015, Klaus Post, see LICENSE for details.
-// Copyright 2017, Minio, Inc.
-
-package reedsolomon
-
-//go:noescape
-func galMulNEON(low, high, in, out []byte)
-
-//go:noescape
-func galMulXorNEON(low, high, in, out []byte)
-
-//go:noescape
-func galXorNEON(in, out []byte)
-
-func galMulSlice(c byte, in, out []byte, o *options) {
- if c == 1 {
- copy(out, in)
- return
- }
- var done int
- galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
- done = (len(in) >> 5) << 5
-
- remain := len(in) - done
- if remain > 0 {
- mt := mulTable[c][:256]
- for i := done; i < len(in); i++ {
- out[i] = mt[in[i]]
- }
- }
-}
-
-func galMulSliceXor(c byte, in, out []byte, o *options) {
- if c == 1 {
- sliceXor(in, out, o)
- return
- }
- var done int
- galMulXorNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
- done = (len(in) >> 5) << 5
-
- remain := len(in) - done
- if remain > 0 {
- mt := mulTable[c][:256]
- for i := done; i < len(in); i++ {
- out[i] ^= mt[in[i]]
- }
- }
-}
-
-// simple slice xor
-func sliceXor(in, out []byte, o *options) {
-
- galXorNEON(in, out)
- done := (len(in) >> 5) << 5
-
- remain := len(in) - done
- if remain > 0 {
- for i := done; i < len(in); i++ {
- out[i] ^= in[i]
- }
- }
-}
-
-// 4-way butterfly
-func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
- ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
-}
-
-// 4-way butterfly
-func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
- fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
-}
-
-// 2-way butterfly forward
-func fftDIT2(x, y []byte, log_m ffe, o *options) {
- // Reference version:
- refMulAdd(x, y, log_m)
- // 64 byte aligned, always full.
- galXorNEON(x, y)
-}
-
-// 2-way butterfly
-func ifftDIT2(x, y []byte, log_m ffe, o *options) {
- // 64 byte aligned, always full.
- galXorNEON(x, y)
- // Reference version:
- refMulAdd(x, y, log_m)
-}
-
-func mulgf16(x, y []byte, log_m ffe, o *options) {
- refMul(x, y, log_m)
-}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_arm64.s b/vendor/github.com/klauspost/reedsolomon/galois_arm64.s
deleted file mode 100644
index 3ae3237..0000000
--- a/vendor/github.com/klauspost/reedsolomon/galois_arm64.s
+++ /dev/null
@@ -1,127 +0,0 @@
-//+build !noasm
-//+build !appengine
-//+build !gccgo
-
-// Copyright 2015, Klaus Post, see LICENSE for details.
-// Copyright 2017, Minio, Inc.
-
-#define LOAD(LO1, LO2, HI1, HI2) \
- VLD1.P 32(R1), [LO1.B16, LO2.B16] \
- \
- \ // Get low input and high input
- VUSHR $4, LO1.B16, HI1.B16 \
- VUSHR $4, LO2.B16, HI2.B16 \
- VAND V8.B16, LO1.B16, LO1.B16 \
- VAND V8.B16, LO2.B16, LO2.B16
-
-#define GALOIS_MUL(MUL_LO, MUL_HI, OUT1, OUT2, TMP1, TMP2) \
- \ // Mul low part and mul high part
- VTBL V0.B16, [MUL_LO.B16], OUT1.B16 \
- VTBL V10.B16, [MUL_HI.B16], OUT2.B16 \
- VTBL V1.B16, [MUL_LO.B16], TMP1.B16 \
- VTBL V11.B16, [MUL_HI.B16], TMP2.B16 \
- \
- \ // Combine results
- VEOR OUT2.B16, OUT1.B16, OUT1.B16 \
- VEOR TMP2.B16, TMP1.B16, OUT2.B16
-
-// func galMulNEON(low, high, in, out []byte)
-TEXT ·galMulNEON(SB), 7, $0
- MOVD in_base+48(FP), R1
- MOVD in_len+56(FP), R2 // length of message
- MOVD out_base+72(FP), R5
- SUBS $32, R2
- BMI complete
-
- MOVD low+0(FP), R10 // R10: &low
- MOVD high+24(FP), R11 // R11: &high
- VLD1 (R10), [V6.B16]
- VLD1 (R11), [V7.B16]
-
- //
- // Use an extra instruction below since `VDUP R3, V8.B16` generates assembler error
- // WORD $0x4e010c68 // dup v8.16b, w3
- //
- MOVD $0x0f, R3
- VMOV R3, V8.B[0]
- VDUP V8.B[0], V8.B16
-
-loop:
- // Main loop
- LOAD(V0, V1, V10, V11)
- GALOIS_MUL(V6, V7, V4, V5, V14, V15)
-
- // Store result
- VST1.P [V4.D2, V5.D2], 32(R5)
-
- SUBS $32, R2
- BPL loop
-
-complete:
- RET
-
-// func galMulXorNEON(low, high, in, out []byte)
-TEXT ·galMulXorNEON(SB), 7, $0
- MOVD in_base+48(FP), R1
- MOVD in_len+56(FP), R2 // length of message
- MOVD out_base+72(FP), R5
- SUBS $32, R2
- BMI completeXor
-
- MOVD low+0(FP), R10 // R10: &low
- MOVD high+24(FP), R11 // R11: &high
- VLD1 (R10), [V6.B16]
- VLD1 (R11), [V7.B16]
-
- //
- // Use an extra instruction below since `VDUP R3, V8.B16` generates assembler error
- // WORD $0x4e010c68 // dup v8.16b, w3
- //
- MOVD $0x0f, R3
- VMOV R3, V8.B[0]
- VDUP V8.B[0], V8.B16
-
-loopXor:
- // Main loop
- VLD1 (R5), [V20.B16, V21.B16]
-
- LOAD(V0, V1, V10, V11)
- GALOIS_MUL(V6, V7, V4, V5, V14, V15)
-
- VEOR V20.B16, V4.B16, V4.B16
- VEOR V21.B16, V5.B16, V5.B16
-
- // Store result
- VST1.P [V4.D2, V5.D2], 32(R5)
-
- SUBS $32, R2
- BPL loopXor
-
-completeXor:
- RET
-
-// func galXorNEON(in, out []byte)
-TEXT ·galXorNEON(SB), 7, $0
- MOVD in_base+0(FP), R1
- MOVD in_len+8(FP), R2 // length of message
- MOVD out_base+24(FP), R5
- SUBS $32, R2
- BMI completeXor
-
-loopXor:
- // Main loop
- VLD1.P 32(R1), [V0.B16, V1.B16]
- VLD1 (R5), [V20.B16, V21.B16]
-
- VEOR V20.B16, V0.B16, V4.B16
- VEOR V21.B16, V1.B16, V5.B16
-
- // Store result
- VST1.P [V4.D2, V5.D2], 32(R5)
-
- SUBS $32, R2
- BPL loopXor
-
-completeXor:
- RET
-
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go
deleted file mode 100644
index 24d6a02..0000000
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go
+++ /dev/null
@@ -1,1551 +0,0 @@
-// Code generated by command: go run gen.go -out ../galois_gen_amd64.s -stubs ../galois_gen_amd64.go -pkg=reedsolomon. DO NOT EDIT.
-
-//go:build !appengine && !noasm && !nogen && gc
-
-package reedsolomon
-
-func _dummy_()
-
-// mulAvxTwo_1x1 takes 1 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_1x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x1_64 takes 1 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_1x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x1Xor takes 1 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_1x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x1_64Xor takes 1 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_1x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x2 takes 1 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_1x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x2_64 takes 1 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_1x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x2Xor takes 1 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_1x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x2_64Xor takes 1 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_1x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x3 takes 1 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_1x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x3_64 takes 1 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_1x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x3Xor takes 1 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_1x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x3_64Xor takes 1 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_1x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x4 takes 1 inputs and produces 4 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_1x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x4Xor takes 1 inputs and produces 4 outputs.
-//
-//go:noescape
-func mulAvxTwo_1x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x5 takes 1 inputs and produces 5 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_1x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x5Xor takes 1 inputs and produces 5 outputs.
-//
-//go:noescape
-func mulAvxTwo_1x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x6 takes 1 inputs and produces 6 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_1x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x6Xor takes 1 inputs and produces 6 outputs.
-//
-//go:noescape
-func mulAvxTwo_1x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x7 takes 1 inputs and produces 7 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_1x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x7Xor takes 1 inputs and produces 7 outputs.
-//
-//go:noescape
-func mulAvxTwo_1x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x8 takes 1 inputs and produces 8 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_1x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x8Xor takes 1 inputs and produces 8 outputs.
-//
-//go:noescape
-func mulAvxTwo_1x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x9 takes 1 inputs and produces 9 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_1x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x9Xor takes 1 inputs and produces 9 outputs.
-//
-//go:noescape
-func mulAvxTwo_1x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x10 takes 1 inputs and produces 10 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_1x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_1x10Xor takes 1 inputs and produces 10 outputs.
-//
-//go:noescape
-func mulAvxTwo_1x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x1 takes 2 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_2x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x1_64 takes 2 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_2x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x1Xor takes 2 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_2x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x1_64Xor takes 2 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_2x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x2 takes 2 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_2x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x2_64 takes 2 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_2x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x2Xor takes 2 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_2x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x2_64Xor takes 2 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_2x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x3 takes 2 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_2x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x3_64 takes 2 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_2x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x3Xor takes 2 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_2x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x3_64Xor takes 2 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_2x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x4 takes 2 inputs and produces 4 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_2x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x4Xor takes 2 inputs and produces 4 outputs.
-//
-//go:noescape
-func mulAvxTwo_2x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x5 takes 2 inputs and produces 5 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_2x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x5Xor takes 2 inputs and produces 5 outputs.
-//
-//go:noescape
-func mulAvxTwo_2x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x6 takes 2 inputs and produces 6 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_2x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x6Xor takes 2 inputs and produces 6 outputs.
-//
-//go:noescape
-func mulAvxTwo_2x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x7 takes 2 inputs and produces 7 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_2x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x7Xor takes 2 inputs and produces 7 outputs.
-//
-//go:noescape
-func mulAvxTwo_2x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x8 takes 2 inputs and produces 8 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_2x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x8Xor takes 2 inputs and produces 8 outputs.
-//
-//go:noescape
-func mulAvxTwo_2x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x9 takes 2 inputs and produces 9 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_2x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x9Xor takes 2 inputs and produces 9 outputs.
-//
-//go:noescape
-func mulAvxTwo_2x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x10 takes 2 inputs and produces 10 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_2x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_2x10Xor takes 2 inputs and produces 10 outputs.
-//
-//go:noescape
-func mulAvxTwo_2x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x1 takes 3 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_3x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x1_64 takes 3 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_3x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x1Xor takes 3 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_3x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x1_64Xor takes 3 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_3x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x2 takes 3 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_3x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x2_64 takes 3 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_3x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x2Xor takes 3 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_3x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x2_64Xor takes 3 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_3x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x3 takes 3 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_3x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x3_64 takes 3 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_3x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x3Xor takes 3 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_3x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x3_64Xor takes 3 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_3x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x4 takes 3 inputs and produces 4 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_3x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x4Xor takes 3 inputs and produces 4 outputs.
-//
-//go:noescape
-func mulAvxTwo_3x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x5 takes 3 inputs and produces 5 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_3x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x5Xor takes 3 inputs and produces 5 outputs.
-//
-//go:noescape
-func mulAvxTwo_3x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x6 takes 3 inputs and produces 6 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_3x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x6Xor takes 3 inputs and produces 6 outputs.
-//
-//go:noescape
-func mulAvxTwo_3x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x7 takes 3 inputs and produces 7 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_3x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x7Xor takes 3 inputs and produces 7 outputs.
-//
-//go:noescape
-func mulAvxTwo_3x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x8 takes 3 inputs and produces 8 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_3x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x8Xor takes 3 inputs and produces 8 outputs.
-//
-//go:noescape
-func mulAvxTwo_3x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x9 takes 3 inputs and produces 9 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_3x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x9Xor takes 3 inputs and produces 9 outputs.
-//
-//go:noescape
-func mulAvxTwo_3x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x10 takes 3 inputs and produces 10 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_3x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_3x10Xor takes 3 inputs and produces 10 outputs.
-//
-//go:noescape
-func mulAvxTwo_3x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x1 takes 4 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_4x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x1_64 takes 4 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_4x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x1Xor takes 4 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_4x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x1_64Xor takes 4 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_4x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x2 takes 4 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_4x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x2_64 takes 4 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_4x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x2Xor takes 4 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_4x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x2_64Xor takes 4 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_4x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x3 takes 4 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_4x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x3_64 takes 4 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_4x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x3Xor takes 4 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_4x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x3_64Xor takes 4 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_4x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x4 takes 4 inputs and produces 4 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_4x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x4Xor takes 4 inputs and produces 4 outputs.
-//
-//go:noescape
-func mulAvxTwo_4x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x5 takes 4 inputs and produces 5 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_4x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x5Xor takes 4 inputs and produces 5 outputs.
-//
-//go:noescape
-func mulAvxTwo_4x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x6 takes 4 inputs and produces 6 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_4x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x6Xor takes 4 inputs and produces 6 outputs.
-//
-//go:noescape
-func mulAvxTwo_4x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x7 takes 4 inputs and produces 7 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_4x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x7Xor takes 4 inputs and produces 7 outputs.
-//
-//go:noescape
-func mulAvxTwo_4x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x8 takes 4 inputs and produces 8 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_4x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x8Xor takes 4 inputs and produces 8 outputs.
-//
-//go:noescape
-func mulAvxTwo_4x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x9 takes 4 inputs and produces 9 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_4x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x9Xor takes 4 inputs and produces 9 outputs.
-//
-//go:noescape
-func mulAvxTwo_4x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x10 takes 4 inputs and produces 10 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_4x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_4x10Xor takes 4 inputs and produces 10 outputs.
-//
-//go:noescape
-func mulAvxTwo_4x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x1 takes 5 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_5x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x1_64 takes 5 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_5x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x1Xor takes 5 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_5x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x1_64Xor takes 5 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_5x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x2 takes 5 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_5x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x2_64 takes 5 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_5x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x2Xor takes 5 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_5x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x2_64Xor takes 5 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_5x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x3 takes 5 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_5x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x3_64 takes 5 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_5x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x3Xor takes 5 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_5x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x3_64Xor takes 5 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_5x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x4 takes 5 inputs and produces 4 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_5x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x4Xor takes 5 inputs and produces 4 outputs.
-//
-//go:noescape
-func mulAvxTwo_5x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x5 takes 5 inputs and produces 5 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_5x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x5Xor takes 5 inputs and produces 5 outputs.
-//
-//go:noescape
-func mulAvxTwo_5x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x6 takes 5 inputs and produces 6 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_5x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x6Xor takes 5 inputs and produces 6 outputs.
-//
-//go:noescape
-func mulAvxTwo_5x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x7 takes 5 inputs and produces 7 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_5x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x7Xor takes 5 inputs and produces 7 outputs.
-//
-//go:noescape
-func mulAvxTwo_5x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x8 takes 5 inputs and produces 8 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_5x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x8Xor takes 5 inputs and produces 8 outputs.
-//
-//go:noescape
-func mulAvxTwo_5x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x9 takes 5 inputs and produces 9 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_5x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x9Xor takes 5 inputs and produces 9 outputs.
-//
-//go:noescape
-func mulAvxTwo_5x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x10 takes 5 inputs and produces 10 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_5x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_5x10Xor takes 5 inputs and produces 10 outputs.
-//
-//go:noescape
-func mulAvxTwo_5x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x1 takes 6 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_6x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x1_64 takes 6 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_6x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x1Xor takes 6 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_6x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x1_64Xor takes 6 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_6x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x2 takes 6 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_6x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x2_64 takes 6 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_6x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x2Xor takes 6 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_6x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x2_64Xor takes 6 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_6x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x3 takes 6 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_6x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x3_64 takes 6 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_6x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x3Xor takes 6 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_6x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x3_64Xor takes 6 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_6x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x4 takes 6 inputs and produces 4 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_6x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x4Xor takes 6 inputs and produces 4 outputs.
-//
-//go:noescape
-func mulAvxTwo_6x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x5 takes 6 inputs and produces 5 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_6x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x5Xor takes 6 inputs and produces 5 outputs.
-//
-//go:noescape
-func mulAvxTwo_6x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x6 takes 6 inputs and produces 6 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_6x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x6Xor takes 6 inputs and produces 6 outputs.
-//
-//go:noescape
-func mulAvxTwo_6x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x7 takes 6 inputs and produces 7 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_6x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x7Xor takes 6 inputs and produces 7 outputs.
-//
-//go:noescape
-func mulAvxTwo_6x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x8 takes 6 inputs and produces 8 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_6x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x8Xor takes 6 inputs and produces 8 outputs.
-//
-//go:noescape
-func mulAvxTwo_6x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x9 takes 6 inputs and produces 9 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_6x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x9Xor takes 6 inputs and produces 9 outputs.
-//
-//go:noescape
-func mulAvxTwo_6x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x10 takes 6 inputs and produces 10 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_6x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_6x10Xor takes 6 inputs and produces 10 outputs.
-//
-//go:noescape
-func mulAvxTwo_6x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x1 takes 7 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_7x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x1_64 takes 7 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_7x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x1Xor takes 7 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_7x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x1_64Xor takes 7 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_7x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x2 takes 7 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_7x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x2_64 takes 7 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_7x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x2Xor takes 7 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_7x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x2_64Xor takes 7 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_7x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x3 takes 7 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_7x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x3_64 takes 7 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_7x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x3Xor takes 7 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_7x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x3_64Xor takes 7 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_7x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x4 takes 7 inputs and produces 4 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_7x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x4Xor takes 7 inputs and produces 4 outputs.
-//
-//go:noescape
-func mulAvxTwo_7x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x5 takes 7 inputs and produces 5 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_7x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x5Xor takes 7 inputs and produces 5 outputs.
-//
-//go:noescape
-func mulAvxTwo_7x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x6 takes 7 inputs and produces 6 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_7x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x6Xor takes 7 inputs and produces 6 outputs.
-//
-//go:noescape
-func mulAvxTwo_7x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x7 takes 7 inputs and produces 7 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_7x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x7Xor takes 7 inputs and produces 7 outputs.
-//
-//go:noescape
-func mulAvxTwo_7x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x8 takes 7 inputs and produces 8 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_7x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x8Xor takes 7 inputs and produces 8 outputs.
-//
-//go:noescape
-func mulAvxTwo_7x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x9 takes 7 inputs and produces 9 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_7x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x9Xor takes 7 inputs and produces 9 outputs.
-//
-//go:noescape
-func mulAvxTwo_7x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x10 takes 7 inputs and produces 10 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_7x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_7x10Xor takes 7 inputs and produces 10 outputs.
-//
-//go:noescape
-func mulAvxTwo_7x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x1 takes 8 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_8x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x1_64 takes 8 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_8x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x1Xor takes 8 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_8x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x1_64Xor takes 8 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_8x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x2 takes 8 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_8x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x2_64 takes 8 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_8x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x2Xor takes 8 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_8x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x2_64Xor takes 8 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_8x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x3 takes 8 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_8x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x3_64 takes 8 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_8x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x3Xor takes 8 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_8x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x3_64Xor takes 8 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_8x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x4 takes 8 inputs and produces 4 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_8x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x4Xor takes 8 inputs and produces 4 outputs.
-//
-//go:noescape
-func mulAvxTwo_8x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x5 takes 8 inputs and produces 5 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_8x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x5Xor takes 8 inputs and produces 5 outputs.
-//
-//go:noescape
-func mulAvxTwo_8x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x6 takes 8 inputs and produces 6 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_8x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x6Xor takes 8 inputs and produces 6 outputs.
-//
-//go:noescape
-func mulAvxTwo_8x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x7 takes 8 inputs and produces 7 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_8x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x7Xor takes 8 inputs and produces 7 outputs.
-//
-//go:noescape
-func mulAvxTwo_8x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x8 takes 8 inputs and produces 8 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_8x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x8Xor takes 8 inputs and produces 8 outputs.
-//
-//go:noescape
-func mulAvxTwo_8x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x9 takes 8 inputs and produces 9 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_8x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x9Xor takes 8 inputs and produces 9 outputs.
-//
-//go:noescape
-func mulAvxTwo_8x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x10 takes 8 inputs and produces 10 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_8x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_8x10Xor takes 8 inputs and produces 10 outputs.
-//
-//go:noescape
-func mulAvxTwo_8x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x1 takes 9 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_9x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x1_64 takes 9 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_9x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x1Xor takes 9 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_9x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x1_64Xor takes 9 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_9x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x2 takes 9 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_9x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x2_64 takes 9 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_9x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x2Xor takes 9 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_9x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x2_64Xor takes 9 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_9x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x3 takes 9 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_9x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x3_64 takes 9 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_9x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x3Xor takes 9 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_9x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x3_64Xor takes 9 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_9x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x4 takes 9 inputs and produces 4 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_9x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x4Xor takes 9 inputs and produces 4 outputs.
-//
-//go:noescape
-func mulAvxTwo_9x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x5 takes 9 inputs and produces 5 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_9x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x5Xor takes 9 inputs and produces 5 outputs.
-//
-//go:noescape
-func mulAvxTwo_9x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x6 takes 9 inputs and produces 6 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_9x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x6Xor takes 9 inputs and produces 6 outputs.
-//
-//go:noescape
-func mulAvxTwo_9x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x7 takes 9 inputs and produces 7 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_9x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x7Xor takes 9 inputs and produces 7 outputs.
-//
-//go:noescape
-func mulAvxTwo_9x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x8 takes 9 inputs and produces 8 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_9x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x8Xor takes 9 inputs and produces 8 outputs.
-//
-//go:noescape
-func mulAvxTwo_9x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x9 takes 9 inputs and produces 9 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_9x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x9Xor takes 9 inputs and produces 9 outputs.
-//
-//go:noescape
-func mulAvxTwo_9x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x10 takes 9 inputs and produces 10 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_9x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_9x10Xor takes 9 inputs and produces 10 outputs.
-//
-//go:noescape
-func mulAvxTwo_9x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x1 takes 10 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_10x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x1_64 takes 10 inputs and produces 1 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_10x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x1Xor takes 10 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_10x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x1_64Xor takes 10 inputs and produces 1 outputs.
-//
-//go:noescape
-func mulAvxTwo_10x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x2 takes 10 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_10x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x2_64 takes 10 inputs and produces 2 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_10x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x2Xor takes 10 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_10x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x2_64Xor takes 10 inputs and produces 2 outputs.
-//
-//go:noescape
-func mulAvxTwo_10x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x3 takes 10 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_10x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x3_64 takes 10 inputs and produces 3 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_10x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x3Xor takes 10 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_10x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x3_64Xor takes 10 inputs and produces 3 outputs.
-//
-//go:noescape
-func mulAvxTwo_10x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x4 takes 10 inputs and produces 4 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x4Xor takes 10 inputs and produces 4 outputs.
-//
-//go:noescape
-func mulAvxTwo_10x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x5 takes 10 inputs and produces 5 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_10x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x5Xor takes 10 inputs and produces 5 outputs.
-//
-//go:noescape
-func mulAvxTwo_10x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x6 takes 10 inputs and produces 6 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x6Xor takes 10 inputs and produces 6 outputs.
-//
-//go:noescape
-func mulAvxTwo_10x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x7 takes 10 inputs and produces 7 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_10x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x7Xor takes 10 inputs and produces 7 outputs.
-//
-//go:noescape
-func mulAvxTwo_10x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x8 takes 10 inputs and produces 8 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_10x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x8Xor takes 10 inputs and produces 8 outputs.
-//
-//go:noescape
-func mulAvxTwo_10x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x9 takes 10 inputs and produces 9 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_10x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x9Xor takes 10 inputs and produces 9 outputs.
-//
-//go:noescape
-func mulAvxTwo_10x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x10 takes 10 inputs and produces 10 outputs.
-// The output is initialized to 0.
-//
-//go:noescape
-func mulAvxTwo_10x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-// mulAvxTwo_10x10Xor takes 10 inputs and produces 10 outputs.
-//
-//go:noescape
-func mulAvxTwo_10x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-
-//go:noescape
-func ifftDIT2_avx2(x []byte, y []byte, table *[128]uint8)
-
-//go:noescape
-func fftDIT2_avx2(x []byte, y []byte, table *[128]uint8)
-
-//go:noescape
-func mulgf16_avx2(x []byte, y []byte, table *[128]uint8)
-
-//go:noescape
-func ifftDIT4_avx512_0(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func fftDIT4_avx512_0(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func ifftDIT4_avx512_1(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func fftDIT4_avx512_1(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func ifftDIT4_avx512_2(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func fftDIT4_avx512_2(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func ifftDIT4_avx512_3(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func fftDIT4_avx512_3(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func ifftDIT4_avx512_4(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func fftDIT4_avx512_4(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func ifftDIT4_avx512_5(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func fftDIT4_avx512_5(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func ifftDIT4_avx512_6(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func fftDIT4_avx512_6(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func ifftDIT4_avx512_7(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func fftDIT4_avx512_7(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func ifftDIT4_avx2_0(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func fftDIT4_avx2_0(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func ifftDIT4_avx2_1(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func fftDIT4_avx2_1(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func ifftDIT4_avx2_2(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func fftDIT4_avx2_2(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func ifftDIT4_avx2_3(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func fftDIT4_avx2_3(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func ifftDIT4_avx2_4(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func fftDIT4_avx2_4(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func ifftDIT4_avx2_5(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func fftDIT4_avx2_5(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func ifftDIT4_avx2_6(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func fftDIT4_avx2_6(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func ifftDIT4_avx2_7(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func fftDIT4_avx2_7(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-
-//go:noescape
-func ifftDIT2_ssse3(x []byte, y []byte, table *[128]uint8)
-
-//go:noescape
-func fftDIT2_ssse3(x []byte, y []byte, table *[128]uint8)
-
-//go:noescape
-func mulgf16_ssse3(x []byte, y []byte, table *[128]uint8)
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s
deleted file mode 100644
index 890461e..0000000
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s
+++ /dev/null
@@ -1,67609 +0,0 @@
-// Code generated by command: go run gen.go -out ../galois_gen_amd64.s -stubs ../galois_gen_amd64.go -pkg=reedsolomon. DO NOT EDIT.
-
-//go:build !appengine && !noasm && !nogen && gc
-
-#include "textflag.h"
-
-// func _dummy_()
-TEXT ·_dummy_(SB), $0
-#ifdef GOAMD64_v4
-#define XOR3WAY(ignore, a, b, dst) \
- VPTERNLOGD $0x96, a, b, dst
-
-#else
-#define XOR3WAY(ignore, a, b, dst) \
- VPXOR a, dst, dst \
- VPXOR b, dst, dst
-
-#endif
- RET
-
-// func mulAvxTwo_1x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x1(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 6 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x1_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- MOVQ in_base+24(FP), CX
- MOVQ (CX), CX
- MOVQ out_base+48(FP), DX
- MOVQ (DX), DX
- MOVQ start+72(FP), BX
-
- // Add start offset to output
- ADDQ BX, DX
-
- // Add start offset to input
- ADDQ BX, CX
- MOVQ $0x0000000f, BX
- MOVQ BX, X3
- VPBROADCASTB X3, Y3
-
-mulAvxTwo_1x1_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (CX), Y2
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y2, Y4
- VPAND Y3, Y2, Y2
- VPAND Y3, Y4, Y4
- VPSHUFB Y2, Y0, Y2
- VPSHUFB Y4, Y1, Y4
- VPXOR Y2, Y4, Y2
-
- // Store 1 outputs
- VMOVDQU Y2, (DX)
- ADDQ $0x20, DX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x1_loop
- VZEROUPPER
-
-mulAvxTwo_1x1_end:
- RET
-
-// func mulAvxTwo_1x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x1_64(SB), $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 10 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x1_64_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- MOVQ in_base+24(FP), CX
- MOVQ (CX), CX
- MOVQ out_base+48(FP), DX
- MOVQ out_base+48(FP), DX
- MOVQ (DX), DX
- MOVQ start+72(FP), BX
-
- // Add start offset to output
- ADDQ BX, DX
-
- // Add start offset to input
- ADDQ BX, CX
- MOVQ $0x0000000f, BX
- MOVQ BX, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_1x1_64_loop:
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (CX), Y2
- VMOVDQU 32(CX), Y3
- ADDQ $0x40, CX
- VPSRLQ $0x04, Y2, Y6
- VPSRLQ $0x04, Y3, Y5
- VPAND Y4, Y2, Y2
- VPAND Y4, Y3, Y3
- VPAND Y4, Y6, Y6
- VPAND Y4, Y5, Y5
- VPSHUFB Y2, Y0, Y2
- VPSHUFB Y3, Y0, Y3
- VPSHUFB Y6, Y1, Y6
- VPSHUFB Y5, Y1, Y5
- VPXOR Y2, Y6, Y2
- VPXOR Y3, Y5, Y3
-
- // Store 1 outputs
- VMOVDQU Y2, (DX)
- VMOVDQU Y3, 32(DX)
- ADDQ $0x40, DX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x1_64_loop
- VZEROUPPER
-
-mulAvxTwo_1x1_64_end:
- RET
-
-// func mulAvxTwo_1x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_1x1Xor(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 6 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x1Xor_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- MOVQ in_base+24(FP), CX
- MOVQ (CX), CX
- MOVQ out_base+48(FP), DX
- MOVQ (DX), DX
- MOVQ start+72(FP), BX
-
- // Add start offset to output
- ADDQ BX, DX
-
- // Add start offset to input
- ADDQ BX, CX
- MOVQ $0x0000000f, BX
- MOVQ BX, X3
- VPBROADCASTB X3, Y3
-
-mulAvxTwo_1x1Xor_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (CX), Y4
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y4, Y5
- VPAND Y3, Y4, Y4
- VPAND Y3, Y5, Y5
- VMOVDQU (DX), Y2
- VPSHUFB Y4, Y0, Y4
- VPSHUFB Y5, Y1, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 1 outputs
- VMOVDQU Y2, (DX)
- ADDQ $0x20, DX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x1Xor_loop
- VZEROUPPER
-
-mulAvxTwo_1x1Xor_end:
- RET
-
-// func mulAvxTwo_1x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_1x1_64Xor(SB), $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 10 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x1_64Xor_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- MOVQ in_base+24(FP), CX
- MOVQ (CX), CX
- MOVQ out_base+48(FP), DX
- MOVQ out_base+48(FP), DX
- MOVQ (DX), DX
- MOVQ start+72(FP), BX
-
- // Add start offset to output
- ADDQ BX, DX
-
- // Add start offset to input
- ADDQ BX, CX
- MOVQ $0x0000000f, BX
- MOVQ BX, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_1x1_64Xor_loop:
- // Load 1 outputs
- VMOVDQU (DX), Y2
- VMOVDQU 32(DX), Y3
-
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y7
- ADDQ $0x40, CX
- VPSRLQ $0x04, Y5, Y6
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y5, Y5
- VPAND Y4, Y7, Y7
- VPAND Y4, Y6, Y6
- VPAND Y4, Y8, Y8
- VPSHUFB Y5, Y0, Y5
- VPSHUFB Y7, Y0, Y7
- VPSHUFB Y6, Y1, Y6
- VPSHUFB Y8, Y1, Y8
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 1 outputs
- VMOVDQU Y2, (DX)
- VMOVDQU Y3, 32(DX)
- ADDQ $0x40, DX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x1_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_1x1_64Xor_end:
- RET
-
-// func mulAvxTwo_1x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x2(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 11 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x2_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- MOVQ in_base+24(FP), CX
- MOVQ (CX), CX
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ start+72(FP), SI
-
- // Add start offset to output
- ADDQ SI, BX
- ADDQ SI, DX
-
- // Add start offset to input
- ADDQ SI, CX
- MOVQ $0x0000000f, SI
- MOVQ SI, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_1x2_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (CX), Y8
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y6, Y8, Y8
- VPAND Y6, Y9, Y9
- VPSHUFB Y8, Y0, Y5
- VPSHUFB Y9, Y1, Y7
- VPXOR Y5, Y7, Y4
- VPSHUFB Y8, Y2, Y5
- VPSHUFB Y9, Y3, Y7
- VPXOR Y5, Y7, Y5
-
- // Store 2 outputs
- VMOVDQU Y4, (BX)
- ADDQ $0x20, BX
- VMOVDQU Y5, (DX)
- ADDQ $0x20, DX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x2_loop
- VZEROUPPER
-
-mulAvxTwo_1x2_end:
- RET
-
-// func mulAvxTwo_1x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x2_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 17 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x2_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), BX
- MOVQ start+72(FP), DI
-
- // Add start offset to output
- ADDQ DI, SI
- ADDQ DI, BX
-
- // Add start offset to input
- ADDQ DI, DX
- MOVQ $0x0000000f, DI
- MOVQ DI, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_1x2_64_loop:
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (DX), Y7
- VMOVDQU 32(DX), Y9
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y7, Y8
- VPSRLQ $0x04, Y9, Y10
- VPAND Y4, Y7, Y7
- VPAND Y4, Y9, Y9
- VPAND Y4, Y8, Y8
- VPAND Y4, Y10, Y10
- VMOVDQU (CX), Y2
- VMOVDQU 32(CX), Y6
- VPSHUFB Y9, Y2, Y3
- VPSHUFB Y7, Y2, Y2
- VPSHUFB Y10, Y6, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y2, Y6, Y0
- VPXOR Y3, Y5, Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y6
- VPSHUFB Y9, Y2, Y3
- VPSHUFB Y7, Y2, Y2
- VPSHUFB Y10, Y6, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y2, Y6, Y2
- VPXOR Y3, Y5, Y3
-
- // Store 2 outputs
- VMOVDQU Y0, (SI)
- VMOVDQU Y1, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y2, (BX)
- VMOVDQU Y3, 32(BX)
- ADDQ $0x40, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x2_64_loop
- VZEROUPPER
-
-mulAvxTwo_1x2_64_end:
- RET
-
-// func mulAvxTwo_1x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_1x2Xor(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 11 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x2Xor_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- MOVQ in_base+24(FP), CX
- MOVQ (CX), CX
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ start+72(FP), SI
-
- // Add start offset to output
- ADDQ SI, BX
- ADDQ SI, DX
-
- // Add start offset to input
- ADDQ SI, CX
- MOVQ $0x0000000f, SI
- MOVQ SI, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_1x2Xor_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (CX), Y9
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (BX), Y4
- VPSHUFB Y9, Y0, Y7
- VPSHUFB Y10, Y1, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU (DX), Y5
- VPSHUFB Y9, Y2, Y7
- VPSHUFB Y10, Y3, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 2 outputs
- VMOVDQU Y4, (BX)
- ADDQ $0x20, BX
- VMOVDQU Y5, (DX)
- ADDQ $0x20, DX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x2Xor_loop
- VZEROUPPER
-
-mulAvxTwo_1x2Xor_end:
- RET
-
-// func mulAvxTwo_1x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_1x2_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 17 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x2_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), BX
- MOVQ start+72(FP), DI
-
- // Add start offset to output
- ADDQ DI, SI
- ADDQ DI, BX
-
- // Add start offset to input
- ADDQ DI, DX
- MOVQ $0x0000000f, DI
- MOVQ DI, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_1x2_64Xor_loop:
- // Load 2 outputs
- VMOVDQU (SI), Y0
- VMOVDQU 32(SI), Y1
- VMOVDQU (BX), Y2
- VMOVDQU 32(BX), Y3
-
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (SI)
- VMOVDQU Y1, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y2, (BX)
- VMOVDQU Y3, 32(BX)
- ADDQ $0x40, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x2_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_1x2_64Xor_end:
- RET
-
-// func mulAvxTwo_1x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x3(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 14 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x3_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- MOVQ in_base+24(FP), CX
- MOVQ (CX), CX
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ start+72(FP), DI
-
- // Add start offset to output
- ADDQ DI, BX
- ADDQ DI, SI
- ADDQ DI, DX
-
- // Add start offset to input
- ADDQ DI, CX
- MOVQ $0x0000000f, DI
- MOVQ DI, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_1x3_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (CX), Y11
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y9, Y11, Y11
- VPAND Y9, Y12, Y12
- VPSHUFB Y11, Y0, Y8
- VPSHUFB Y12, Y1, Y10
- VPXOR Y8, Y10, Y6
- VPSHUFB Y11, Y2, Y8
- VPSHUFB Y12, Y3, Y10
- VPXOR Y8, Y10, Y7
- VPSHUFB Y11, Y4, Y8
- VPSHUFB Y12, Y5, Y10
- VPXOR Y8, Y10, Y8
-
- // Store 3 outputs
- VMOVDQU Y6, (BX)
- ADDQ $0x20, BX
- VMOVDQU Y7, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y8, (DX)
- ADDQ $0x20, DX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x3_loop
- VZEROUPPER
-
-mulAvxTwo_1x3_end:
- RET
-
-// func mulAvxTwo_1x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x3_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 22 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x3_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), DI
- MOVQ 48(BX), BX
- MOVQ start+72(FP), R8
-
- // Add start offset to output
- ADDQ R8, SI
- ADDQ R8, DI
- ADDQ R8, BX
-
- // Add start offset to input
- ADDQ R8, DX
- MOVQ $0x0000000f, R8
- MOVQ R8, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_1x3_64_loop:
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y6, Y9, Y9
- VPAND Y6, Y11, Y11
- VPAND Y6, Y10, Y10
- VPAND Y6, Y12, Y12
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y8
- VPSHUFB Y11, Y4, Y5
- VPSHUFB Y9, Y4, Y4
- VPSHUFB Y12, Y8, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y4, Y8, Y0
- VPXOR Y5, Y7, Y1
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y8
- VPSHUFB Y11, Y4, Y5
- VPSHUFB Y9, Y4, Y4
- VPSHUFB Y12, Y8, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y4, Y8, Y2
- VPXOR Y5, Y7, Y3
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y8
- VPSHUFB Y11, Y4, Y5
- VPSHUFB Y9, Y4, Y4
- VPSHUFB Y12, Y8, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y4, Y8, Y4
- VPXOR Y5, Y7, Y5
-
- // Store 3 outputs
- VMOVDQU Y0, (SI)
- VMOVDQU Y1, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y2, (DI)
- VMOVDQU Y3, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y4, (BX)
- VMOVDQU Y5, 32(BX)
- ADDQ $0x40, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x3_64_loop
- VZEROUPPER
-
-mulAvxTwo_1x3_64_end:
- RET
-
-// func mulAvxTwo_1x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_1x3Xor(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 14 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x3Xor_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- MOVQ in_base+24(FP), CX
- MOVQ (CX), CX
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ start+72(FP), DI
-
- // Add start offset to output
- ADDQ DI, BX
- ADDQ DI, SI
- ADDQ DI, DX
-
- // Add start offset to input
- ADDQ DI, CX
- MOVQ $0x0000000f, DI
- MOVQ DI, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_1x3Xor_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (CX), Y12
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU (BX), Y6
- VPSHUFB Y12, Y0, Y10
- VPSHUFB Y13, Y1, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU (SI), Y7
- VPSHUFB Y12, Y2, Y10
- VPSHUFB Y13, Y3, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU (DX), Y8
- VPSHUFB Y12, Y4, Y10
- VPSHUFB Y13, Y5, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 3 outputs
- VMOVDQU Y6, (BX)
- ADDQ $0x20, BX
- VMOVDQU Y7, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y8, (DX)
- ADDQ $0x20, DX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x3Xor_loop
- VZEROUPPER
-
-mulAvxTwo_1x3Xor_end:
- RET
-
-// func mulAvxTwo_1x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_1x3_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 22 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x3_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), DI
- MOVQ 48(BX), BX
- MOVQ start+72(FP), R8
-
- // Add start offset to output
- ADDQ R8, SI
- ADDQ R8, DI
- ADDQ R8, BX
-
- // Add start offset to input
- ADDQ R8, DX
- MOVQ $0x0000000f, R8
- MOVQ R8, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_1x3_64Xor_loop:
- // Load 3 outputs
- VMOVDQU (SI), Y0
- VMOVDQU 32(SI), Y1
- VMOVDQU (DI), Y2
- VMOVDQU 32(DI), Y3
- VMOVDQU (BX), Y4
- VMOVDQU 32(BX), Y5
-
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (SI)
- VMOVDQU Y1, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y2, (DI)
- VMOVDQU Y3, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y4, (BX)
- VMOVDQU Y5, 32(BX)
- ADDQ $0x40, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x3_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_1x3_64Xor_end:
- RET
-
-// func mulAvxTwo_1x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x4(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 17 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x4_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), DI
- MOVQ 48(BX), R8
- MOVQ 72(BX), BX
- MOVQ start+72(FP), R9
-
- // Add start offset to output
- ADDQ R9, SI
- ADDQ R9, DI
- ADDQ R9, R8
- ADDQ R9, BX
-
- // Add start offset to input
- ADDQ R9, DX
- MOVQ $0x0000000f, R9
- MOVQ R9, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_1x4_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y4, Y6, Y6
- VPAND Y4, Y7, Y7
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y7, Y5, Y5
- VPXOR Y3, Y5, Y0
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y7, Y5, Y5
- VPXOR Y3, Y5, Y1
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y7, Y5, Y5
- VPXOR Y3, Y5, Y2
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y7, Y5, Y5
- VPXOR Y3, Y5, Y3
-
- // Store 4 outputs
- VMOVDQU Y0, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y1, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y2, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y3, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x4_loop
- VZEROUPPER
-
-mulAvxTwo_1x4_end:
- RET
-
-// func mulAvxTwo_1x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_1x4Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 17 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x4Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), DI
- MOVQ 48(BX), R8
- MOVQ 72(BX), BX
- MOVQ start+72(FP), R9
-
- // Add start offset to output
- ADDQ R9, SI
- ADDQ R9, DI
- ADDQ R9, R8
- ADDQ R9, BX
-
- // Add start offset to input
- ADDQ R9, DX
- MOVQ $0x0000000f, R9
- MOVQ R9, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_1x4Xor_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (SI), Y0
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU (DI), Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU (R8), Y2
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU (BX), Y3
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- VMOVDQU Y0, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y1, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y2, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y3, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x4Xor_loop
- VZEROUPPER
-
-mulAvxTwo_1x4Xor_end:
- RET
-
-// func mulAvxTwo_1x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x5(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 20 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x5_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), DI
- MOVQ 48(BX), R8
- MOVQ 72(BX), R9
- MOVQ 96(BX), BX
- MOVQ start+72(FP), R10
-
- // Add start offset to output
- ADDQ R10, SI
- ADDQ R10, DI
- ADDQ R10, R8
- ADDQ R10, R9
- ADDQ R10, BX
-
- // Add start offset to input
- ADDQ R10, DX
- MOVQ $0x0000000f, R10
- MOVQ R10, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_1x5_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y5, Y7, Y7
- VPAND Y5, Y8, Y8
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y4, Y4
- VPSHUFB Y8, Y6, Y6
- VPXOR Y4, Y6, Y0
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y4, Y4
- VPSHUFB Y8, Y6, Y6
- VPXOR Y4, Y6, Y1
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y4, Y4
- VPSHUFB Y8, Y6, Y6
- VPXOR Y4, Y6, Y2
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y4, Y4
- VPSHUFB Y8, Y6, Y6
- VPXOR Y4, Y6, Y3
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y4, Y4
- VPSHUFB Y8, Y6, Y6
- VPXOR Y4, Y6, Y4
-
- // Store 5 outputs
- VMOVDQU Y0, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y1, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y2, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y3, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y4, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x5_loop
- VZEROUPPER
-
-mulAvxTwo_1x5_end:
- RET
-
-// func mulAvxTwo_1x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_1x5Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 20 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x5Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), DI
- MOVQ 48(BX), R8
- MOVQ 72(BX), R9
- MOVQ 96(BX), BX
- MOVQ start+72(FP), R10
-
- // Add start offset to output
- ADDQ R10, SI
- ADDQ R10, DI
- ADDQ R10, R8
- ADDQ R10, R9
- ADDQ R10, BX
-
- // Add start offset to input
- ADDQ R10, DX
- MOVQ $0x0000000f, R10
- MOVQ R10, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_1x5Xor_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (SI), Y0
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU (DI), Y1
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU (R8), Y2
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU (R9), Y3
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU (BX), Y4
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- VMOVDQU Y0, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y1, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y2, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y3, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y4, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x5Xor_loop
- VZEROUPPER
-
-mulAvxTwo_1x5Xor_end:
- RET
-
-// func mulAvxTwo_1x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x6(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 23 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x6_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), DI
- MOVQ 48(BX), R8
- MOVQ 72(BX), R9
- MOVQ 96(BX), R10
- MOVQ 120(BX), BX
- MOVQ start+72(FP), R11
-
- // Add start offset to output
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, R8
- ADDQ R11, R9
- ADDQ R11, R10
- ADDQ R11, BX
-
- // Add start offset to input
- ADDQ R11, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_1x6_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y6, Y8, Y8
- VPAND Y6, Y9, Y9
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y5, Y5
- VPSHUFB Y9, Y7, Y7
- VPXOR Y5, Y7, Y0
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y5, Y5
- VPSHUFB Y9, Y7, Y7
- VPXOR Y5, Y7, Y1
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y5, Y5
- VPSHUFB Y9, Y7, Y7
- VPXOR Y5, Y7, Y2
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y5, Y5
- VPSHUFB Y9, Y7, Y7
- VPXOR Y5, Y7, Y3
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y5, Y5
- VPSHUFB Y9, Y7, Y7
- VPXOR Y5, Y7, Y4
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y5, Y5
- VPSHUFB Y9, Y7, Y7
- VPXOR Y5, Y7, Y5
-
- // Store 6 outputs
- VMOVDQU Y0, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y1, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y2, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y3, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y4, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y5, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x6_loop
- VZEROUPPER
-
-mulAvxTwo_1x6_end:
- RET
-
-// func mulAvxTwo_1x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_1x6Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 23 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x6Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), DI
- MOVQ 48(BX), R8
- MOVQ 72(BX), R9
- MOVQ 96(BX), R10
- MOVQ 120(BX), BX
- MOVQ start+72(FP), R11
-
- // Add start offset to output
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, R8
- ADDQ R11, R9
- ADDQ R11, R10
- ADDQ R11, BX
-
- // Add start offset to input
- ADDQ R11, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_1x6Xor_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (SI), Y0
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU (DI), Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU (R8), Y2
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU (R9), Y3
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU (R10), Y4
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU (BX), Y5
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- VMOVDQU Y0, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y1, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y2, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y3, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y4, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y5, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x6Xor_loop
- VZEROUPPER
-
-mulAvxTwo_1x6Xor_end:
- RET
-
-// func mulAvxTwo_1x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x7(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 26 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x7_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), DI
- MOVQ 48(BX), R8
- MOVQ 72(BX), R9
- MOVQ 96(BX), R10
- MOVQ 120(BX), R11
- MOVQ 144(BX), BX
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, R11
- ADDQ R12, BX
-
- // Add start offset to input
- ADDQ R12, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_1x7_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y7, Y9, Y9
- VPAND Y7, Y10, Y10
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y6, Y6
- VPSHUFB Y10, Y8, Y8
- VPXOR Y6, Y8, Y0
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y6, Y6
- VPSHUFB Y10, Y8, Y8
- VPXOR Y6, Y8, Y1
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y6, Y6
- VPSHUFB Y10, Y8, Y8
- VPXOR Y6, Y8, Y2
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y6, Y6
- VPSHUFB Y10, Y8, Y8
- VPXOR Y6, Y8, Y3
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y6, Y6
- VPSHUFB Y10, Y8, Y8
- VPXOR Y6, Y8, Y4
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y6, Y6
- VPSHUFB Y10, Y8, Y8
- VPXOR Y6, Y8, Y5
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y6, Y6
- VPSHUFB Y10, Y8, Y8
- VPXOR Y6, Y8, Y6
-
- // Store 7 outputs
- VMOVDQU Y0, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y1, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y2, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y3, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y4, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y5, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y6, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x7_loop
- VZEROUPPER
-
-mulAvxTwo_1x7_end:
- RET
-
-// func mulAvxTwo_1x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_1x7Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 26 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x7Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), DI
- MOVQ 48(BX), R8
- MOVQ 72(BX), R9
- MOVQ 96(BX), R10
- MOVQ 120(BX), R11
- MOVQ 144(BX), BX
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, R11
- ADDQ R12, BX
-
- // Add start offset to input
- ADDQ R12, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_1x7Xor_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (SI), Y0
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU (DI), Y1
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU (R8), Y2
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU (R9), Y3
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU (R10), Y4
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU (R11), Y5
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU (BX), Y6
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- VMOVDQU Y0, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y1, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y2, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y3, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y4, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y5, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y6, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x7Xor_loop
- VZEROUPPER
-
-mulAvxTwo_1x7Xor_end:
- RET
-
-// func mulAvxTwo_1x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x8(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 29 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x8_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), DI
- MOVQ 48(BX), R8
- MOVQ 72(BX), R9
- MOVQ 96(BX), R10
- MOVQ 120(BX), R11
- MOVQ 144(BX), R12
- MOVQ 168(BX), BX
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, R12
- ADDQ R13, BX
-
- // Add start offset to input
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X8
- VPBROADCASTB X8, Y8
-
-mulAvxTwo_1x8_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y8, Y10, Y10
- VPAND Y8, Y11, Y11
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y7, Y7
- VPSHUFB Y11, Y9, Y9
- VPXOR Y7, Y9, Y0
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y7, Y7
- VPSHUFB Y11, Y9, Y9
- VPXOR Y7, Y9, Y1
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y7, Y7
- VPSHUFB Y11, Y9, Y9
- VPXOR Y7, Y9, Y2
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y7, Y7
- VPSHUFB Y11, Y9, Y9
- VPXOR Y7, Y9, Y3
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y7, Y7
- VPSHUFB Y11, Y9, Y9
- VPXOR Y7, Y9, Y4
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y7, Y7
- VPSHUFB Y11, Y9, Y9
- VPXOR Y7, Y9, Y5
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y7, Y7
- VPSHUFB Y11, Y9, Y9
- VPXOR Y7, Y9, Y6
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y7, Y7
- VPSHUFB Y11, Y9, Y9
- VPXOR Y7, Y9, Y7
-
- // Store 8 outputs
- VMOVDQU Y0, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y1, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y2, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y3, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y4, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y5, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y6, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y7, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x8_loop
- VZEROUPPER
-
-mulAvxTwo_1x8_end:
- RET
-
-// func mulAvxTwo_1x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_1x8Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 29 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x8Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), DI
- MOVQ 48(BX), R8
- MOVQ 72(BX), R9
- MOVQ 96(BX), R10
- MOVQ 120(BX), R11
- MOVQ 144(BX), R12
- MOVQ 168(BX), BX
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, R12
- ADDQ R13, BX
-
- // Add start offset to input
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X8
- VPBROADCASTB X8, Y8
-
-mulAvxTwo_1x8Xor_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (SI), Y0
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU (DI), Y1
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU (R8), Y2
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU (R9), Y3
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU (R10), Y4
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU (R11), Y5
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU (R12), Y6
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU (BX), Y7
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- VMOVDQU Y0, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y1, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y2, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y3, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y4, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y5, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y6, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y7, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x8Xor_loop
- VZEROUPPER
-
-mulAvxTwo_1x8Xor_end:
- RET
-
-// func mulAvxTwo_1x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x9(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 32 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x9_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), DI
- MOVQ 48(BX), R8
- MOVQ 72(BX), R9
- MOVQ 96(BX), R10
- MOVQ 120(BX), R11
- MOVQ 144(BX), R12
- MOVQ 168(BX), R13
- MOVQ 192(BX), BX
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, R13
- ADDQ R14, BX
-
- // Add start offset to input
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_1x9_loop:
- // Load and process 32 bytes from input 0 to 9 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y9, Y11, Y11
- VPAND Y9, Y12, Y12
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y8, Y8
- VPSHUFB Y12, Y10, Y10
- VPXOR Y8, Y10, Y0
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y8, Y8
- VPSHUFB Y12, Y10, Y10
- VPXOR Y8, Y10, Y1
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y8, Y8
- VPSHUFB Y12, Y10, Y10
- VPXOR Y8, Y10, Y2
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y8, Y8
- VPSHUFB Y12, Y10, Y10
- VPXOR Y8, Y10, Y3
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y8, Y8
- VPSHUFB Y12, Y10, Y10
- VPXOR Y8, Y10, Y4
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y8, Y8
- VPSHUFB Y12, Y10, Y10
- VPXOR Y8, Y10, Y5
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y8, Y8
- VPSHUFB Y12, Y10, Y10
- VPXOR Y8, Y10, Y6
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y8, Y8
- VPSHUFB Y12, Y10, Y10
- VPXOR Y8, Y10, Y7
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y8, Y8
- VPSHUFB Y12, Y10, Y10
- VPXOR Y8, Y10, Y8
-
- // Store 9 outputs
- VMOVDQU Y0, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y1, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y2, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y3, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y4, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y5, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y6, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y7, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y8, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x9_loop
- VZEROUPPER
-
-mulAvxTwo_1x9_end:
- RET
-
-// func mulAvxTwo_1x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_1x9Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 32 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x9Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), DI
- MOVQ 48(BX), R8
- MOVQ 72(BX), R9
- MOVQ 96(BX), R10
- MOVQ 120(BX), R11
- MOVQ 144(BX), R12
- MOVQ 168(BX), R13
- MOVQ 192(BX), BX
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, R13
- ADDQ R14, BX
-
- // Add start offset to input
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_1x9Xor_loop:
- // Load and process 32 bytes from input 0 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU (SI), Y0
- VMOVDQU (CX), Y10
- VMOVDQU 32(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU (DI), Y1
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU (R8), Y2
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU (R9), Y3
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU (R10), Y4
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU (R11), Y5
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU (R12), Y6
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU (R13), Y7
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU (BX), Y8
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs
- VMOVDQU Y0, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y1, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y2, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y3, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y4, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y5, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y6, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y7, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y8, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x9Xor_loop
- VZEROUPPER
-
-mulAvxTwo_1x9Xor_end:
- RET
-
-// func mulAvxTwo_1x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x10(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 35 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x10_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), DI
- MOVQ 48(BX), R8
- MOVQ 72(BX), R9
- MOVQ 96(BX), R10
- MOVQ 120(BX), R11
- MOVQ 144(BX), R12
- MOVQ 168(BX), R13
- MOVQ 192(BX), R14
- MOVQ 216(BX), BX
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, BX
-
- // Add start offset to input
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X10
- VPBROADCASTB X10, Y10
-
-mulAvxTwo_1x10_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y10, Y12, Y12
- VPAND Y10, Y13, Y13
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y11
- VPSHUFB Y12, Y9, Y9
- VPSHUFB Y13, Y11, Y11
- VPXOR Y9, Y11, Y0
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y9, Y9
- VPSHUFB Y13, Y11, Y11
- VPXOR Y9, Y11, Y1
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y9, Y9
- VPSHUFB Y13, Y11, Y11
- VPXOR Y9, Y11, Y2
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y9, Y9
- VPSHUFB Y13, Y11, Y11
- VPXOR Y9, Y11, Y3
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y9, Y9
- VPSHUFB Y13, Y11, Y11
- VPXOR Y9, Y11, Y4
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y9, Y9
- VPSHUFB Y13, Y11, Y11
- VPXOR Y9, Y11, Y5
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y9, Y9
- VPSHUFB Y13, Y11, Y11
- VPXOR Y9, Y11, Y6
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y9, Y9
- VPSHUFB Y13, Y11, Y11
- VPXOR Y9, Y11, Y7
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y9, Y9
- VPSHUFB Y13, Y11, Y11
- VPXOR Y9, Y11, Y8
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y9, Y9
- VPSHUFB Y13, Y11, Y11
- VPXOR Y9, Y11, Y9
-
- // Store 10 outputs
- VMOVDQU Y0, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y1, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y2, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y3, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y4, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y5, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y6, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y7, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y8, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y9, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x10_loop
- VZEROUPPER
-
-mulAvxTwo_1x10_end:
- RET
-
-// func mulAvxTwo_1x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_1x10Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 35 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x10Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), DX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), DI
- MOVQ 48(BX), R8
- MOVQ 72(BX), R9
- MOVQ 96(BX), R10
- MOVQ 120(BX), R11
- MOVQ 144(BX), R12
- MOVQ 168(BX), R13
- MOVQ 192(BX), R14
- MOVQ 216(BX), BX
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, BX
-
- // Add start offset to input
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X10
- VPBROADCASTB X10, Y10
-
-mulAvxTwo_1x10Xor_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU (SI), Y0
- VMOVDQU (CX), Y11
- VMOVDQU 32(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU (DI), Y1
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU (R8), Y2
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU (R9), Y3
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU (R10), Y4
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU (R11), Y5
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU (R12), Y6
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU (R13), Y7
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU (R14), Y8
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU (BX), Y9
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs
- VMOVDQU Y0, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y1, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y2, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y3, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y4, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y5, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y6, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y7, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y8, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y9, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_1x10Xor_loop
- VZEROUPPER
-
-mulAvxTwo_1x10Xor_end:
- RET
-
-// func mulAvxTwo_2x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x1(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 8 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x1_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- MOVQ in_base+24(FP), CX
- MOVQ (CX), DX
- MOVQ 24(CX), CX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), BX
- MOVQ start+72(FP), SI
-
- // Add start offset to output
- ADDQ SI, BX
-
- // Add start offset to input
- ADDQ SI, DX
- ADDQ SI, CX
- MOVQ $0x0000000f, SI
- MOVQ SI, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_2x1_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y5, Y6, Y6
- VPAND Y5, Y7, Y7
- VPSHUFB Y6, Y0, Y6
- VPSHUFB Y7, Y1, Y7
- VPXOR Y6, Y7, Y4
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (CX), Y6
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y5, Y6, Y6
- VPAND Y5, Y7, Y7
- VPSHUFB Y6, Y2, Y6
- VPSHUFB Y7, Y3, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 1 outputs
- VMOVDQU Y4, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x1_loop
- VZEROUPPER
-
-mulAvxTwo_2x1_end:
- RET
-
-// func mulAvxTwo_2x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x1_64(SB), $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 14 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x1_64_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- MOVQ in_base+24(FP), CX
- MOVQ (CX), DX
- MOVQ 24(CX), CX
- MOVQ out_base+48(FP), BX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), BX
- MOVQ start+72(FP), SI
-
- // Add start offset to output
- ADDQ SI, BX
-
- // Add start offset to input
- ADDQ SI, DX
- ADDQ SI, CX
- MOVQ $0x0000000f, SI
- MOVQ SI, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_2x1_64_loop:
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (DX), Y7
- VMOVDQU 32(DX), Y9
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y7, Y8
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y7, Y7
- VPAND Y6, Y9, Y9
- VPAND Y6, Y8, Y8
- VPAND Y6, Y10, Y10
- VPSHUFB Y7, Y0, Y7
- VPSHUFB Y9, Y0, Y9
- VPSHUFB Y8, Y1, Y8
- VPSHUFB Y10, Y1, Y10
- VPXOR Y7, Y8, Y4
- VPXOR Y9, Y10, Y5
-
- // Load and process 64 bytes from input 1 to 1 outputs
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y9
- ADDQ $0x40, CX
- VPSRLQ $0x04, Y7, Y8
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y7, Y7
- VPAND Y6, Y9, Y9
- VPAND Y6, Y8, Y8
- VPAND Y6, Y10, Y10
- VPSHUFB Y7, Y2, Y7
- VPSHUFB Y9, Y2, Y9
- VPSHUFB Y8, Y3, Y8
- VPSHUFB Y10, Y3, Y10
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 1 outputs
- VMOVDQU Y4, (BX)
- VMOVDQU Y5, 32(BX)
- ADDQ $0x40, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x1_64_loop
- VZEROUPPER
-
-mulAvxTwo_2x1_64_end:
- RET
-
-// func mulAvxTwo_2x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x1Xor(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 8 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x1Xor_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- MOVQ in_base+24(FP), CX
- MOVQ (CX), DX
- MOVQ 24(CX), CX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), BX
- MOVQ start+72(FP), SI
-
- // Add start offset to output
- ADDQ SI, BX
-
- // Add start offset to input
- ADDQ SI, DX
- ADDQ SI, CX
- MOVQ $0x0000000f, SI
- MOVQ SI, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_2x1Xor_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y5, Y6, Y6
- VPAND Y5, Y7, Y7
- VMOVDQU (BX), Y4
- VPSHUFB Y6, Y0, Y6
- VPSHUFB Y7, Y1, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (CX), Y6
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y5, Y6, Y6
- VPAND Y5, Y7, Y7
- VPSHUFB Y6, Y2, Y6
- VPSHUFB Y7, Y3, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 1 outputs
- VMOVDQU Y4, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x1Xor_loop
- VZEROUPPER
-
-mulAvxTwo_2x1Xor_end:
- RET
-
-// func mulAvxTwo_2x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x1_64Xor(SB), $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 14 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x1_64Xor_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- MOVQ in_base+24(FP), CX
- MOVQ (CX), DX
- MOVQ 24(CX), CX
- MOVQ out_base+48(FP), BX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), BX
- MOVQ start+72(FP), SI
-
- // Add start offset to output
- ADDQ SI, BX
-
- // Add start offset to input
- ADDQ SI, DX
- ADDQ SI, CX
- MOVQ $0x0000000f, SI
- MOVQ SI, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_2x1_64Xor_loop:
- // Load 1 outputs
- VMOVDQU (BX), Y4
- VMOVDQU 32(BX), Y5
-
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (DX), Y7
- VMOVDQU 32(DX), Y9
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y7, Y8
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y7, Y7
- VPAND Y6, Y9, Y9
- VPAND Y6, Y8, Y8
- VPAND Y6, Y10, Y10
- VPSHUFB Y7, Y0, Y7
- VPSHUFB Y9, Y0, Y9
- VPSHUFB Y8, Y1, Y8
- VPSHUFB Y10, Y1, Y10
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 1 to 1 outputs
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y9
- ADDQ $0x40, CX
- VPSRLQ $0x04, Y7, Y8
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y7, Y7
- VPAND Y6, Y9, Y9
- VPAND Y6, Y8, Y8
- VPAND Y6, Y10, Y10
- VPSHUFB Y7, Y2, Y7
- VPSHUFB Y9, Y2, Y9
- VPSHUFB Y8, Y3, Y8
- VPSHUFB Y10, Y3, Y10
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 1 outputs
- VMOVDQU Y4, (BX)
- VMOVDQU Y5, 32(BX)
- ADDQ $0x40, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x1_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_2x1_64Xor_end:
- RET
-
-// func mulAvxTwo_2x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x2(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 15 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x2_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- MOVQ in_base+24(FP), CX
- MOVQ (CX), DX
- MOVQ 24(CX), CX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), BX
- MOVQ start+72(FP), DI
-
- // Add start offset to output
- ADDQ DI, SI
- ADDQ DI, BX
-
- // Add start offset to input
- ADDQ DI, DX
- ADDQ DI, CX
- MOVQ $0x0000000f, DI
- MOVQ DI, X10
- VPBROADCASTB X10, Y10
-
-mulAvxTwo_2x2_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VPSHUFB Y13, Y0, Y11
- VPSHUFB Y14, Y1, Y12
- VPXOR Y11, Y12, Y8
- VPSHUFB Y13, Y2, Y11
- VPSHUFB Y14, Y3, Y12
- VPXOR Y11, Y12, Y9
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (CX), Y13
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VPSHUFB Y13, Y4, Y11
- VPSHUFB Y14, Y5, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VPSHUFB Y13, Y6, Y11
- VPSHUFB Y14, Y7, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 2 outputs
- VMOVDQU Y8, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y9, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x2_loop
- VZEROUPPER
-
-mulAvxTwo_2x2_end:
- RET
-
-// func mulAvxTwo_2x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x2_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 25 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x2_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), SI
- MOVQ start+72(FP), R8
-
- // Add start offset to output
- ADDQ R8, DI
- ADDQ R8, SI
-
- // Add start offset to input
- ADDQ R8, BX
- ADDQ R8, DX
- MOVQ $0x0000000f, R8
- MOVQ R8, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_2x2_64_loop:
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9
- VMOVDQU 32(BX), Y11
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y0
- VPXOR Y7, Y8, Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y2
- VPXOR Y7, Y8, Y3
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (DI)
- VMOVDQU Y1, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y2, (SI)
- VMOVDQU Y3, 32(SI)
- ADDQ $0x40, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x2_64_loop
- VZEROUPPER
-
-mulAvxTwo_2x2_64_end:
- RET
-
-// func mulAvxTwo_2x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x2Xor(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 15 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x2Xor_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- MOVQ in_base+24(FP), CX
- MOVQ (CX), DX
- MOVQ 24(CX), CX
- MOVQ out_base+48(FP), BX
- MOVQ (BX), SI
- MOVQ 24(BX), BX
- MOVQ start+72(FP), DI
-
- // Add start offset to output
- ADDQ DI, SI
- ADDQ DI, BX
-
- // Add start offset to input
- ADDQ DI, DX
- ADDQ DI, CX
- MOVQ $0x0000000f, DI
- MOVQ DI, X10
- VPBROADCASTB X10, Y10
-
-mulAvxTwo_2x2Xor_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU (SI), Y8
- VPSHUFB Y13, Y0, Y11
- VPSHUFB Y14, Y1, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU (BX), Y9
- VPSHUFB Y13, Y2, Y11
- VPSHUFB Y14, Y3, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (CX), Y13
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VPSHUFB Y13, Y4, Y11
- VPSHUFB Y14, Y5, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VPSHUFB Y13, Y6, Y11
- VPSHUFB Y14, Y7, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 2 outputs
- VMOVDQU Y8, (SI)
- ADDQ $0x20, SI
- VMOVDQU Y9, (BX)
- ADDQ $0x20, BX
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x2Xor_loop
- VZEROUPPER
-
-mulAvxTwo_2x2Xor_end:
- RET
-
-// func mulAvxTwo_2x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x2_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 25 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x2_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), SI
- MOVQ start+72(FP), R8
-
- // Add start offset to output
- ADDQ R8, DI
- ADDQ R8, SI
-
- // Add start offset to input
- ADDQ R8, BX
- ADDQ R8, DX
- MOVQ $0x0000000f, R8
- MOVQ R8, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_2x2_64Xor_loop:
- // Load 2 outputs
- VMOVDQU (DI), Y0
- VMOVDQU 32(DI), Y1
- VMOVDQU (SI), Y2
- VMOVDQU 32(SI), Y3
-
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9
- VMOVDQU 32(BX), Y11
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (DI)
- VMOVDQU Y1, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y2, (SI)
- VMOVDQU Y3, 32(SI)
- ADDQ $0x40, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x2_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_2x2_64Xor_end:
- RET
-
-// func mulAvxTwo_2x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x3(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 20 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x3_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), SI
- MOVQ start+72(FP), R9
-
- // Add start offset to output
- ADDQ R9, DI
- ADDQ R9, R8
- ADDQ R9, SI
-
- // Add start offset to input
- ADDQ R9, BX
- ADDQ R9, DX
- MOVQ $0x0000000f, R9
- MOVQ R9, X3
- VPBROADCASTB X3, Y3
-
-mulAvxTwo_2x3_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y0
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y1
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y2
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x3_loop
- VZEROUPPER
-
-mulAvxTwo_2x3_end:
- RET
-
-// func mulAvxTwo_2x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x3_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 34 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x3_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), SI
- MOVQ start+72(FP), R9
-
- // Add start offset to output
- ADDQ R9, DI
- ADDQ R9, R8
- ADDQ R9, SI
-
- // Add start offset to input
- ADDQ R9, BX
- ADDQ R9, DX
- MOVQ $0x0000000f, R9
- MOVQ R9, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_2x3_64_loop:
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y11
- VMOVDQU 32(BX), Y13
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y0
- VPXOR Y9, Y10, Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y2
- VPXOR Y9, Y10, Y3
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y4
- VPXOR Y9, Y10, Y5
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (DI)
- VMOVDQU Y1, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y2, (R8)
- VMOVDQU Y3, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y4, (SI)
- VMOVDQU Y5, 32(SI)
- ADDQ $0x40, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x3_64_loop
- VZEROUPPER
-
-mulAvxTwo_2x3_64_end:
- RET
-
-// func mulAvxTwo_2x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x3Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 20 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x3Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), SI
- MOVQ start+72(FP), R9
-
- // Add start offset to output
- ADDQ R9, DI
- ADDQ R9, R8
- ADDQ R9, SI
-
- // Add start offset to input
- ADDQ R9, BX
- ADDQ R9, DX
- MOVQ $0x0000000f, R9
- MOVQ R9, X3
- VPBROADCASTB X3, Y3
-
-mulAvxTwo_2x3Xor_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (DI), Y0
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU (R8), Y1
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU (SI), Y2
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x3Xor_loop
- VZEROUPPER
-
-mulAvxTwo_2x3Xor_end:
- RET
-
-// func mulAvxTwo_2x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x3_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 34 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x3_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), SI
- MOVQ start+72(FP), R9
-
- // Add start offset to output
- ADDQ R9, DI
- ADDQ R9, R8
- ADDQ R9, SI
-
- // Add start offset to input
- ADDQ R9, BX
- ADDQ R9, DX
- MOVQ $0x0000000f, R9
- MOVQ R9, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_2x3_64Xor_loop:
- // Load 3 outputs
- VMOVDQU (DI), Y0
- VMOVDQU 32(DI), Y1
- VMOVDQU (R8), Y2
- VMOVDQU 32(R8), Y3
- VMOVDQU (SI), Y4
- VMOVDQU 32(SI), Y5
-
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y11
- VMOVDQU 32(BX), Y13
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (DI)
- VMOVDQU Y1, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y2, (R8)
- VMOVDQU Y3, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y4, (SI)
- VMOVDQU Y5, 32(SI)
- ADDQ $0x40, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x3_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_2x3_64Xor_end:
- RET
-
-// func mulAvxTwo_2x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x4(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 25 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x4_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), SI
- MOVQ start+72(FP), R10
-
- // Add start offset to output
- ADDQ R10, DI
- ADDQ R10, R8
- ADDQ R10, R9
- ADDQ R10, SI
-
- // Add start offset to input
- ADDQ R10, BX
- ADDQ R10, DX
- MOVQ $0x0000000f, R10
- MOVQ R10, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_2x4_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y0
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y1
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y2
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y3
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y3, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x4_loop
- VZEROUPPER
-
-mulAvxTwo_2x4_end:
- RET
-
-// func mulAvxTwo_2x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x4Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 25 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x4Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), SI
- MOVQ start+72(FP), R10
-
- // Add start offset to output
- ADDQ R10, DI
- ADDQ R10, R8
- ADDQ R10, R9
- ADDQ R10, SI
-
- // Add start offset to input
- ADDQ R10, BX
- ADDQ R10, DX
- MOVQ $0x0000000f, R10
- MOVQ R10, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_2x4Xor_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (DI), Y0
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU (R8), Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU (R9), Y2
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU (SI), Y3
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y3, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x4Xor_loop
- VZEROUPPER
-
-mulAvxTwo_2x4Xor_end:
- RET
-
-// func mulAvxTwo_2x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x5(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 30 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x5_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), SI
- MOVQ start+72(FP), R11
-
- // Add start offset to output
- ADDQ R11, DI
- ADDQ R11, R8
- ADDQ R11, R9
- ADDQ R11, R10
- ADDQ R11, SI
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_2x5_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y0
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y1
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y2
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y3
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y4
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y3, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y4, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x5_loop
- VZEROUPPER
-
-mulAvxTwo_2x5_end:
- RET
-
-// func mulAvxTwo_2x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x5Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 30 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x5Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), SI
- MOVQ start+72(FP), R11
-
- // Add start offset to output
- ADDQ R11, DI
- ADDQ R11, R8
- ADDQ R11, R9
- ADDQ R11, R10
- ADDQ R11, SI
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_2x5Xor_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (DI), Y0
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU (R8), Y1
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU (R9), Y2
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU (R10), Y3
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU (SI), Y4
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y3, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y4, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x5Xor_loop
- VZEROUPPER
-
-mulAvxTwo_2x5Xor_end:
- RET
-
-// func mulAvxTwo_2x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x6(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 35 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x6_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), R11
- MOVQ 120(SI), SI
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, R11
- ADDQ R12, SI
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_2x6_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y0
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y1
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y2
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y3
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y4
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y5
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y3, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y4, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y5, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x6_loop
- VZEROUPPER
-
-mulAvxTwo_2x6_end:
- RET
-
-// func mulAvxTwo_2x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x6Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 35 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x6Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), R11
- MOVQ 120(SI), SI
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, R11
- ADDQ R12, SI
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_2x6Xor_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (DI), Y0
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU (R8), Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU (R9), Y2
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU (R10), Y3
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU (R11), Y4
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU (SI), Y5
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y3, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y4, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y5, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x6Xor_loop
- VZEROUPPER
-
-mulAvxTwo_2x6Xor_end:
- RET
-
-// func mulAvxTwo_2x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x7(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 40 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x7_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), R11
- MOVQ 120(SI), R12
- MOVQ 144(SI), SI
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, R12
- ADDQ R13, SI
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_2x7_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y0
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y1
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y2
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y3
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y4
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y5
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y6
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y3, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y4, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y5, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y6, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x7_loop
- VZEROUPPER
-
-mulAvxTwo_2x7_end:
- RET
-
-// func mulAvxTwo_2x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x7Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 40 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x7Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), R11
- MOVQ 120(SI), R12
- MOVQ 144(SI), SI
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, R12
- ADDQ R13, SI
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_2x7Xor_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (DI), Y0
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU (R8), Y1
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU (R9), Y2
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU (R10), Y3
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU (R11), Y4
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU (R12), Y5
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU (SI), Y6
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y3, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y4, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y5, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y6, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x7Xor_loop
- VZEROUPPER
-
-mulAvxTwo_2x7Xor_end:
- RET
-
-// func mulAvxTwo_2x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x8(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 45 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x8_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), R11
- MOVQ 120(SI), R12
- MOVQ 144(SI), R13
- MOVQ 168(SI), SI
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, R13
- ADDQ R14, SI
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X8
- VPBROADCASTB X8, Y8
-
-mulAvxTwo_2x8_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y0
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y1
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y2
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y3
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y4
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y5
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y6
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y7
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y3, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y4, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y5, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y6, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y7, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x8_loop
- VZEROUPPER
-
-mulAvxTwo_2x8_end:
- RET
-
-// func mulAvxTwo_2x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x8Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 45 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x8Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), R11
- MOVQ 120(SI), R12
- MOVQ 144(SI), R13
- MOVQ 168(SI), SI
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, R13
- ADDQ R14, SI
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X8
- VPBROADCASTB X8, Y8
-
-mulAvxTwo_2x8Xor_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (DI), Y0
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU (R8), Y1
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU (R9), Y2
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU (R10), Y3
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU (R11), Y4
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU (R12), Y5
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU (R13), Y6
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU (SI), Y7
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y3, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y4, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y5, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y6, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y7, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x8Xor_loop
- VZEROUPPER
-
-mulAvxTwo_2x8Xor_end:
- RET
-
-// func mulAvxTwo_2x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x9(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 50 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x9_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), R11
- MOVQ 120(SI), R12
- MOVQ 144(SI), R13
- MOVQ 168(SI), R14
- MOVQ 192(SI), SI
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, SI
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_2x9_loop:
- // Load and process 32 bytes from input 0 to 9 outputs
- VMOVDQU (BX), Y12
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU (CX), Y10
- VMOVDQU 32(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y0
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y1
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y2
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y3
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y4
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y5
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y6
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y7
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y8
-
- // Load and process 32 bytes from input 1 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y3, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y4, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y5, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y6, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y7, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y8, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x9_loop
- VZEROUPPER
-
-mulAvxTwo_2x9_end:
- RET
-
-// func mulAvxTwo_2x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x9Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 50 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x9Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), R11
- MOVQ 120(SI), R12
- MOVQ 144(SI), R13
- MOVQ 168(SI), R14
- MOVQ 192(SI), SI
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, SI
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_2x9Xor_loop:
- // Load and process 32 bytes from input 0 to 9 outputs
- VMOVDQU (BX), Y12
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU (DI), Y0
- VMOVDQU (CX), Y10
- VMOVDQU 32(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU (R8), Y1
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU (R9), Y2
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU (R10), Y3
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU (R11), Y4
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU (R12), Y5
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU (R13), Y6
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU (R14), Y7
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU (SI), Y8
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 1 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y3, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y4, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y5, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y6, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y7, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y8, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x9Xor_loop
- VZEROUPPER
-
-mulAvxTwo_2x9Xor_end:
- RET
-
-// func mulAvxTwo_2x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x10(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 55 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x10_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), R11
- MOVQ 120(SI), R12
- MOVQ 144(SI), R13
- MOVQ 168(SI), R14
- MOVQ 192(SI), R15
- MOVQ 216(SI), SI
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, SI
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X10
- VPBROADCASTB X10, Y10
-
-mulAvxTwo_2x10_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (BX), Y13
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU (CX), Y11
- VMOVDQU 32(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y0
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y1
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y2
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y3
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y4
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y5
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y6
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y7
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y8
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y9
-
- // Load and process 32 bytes from input 1 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y3, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y4, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y5, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y6, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y7, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y8, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y9, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x10_loop
- VZEROUPPER
-
-mulAvxTwo_2x10_end:
- RET
-
-// func mulAvxTwo_2x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_2x10Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 55 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x10Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), R11
- MOVQ 120(SI), R12
- MOVQ 144(SI), R13
- MOVQ 168(SI), R14
- MOVQ 192(SI), R15
- MOVQ 216(SI), SI
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, SI
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X10
- VPBROADCASTB X10, Y10
-
-mulAvxTwo_2x10Xor_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (BX), Y13
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU (DI), Y0
- VMOVDQU (CX), Y11
- VMOVDQU 32(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU (R8), Y1
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU (R9), Y2
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU (R10), Y3
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU (R11), Y4
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU (R12), Y5
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU (R13), Y6
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU (R14), Y7
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU (R15), Y8
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU (SI), Y9
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 1 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y3, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y4, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y5, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y6, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y7, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y8, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y9, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_2x10Xor_loop
- VZEROUPPER
-
-mulAvxTwo_2x10Xor_end:
- RET
-
-// func mulAvxTwo_3x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x1(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 10 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x1_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- MOVQ in_base+24(FP), CX
- MOVQ (CX), DX
- MOVQ 24(CX), BX
- MOVQ 48(CX), CX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), SI
- MOVQ start+72(FP), DI
-
- // Add start offset to output
- ADDQ DI, SI
-
- // Add start offset to input
- ADDQ DI, DX
- ADDQ DI, BX
- ADDQ DI, CX
- MOVQ $0x0000000f, DI
- MOVQ DI, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_3x1_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y7, Y8, Y8
- VPAND Y7, Y9, Y9
- VPSHUFB Y8, Y0, Y8
- VPSHUFB Y9, Y1, Y9
- VPXOR Y8, Y9, Y6
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y7, Y8, Y8
- VPAND Y7, Y9, Y9
- VPSHUFB Y8, Y2, Y8
- VPSHUFB Y9, Y3, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (CX), Y8
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y7, Y8, Y8
- VPAND Y7, Y9, Y9
- VPSHUFB Y8, Y4, Y8
- VPSHUFB Y9, Y5, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 1 outputs
- VMOVDQU Y6, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x1_loop
- VZEROUPPER
-
-mulAvxTwo_3x1_end:
- RET
-
-// func mulAvxTwo_3x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x1_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 18 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x1_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ out_base+48(FP), DI
- MOVQ (DI), DI
- MOVQ start+72(FP), R8
-
- // Add start offset to output
- ADDQ R8, DI
-
- // Add start offset to input
- ADDQ R8, BX
- ADDQ R8, SI
- ADDQ R8, DX
- MOVQ $0x0000000f, R8
- MOVQ R8, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_3x1_64_loop:
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y6
- VMOVDQU 32(BX), Y5
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- VPXOR Y3, Y4, Y0
- VPXOR Y5, Y6, Y1
-
- // Load and process 64 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y6
- VMOVDQU 32(SI), Y5
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 2 to 1 outputs
- VMOVDQU (DX), Y6
- VMOVDQU 32(DX), Y5
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Store 1 outputs
- VMOVDQU Y0, (DI)
- VMOVDQU Y1, 32(DI)
- ADDQ $0x40, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x1_64_loop
- VZEROUPPER
-
-mulAvxTwo_3x1_64_end:
- RET
-
-// func mulAvxTwo_3x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x1Xor(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 10 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x1Xor_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- MOVQ in_base+24(FP), CX
- MOVQ (CX), DX
- MOVQ 24(CX), BX
- MOVQ 48(CX), CX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), SI
- MOVQ start+72(FP), DI
-
- // Add start offset to output
- ADDQ DI, SI
-
- // Add start offset to input
- ADDQ DI, DX
- ADDQ DI, BX
- ADDQ DI, CX
- MOVQ $0x0000000f, DI
- MOVQ DI, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_3x1Xor_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y7, Y8, Y8
- VPAND Y7, Y9, Y9
- VMOVDQU (SI), Y6
- VPSHUFB Y8, Y0, Y8
- VPSHUFB Y9, Y1, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y7, Y8, Y8
- VPAND Y7, Y9, Y9
- VPSHUFB Y8, Y2, Y8
- VPSHUFB Y9, Y3, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (CX), Y8
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y7, Y8, Y8
- VPAND Y7, Y9, Y9
- VPSHUFB Y8, Y4, Y8
- VPSHUFB Y9, Y5, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 1 outputs
- VMOVDQU Y6, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x1Xor_loop
- VZEROUPPER
-
-mulAvxTwo_3x1Xor_end:
- RET
-
-// func mulAvxTwo_3x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x1_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 18 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x1_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ out_base+48(FP), DI
- MOVQ (DI), DI
- MOVQ start+72(FP), R8
-
- // Add start offset to output
- ADDQ R8, DI
-
- // Add start offset to input
- ADDQ R8, BX
- ADDQ R8, SI
- ADDQ R8, DX
- MOVQ $0x0000000f, R8
- MOVQ R8, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_3x1_64Xor_loop:
- // Load 1 outputs
- VMOVDQU (DI), Y0
- VMOVDQU 32(DI), Y1
-
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y6
- VMOVDQU 32(BX), Y5
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y6
- VMOVDQU 32(SI), Y5
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 2 to 1 outputs
- VMOVDQU (DX), Y6
- VMOVDQU 32(DX), Y5
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Store 1 outputs
- VMOVDQU Y0, (DI)
- VMOVDQU Y1, 32(DI)
- ADDQ $0x40, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x1_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_3x1_64Xor_end:
- RET
-
-// func mulAvxTwo_3x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x2(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 19 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x2_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), DI
- MOVQ start+72(FP), R9
-
- // Add start offset to output
- ADDQ R9, R8
- ADDQ R9, DI
-
- // Add start offset to input
- ADDQ R9, BX
- ADDQ R9, SI
- ADDQ R9, DX
- MOVQ $0x0000000f, R9
- MOVQ R9, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_3x2_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y5
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y0
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y1
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y5
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (DX), Y5
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Store 2 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x2_loop
- VZEROUPPER
-
-mulAvxTwo_3x2_end:
- RET
-
-// func mulAvxTwo_3x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x2_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 33 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x2_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), DI
- MOVQ start+72(FP), R9
-
- // Add start offset to output
- ADDQ R9, R8
- ADDQ R9, DI
-
- // Add start offset to input
- ADDQ R9, BX
- ADDQ R9, SI
- ADDQ R9, DX
- MOVQ $0x0000000f, R9
- MOVQ R9, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_3x2_64_loop:
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9
- VMOVDQU 32(BX), Y11
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y0
- VPXOR Y7, Y8, Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y2
- VPXOR Y7, Y8, Y3
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y9
- VMOVDQU 32(SI), Y11
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 2 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (R8)
- VMOVDQU Y1, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y2, (DI)
- VMOVDQU Y3, 32(DI)
- ADDQ $0x40, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x2_64_loop
- VZEROUPPER
-
-mulAvxTwo_3x2_64_end:
- RET
-
-// func mulAvxTwo_3x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x2Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 19 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x2Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), DI
- MOVQ start+72(FP), R9
-
- // Add start offset to output
- ADDQ R9, R8
- ADDQ R9, DI
-
- // Add start offset to input
- ADDQ R9, BX
- ADDQ R9, SI
- ADDQ R9, DX
- MOVQ $0x0000000f, R9
- MOVQ R9, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_3x2Xor_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y5
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (R8), Y0
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU (DI), Y1
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y5
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (DX), Y5
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Store 2 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x2Xor_loop
- VZEROUPPER
-
-mulAvxTwo_3x2Xor_end:
- RET
-
-// func mulAvxTwo_3x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x2_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 33 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x2_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), DI
- MOVQ start+72(FP), R9
-
- // Add start offset to output
- ADDQ R9, R8
- ADDQ R9, DI
-
- // Add start offset to input
- ADDQ R9, BX
- ADDQ R9, SI
- ADDQ R9, DX
- MOVQ $0x0000000f, R9
- MOVQ R9, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_3x2_64Xor_loop:
- // Load 2 outputs
- VMOVDQU (R8), Y0
- VMOVDQU 32(R8), Y1
- VMOVDQU (DI), Y2
- VMOVDQU 32(DI), Y3
-
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9
- VMOVDQU 32(BX), Y11
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y9
- VMOVDQU 32(SI), Y11
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 2 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (R8)
- VMOVDQU Y1, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y2, (DI)
- VMOVDQU Y3, 32(DI)
- ADDQ $0x40, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x2_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_3x2_64Xor_end:
- RET
-
-// func mulAvxTwo_3x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x3(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 26 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x3_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), DI
- MOVQ start+72(FP), R10
-
- // Add start offset to output
- ADDQ R10, R8
- ADDQ R10, R9
- ADDQ R10, DI
-
- // Add start offset to input
- ADDQ R10, BX
- ADDQ R10, SI
- ADDQ R10, DX
- MOVQ $0x0000000f, R10
- MOVQ R10, X3
- VPBROADCASTB X3, Y3
-
-mulAvxTwo_3x3_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y0
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y1
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y2
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y6
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y2, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x3_loop
- VZEROUPPER
-
-mulAvxTwo_3x3_end:
- RET
-
-// func mulAvxTwo_3x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x3_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 46 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x3_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), DI
- MOVQ start+72(FP), R10
-
- // Add start offset to output
- ADDQ R10, R8
- ADDQ R10, R9
- ADDQ R10, DI
-
- // Add start offset to input
- ADDQ R10, BX
- ADDQ R10, SI
- ADDQ R10, DX
- MOVQ $0x0000000f, R10
- MOVQ R10, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_3x3_64_loop:
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y11
- VMOVDQU 32(BX), Y13
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y0
- VPXOR Y9, Y10, Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y2
- VPXOR Y9, Y10, Y3
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y4
- VPXOR Y9, Y10, Y5
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y11
- VMOVDQU 32(SI), Y13
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 2 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (R8)
- VMOVDQU Y1, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y2, (R9)
- VMOVDQU Y3, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y4, (DI)
- VMOVDQU Y5, 32(DI)
- ADDQ $0x40, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x3_64_loop
- VZEROUPPER
-
-mulAvxTwo_3x3_64_end:
- RET
-
-// func mulAvxTwo_3x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x3Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 26 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x3Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), DI
- MOVQ start+72(FP), R10
-
- // Add start offset to output
- ADDQ R10, R8
- ADDQ R10, R9
- ADDQ R10, DI
-
- // Add start offset to input
- ADDQ R10, BX
- ADDQ R10, SI
- ADDQ R10, DX
- MOVQ $0x0000000f, R10
- MOVQ R10, X3
- VPBROADCASTB X3, Y3
-
-mulAvxTwo_3x3Xor_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (R8), Y0
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU (R9), Y1
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU (DI), Y2
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y6
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y2, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x3Xor_loop
- VZEROUPPER
-
-mulAvxTwo_3x3Xor_end:
- RET
-
-// func mulAvxTwo_3x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x3_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 46 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x3_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), DI
- MOVQ start+72(FP), R10
-
- // Add start offset to output
- ADDQ R10, R8
- ADDQ R10, R9
- ADDQ R10, DI
-
- // Add start offset to input
- ADDQ R10, BX
- ADDQ R10, SI
- ADDQ R10, DX
- MOVQ $0x0000000f, R10
- MOVQ R10, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_3x3_64Xor_loop:
- // Load 3 outputs
- VMOVDQU (R8), Y0
- VMOVDQU 32(R8), Y1
- VMOVDQU (R9), Y2
- VMOVDQU 32(R9), Y3
- VMOVDQU (DI), Y4
- VMOVDQU 32(DI), Y5
-
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y11
- VMOVDQU 32(BX), Y13
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y11
- VMOVDQU 32(SI), Y13
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 2 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (R8)
- VMOVDQU Y1, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y2, (R9)
- VMOVDQU Y3, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y4, (DI)
- VMOVDQU Y5, 32(DI)
- ADDQ $0x40, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x3_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_3x3_64Xor_end:
- RET
-
-// func mulAvxTwo_3x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x4(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 33 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x4_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), DI
- MOVQ start+72(FP), R11
-
- // Add start offset to output
- ADDQ R11, R8
- ADDQ R11, R9
- ADDQ R11, R10
- ADDQ R11, DI
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_3x4_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y0
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y1
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y2
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y3
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (SI), Y7
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y2, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y3, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x4_loop
- VZEROUPPER
-
-mulAvxTwo_3x4_end:
- RET
-
-// func mulAvxTwo_3x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x4Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 33 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x4Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), DI
- MOVQ start+72(FP), R11
-
- // Add start offset to output
- ADDQ R11, R8
- ADDQ R11, R9
- ADDQ R11, R10
- ADDQ R11, DI
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_3x4Xor_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (R8), Y0
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU (R9), Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU (R10), Y2
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU (DI), Y3
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (SI), Y7
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y2, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y3, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x4Xor_loop
- VZEROUPPER
-
-mulAvxTwo_3x4Xor_end:
- RET
-
-// func mulAvxTwo_3x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x5(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 40 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x5_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), R11
- MOVQ 96(DI), DI
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, R11
- ADDQ R12, DI
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_3x5_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y0
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y1
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y2
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y3
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y4
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (SI), Y8
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y2, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y3, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y4, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x5_loop
- VZEROUPPER
-
-mulAvxTwo_3x5_end:
- RET
-
-// func mulAvxTwo_3x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x5Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 40 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x5Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), R11
- MOVQ 96(DI), DI
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, R11
- ADDQ R12, DI
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_3x5Xor_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (R8), Y0
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU (R9), Y1
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU (R10), Y2
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU (R11), Y3
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU (DI), Y4
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (SI), Y8
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y2, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y3, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y4, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x5Xor_loop
- VZEROUPPER
-
-mulAvxTwo_3x5Xor_end:
- RET
-
-// func mulAvxTwo_3x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x6(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 47 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x6_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), R11
- MOVQ 96(DI), R12
- MOVQ 120(DI), DI
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, R12
- ADDQ R13, DI
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_3x6_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y0
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y1
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y2
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y3
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y4
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y5
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI), Y9
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y2, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y3, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y4, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y5, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x6_loop
- VZEROUPPER
-
-mulAvxTwo_3x6_end:
- RET
-
-// func mulAvxTwo_3x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x6Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 47 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x6Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), R11
- MOVQ 96(DI), R12
- MOVQ 120(DI), DI
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, R12
- ADDQ R13, DI
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_3x6Xor_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (R8), Y0
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU (R9), Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU (R10), Y2
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU (R11), Y3
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU (R12), Y4
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU (DI), Y5
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI), Y9
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y2, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y3, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y4, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y5, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x6Xor_loop
- VZEROUPPER
-
-mulAvxTwo_3x6Xor_end:
- RET
-
-// func mulAvxTwo_3x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x7(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 54 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x7_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), R11
- MOVQ 96(DI), R12
- MOVQ 120(DI), R13
- MOVQ 144(DI), DI
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, R13
- ADDQ R14, DI
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_3x7_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y0
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y1
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y2
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y3
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y4
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y5
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y6
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y2, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y3, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y4, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y5, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y6, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x7_loop
- VZEROUPPER
-
-mulAvxTwo_3x7_end:
- RET
-
-// func mulAvxTwo_3x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x7Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 54 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x7Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), R11
- MOVQ 96(DI), R12
- MOVQ 120(DI), R13
- MOVQ 144(DI), DI
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, R13
- ADDQ R14, DI
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_3x7Xor_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (R8), Y0
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU (R9), Y1
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU (R10), Y2
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU (R11), Y3
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU (R12), Y4
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU (R13), Y5
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU (DI), Y6
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y2, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y3, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y4, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y5, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y6, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x7Xor_loop
- VZEROUPPER
-
-mulAvxTwo_3x7Xor_end:
- RET
-
-// func mulAvxTwo_3x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x8(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 61 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x8_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), R11
- MOVQ 96(DI), R12
- MOVQ 120(DI), R13
- MOVQ 144(DI), R14
- MOVQ 168(DI), DI
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, DI
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X8
- VPBROADCASTB X8, Y8
-
-mulAvxTwo_3x8_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y0
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y1
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y2
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y3
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y4
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y5
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y6
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y7
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI), Y11
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y2, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y3, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y4, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y5, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y6, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y7, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x8_loop
- VZEROUPPER
-
-mulAvxTwo_3x8_end:
- RET
-
-// func mulAvxTwo_3x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x8Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 61 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x8Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), R11
- MOVQ 96(DI), R12
- MOVQ 120(DI), R13
- MOVQ 144(DI), R14
- MOVQ 168(DI), DI
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, DI
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X8
- VPBROADCASTB X8, Y8
-
-mulAvxTwo_3x8Xor_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (R8), Y0
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU (R9), Y1
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU (R10), Y2
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU (R11), Y3
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU (R12), Y4
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU (R13), Y5
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU (R14), Y6
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU (DI), Y7
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI), Y11
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y2, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y3, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y4, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y5, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y6, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y7, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x8Xor_loop
- VZEROUPPER
-
-mulAvxTwo_3x8Xor_end:
- RET
-
-// func mulAvxTwo_3x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x9(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 68 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x9_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), R11
- MOVQ 96(DI), R12
- MOVQ 120(DI), R13
- MOVQ 144(DI), R14
- MOVQ 168(DI), R15
- MOVQ 192(DI), DI
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, DI
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_3x9_loop:
- // Load and process 32 bytes from input 0 to 9 outputs
- VMOVDQU (BX), Y12
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU (CX), Y10
- VMOVDQU 32(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y0
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y1
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y2
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y3
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y4
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y5
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y6
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y7
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y8
-
- // Load and process 32 bytes from input 1 to 9 outputs
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1152(CX), Y10
- VMOVDQU 1184(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1216(CX), Y10
- VMOVDQU 1248(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1280(CX), Y10
- VMOVDQU 1312(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1344(CX), Y10
- VMOVDQU 1376(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1408(CX), Y10
- VMOVDQU 1440(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 1472(CX), Y10
- VMOVDQU 1504(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 1536(CX), Y10
- VMOVDQU 1568(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1600(CX), Y10
- VMOVDQU 1632(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1664(CX), Y10
- VMOVDQU 1696(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y2, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y3, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y4, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y5, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y6, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y7, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y8, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x9_loop
- VZEROUPPER
-
-mulAvxTwo_3x9_end:
- RET
-
-// func mulAvxTwo_3x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x9Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 68 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x9Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), R11
- MOVQ 96(DI), R12
- MOVQ 120(DI), R13
- MOVQ 144(DI), R14
- MOVQ 168(DI), R15
- MOVQ 192(DI), DI
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, DI
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_3x9Xor_loop:
- // Load and process 32 bytes from input 0 to 9 outputs
- VMOVDQU (BX), Y12
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU (R8), Y0
- VMOVDQU (CX), Y10
- VMOVDQU 32(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU (R9), Y1
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU (R10), Y2
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU (R11), Y3
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU (R12), Y4
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU (R13), Y5
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU (R14), Y6
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU (R15), Y7
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU (DI), Y8
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 1 to 9 outputs
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1152(CX), Y10
- VMOVDQU 1184(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1216(CX), Y10
- VMOVDQU 1248(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1280(CX), Y10
- VMOVDQU 1312(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1344(CX), Y10
- VMOVDQU 1376(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1408(CX), Y10
- VMOVDQU 1440(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 1472(CX), Y10
- VMOVDQU 1504(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 1536(CX), Y10
- VMOVDQU 1568(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1600(CX), Y10
- VMOVDQU 1632(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1664(CX), Y10
- VMOVDQU 1696(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y2, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y3, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y4, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y5, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y6, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y7, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y8, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_3x9Xor_loop
- VZEROUPPER
-
-mulAvxTwo_3x9Xor_end:
- RET
-
-// func mulAvxTwo_3x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x10(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 75 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x10_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), AX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), R11
- MOVQ 120(SI), R12
- MOVQ 144(SI), R13
- MOVQ 168(SI), R14
- MOVQ 192(SI), R15
- MOVQ 216(SI), SI
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, SI
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X10
- VPBROADCASTB X10, Y10
- MOVQ n+80(FP), BP
- SHRQ $0x05, BP
-
-mulAvxTwo_3x10_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU (CX), Y11
- VMOVDQU 32(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y0
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y1
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y2
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y3
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y4
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y5
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y6
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y7
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y8
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y9
-
- // Load and process 32 bytes from input 1 to 10 outputs
- VMOVDQU (BX), Y13
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 2 to 10 outputs
- VMOVDQU (AX), Y13
- ADDQ $0x20, AX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1280(CX), Y11
- VMOVDQU 1312(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1344(CX), Y11
- VMOVDQU 1376(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 1408(CX), Y11
- VMOVDQU 1440(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 1472(CX), Y11
- VMOVDQU 1504(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 1536(CX), Y11
- VMOVDQU 1568(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 1600(CX), Y11
- VMOVDQU 1632(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1664(CX), Y11
- VMOVDQU 1696(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1728(CX), Y11
- VMOVDQU 1760(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1792(CX), Y11
- VMOVDQU 1824(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1856(CX), Y11
- VMOVDQU 1888(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y3, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y4, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y5, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y6, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y7, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y8, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y9, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_3x10_loop
- VZEROUPPER
-
-mulAvxTwo_3x10_end:
- RET
-
-// func mulAvxTwo_3x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_3x10Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 75 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x10Xor_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), AX
- MOVQ out_base+48(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), R11
- MOVQ 120(SI), R12
- MOVQ 144(SI), R13
- MOVQ 168(SI), R14
- MOVQ 192(SI), R15
- MOVQ 216(SI), SI
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, SI
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X10
- VPBROADCASTB X10, Y10
- MOVQ n+80(FP), BP
- SHRQ $0x05, BP
-
-mulAvxTwo_3x10Xor_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU (DI), Y0
- VMOVDQU (CX), Y11
- VMOVDQU 32(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU (R8), Y1
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU (R9), Y2
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU (R10), Y3
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU (R11), Y4
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU (R12), Y5
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU (R13), Y6
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU (R14), Y7
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU (R15), Y8
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU (SI), Y9
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 1 to 10 outputs
- VMOVDQU (BX), Y13
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 2 to 10 outputs
- VMOVDQU (AX), Y13
- ADDQ $0x20, AX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1280(CX), Y11
- VMOVDQU 1312(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1344(CX), Y11
- VMOVDQU 1376(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 1408(CX), Y11
- VMOVDQU 1440(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 1472(CX), Y11
- VMOVDQU 1504(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 1536(CX), Y11
- VMOVDQU 1568(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 1600(CX), Y11
- VMOVDQU 1632(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1664(CX), Y11
- VMOVDQU 1696(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1728(CX), Y11
- VMOVDQU 1760(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1792(CX), Y11
- VMOVDQU 1824(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1856(CX), Y11
- VMOVDQU 1888(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs
- VMOVDQU Y0, (DI)
- ADDQ $0x20, DI
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y3, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y4, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y5, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y6, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y7, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y8, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y9, (SI)
- ADDQ $0x20, SI
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_3x10Xor_loop
- VZEROUPPER
-
-mulAvxTwo_3x10Xor_end:
- RET
-
-// func mulAvxTwo_4x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x1(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 12 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x1_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- MOVQ in_base+24(FP), CX
- MOVQ (CX), DX
- MOVQ 24(CX), BX
- MOVQ 48(CX), SI
- MOVQ 72(CX), CX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), DI
- MOVQ start+72(FP), R8
-
- // Add start offset to output
- ADDQ R8, DI
-
- // Add start offset to input
- ADDQ R8, DX
- ADDQ R8, BX
- ADDQ R8, SI
- ADDQ R8, CX
- MOVQ $0x0000000f, R8
- MOVQ R8, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_4x1_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y9, Y10, Y10
- VPAND Y9, Y11, Y11
- VPSHUFB Y10, Y0, Y10
- VPSHUFB Y11, Y1, Y11
- VPXOR Y10, Y11, Y8
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y9, Y10, Y10
- VPAND Y9, Y11, Y11
- VPSHUFB Y10, Y2, Y10
- VPSHUFB Y11, Y3, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y9, Y10, Y10
- VPAND Y9, Y11, Y11
- VPSHUFB Y10, Y4, Y10
- VPSHUFB Y11, Y5, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (CX), Y10
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y9, Y10, Y10
- VPAND Y9, Y11, Y11
- VPSHUFB Y10, Y6, Y10
- VPSHUFB Y11, Y7, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 1 outputs
- VMOVDQU Y8, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x1_loop
- VZEROUPPER
-
-mulAvxTwo_4x1_end:
- RET
-
-// func mulAvxTwo_4x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x1_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 22 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x1_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R8
- MOVQ start+72(FP), R9
-
- // Add start offset to output
- ADDQ R9, R8
-
- // Add start offset to input
- ADDQ R9, BX
- ADDQ R9, SI
- ADDQ R9, DI
- ADDQ R9, DX
- MOVQ $0x0000000f, R9
- MOVQ R9, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_4x1_64_loop:
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y6
- VMOVDQU 32(BX), Y5
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- VPXOR Y3, Y4, Y0
- VPXOR Y5, Y6, Y1
-
- // Load and process 64 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y6
- VMOVDQU 32(SI), Y5
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y6
- VMOVDQU 32(DI), Y5
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 3 to 1 outputs
- VMOVDQU (DX), Y6
- VMOVDQU 32(DX), Y5
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Store 1 outputs
- VMOVDQU Y0, (R8)
- VMOVDQU Y1, 32(R8)
- ADDQ $0x40, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x1_64_loop
- VZEROUPPER
-
-mulAvxTwo_4x1_64_end:
- RET
-
-// func mulAvxTwo_4x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x1Xor(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 12 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x1Xor_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- MOVQ in_base+24(FP), CX
- MOVQ (CX), DX
- MOVQ 24(CX), BX
- MOVQ 48(CX), SI
- MOVQ 72(CX), CX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), DI
- MOVQ start+72(FP), R8
-
- // Add start offset to output
- ADDQ R8, DI
-
- // Add start offset to input
- ADDQ R8, DX
- ADDQ R8, BX
- ADDQ R8, SI
- ADDQ R8, CX
- MOVQ $0x0000000f, R8
- MOVQ R8, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_4x1Xor_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y9, Y10, Y10
- VPAND Y9, Y11, Y11
- VMOVDQU (DI), Y8
- VPSHUFB Y10, Y0, Y10
- VPSHUFB Y11, Y1, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y9, Y10, Y10
- VPAND Y9, Y11, Y11
- VPSHUFB Y10, Y2, Y10
- VPSHUFB Y11, Y3, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y9, Y10, Y10
- VPAND Y9, Y11, Y11
- VPSHUFB Y10, Y4, Y10
- VPSHUFB Y11, Y5, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (CX), Y10
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y9, Y10, Y10
- VPAND Y9, Y11, Y11
- VPSHUFB Y10, Y6, Y10
- VPSHUFB Y11, Y7, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 1 outputs
- VMOVDQU Y8, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x1Xor_loop
- VZEROUPPER
-
-mulAvxTwo_4x1Xor_end:
- RET
-
-// func mulAvxTwo_4x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x1_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 22 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x1_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R8
- MOVQ start+72(FP), R9
-
- // Add start offset to output
- ADDQ R9, R8
-
- // Add start offset to input
- ADDQ R9, BX
- ADDQ R9, SI
- ADDQ R9, DI
- ADDQ R9, DX
- MOVQ $0x0000000f, R9
- MOVQ R9, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_4x1_64Xor_loop:
- // Load 1 outputs
- VMOVDQU (R8), Y0
- VMOVDQU 32(R8), Y1
-
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y6
- VMOVDQU 32(BX), Y5
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y6
- VMOVDQU 32(SI), Y5
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y6
- VMOVDQU 32(DI), Y5
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 3 to 1 outputs
- VMOVDQU (DX), Y6
- VMOVDQU 32(DX), Y5
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Store 1 outputs
- VMOVDQU Y0, (R8)
- VMOVDQU Y1, 32(R8)
- ADDQ $0x40, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x1_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_4x1_64Xor_end:
- RET
-
-// func mulAvxTwo_4x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x2(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 23 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x2_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R8
- MOVQ start+72(FP), R10
-
- // Add start offset to output
- ADDQ R10, R9
- ADDQ R10, R8
-
- // Add start offset to input
- ADDQ R10, BX
- ADDQ R10, SI
- ADDQ R10, DI
- ADDQ R10, DX
- MOVQ $0x0000000f, R10
- MOVQ R10, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_4x2_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y5
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y0
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y1
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y5
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y5
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (DX), Y5
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Store 2 outputs
- VMOVDQU Y0, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x2_loop
- VZEROUPPER
-
-mulAvxTwo_4x2_end:
- RET
-
-// func mulAvxTwo_4x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x2_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 41 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x2_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R8
- MOVQ start+72(FP), R10
-
- // Add start offset to output
- ADDQ R10, R9
- ADDQ R10, R8
-
- // Add start offset to input
- ADDQ R10, BX
- ADDQ R10, SI
- ADDQ R10, DI
- ADDQ R10, DX
- MOVQ $0x0000000f, R10
- MOVQ R10, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_4x2_64_loop:
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9
- VMOVDQU 32(BX), Y11
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y0
- VPXOR Y7, Y8, Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y2
- VPXOR Y7, Y8, Y3
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y9
- VMOVDQU 32(SI), Y11
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y9
- VMOVDQU 32(DI), Y11
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 3 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (R9)
- VMOVDQU Y1, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y2, (R8)
- VMOVDQU Y3, 32(R8)
- ADDQ $0x40, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x2_64_loop
- VZEROUPPER
-
-mulAvxTwo_4x2_64_end:
- RET
-
-// func mulAvxTwo_4x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x2Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 23 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x2Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R8
- MOVQ start+72(FP), R10
-
- // Add start offset to output
- ADDQ R10, R9
- ADDQ R10, R8
-
- // Add start offset to input
- ADDQ R10, BX
- ADDQ R10, SI
- ADDQ R10, DI
- ADDQ R10, DX
- MOVQ $0x0000000f, R10
- MOVQ R10, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_4x2Xor_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y5
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (R9), Y0
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU (R8), Y1
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y5
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y5
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (DX), Y5
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Store 2 outputs
- VMOVDQU Y0, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y1, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x2Xor_loop
- VZEROUPPER
-
-mulAvxTwo_4x2Xor_end:
- RET
-
-// func mulAvxTwo_4x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x2_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 41 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x2_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R8
- MOVQ start+72(FP), R10
-
- // Add start offset to output
- ADDQ R10, R9
- ADDQ R10, R8
-
- // Add start offset to input
- ADDQ R10, BX
- ADDQ R10, SI
- ADDQ R10, DI
- ADDQ R10, DX
- MOVQ $0x0000000f, R10
- MOVQ R10, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_4x2_64Xor_loop:
- // Load 2 outputs
- VMOVDQU (R9), Y0
- VMOVDQU 32(R9), Y1
- VMOVDQU (R8), Y2
- VMOVDQU 32(R8), Y3
-
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9
- VMOVDQU 32(BX), Y11
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y9
- VMOVDQU 32(SI), Y11
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y9
- VMOVDQU 32(DI), Y11
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 3 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (R9)
- VMOVDQU Y1, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y2, (R8)
- VMOVDQU Y3, 32(R8)
- ADDQ $0x40, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x2_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_4x2_64Xor_end:
- RET
-
-// func mulAvxTwo_4x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x3(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 32 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x3_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R8
- MOVQ start+72(FP), R11
-
- // Add start offset to output
- ADDQ R11, R9
- ADDQ R11, R10
- ADDQ R11, R8
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X3
- VPBROADCASTB X3, Y3
-
-mulAvxTwo_4x3_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y0
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y1
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y2
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y6
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y6
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y1, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y2, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x3_loop
- VZEROUPPER
-
-mulAvxTwo_4x3_end:
- RET
-
-// func mulAvxTwo_4x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x3_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 58 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x3_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R8
- MOVQ start+72(FP), R11
-
- // Add start offset to output
- ADDQ R11, R9
- ADDQ R11, R10
- ADDQ R11, R8
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_4x3_64_loop:
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y11
- VMOVDQU 32(BX), Y13
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y0
- VPXOR Y9, Y10, Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y2
- VPXOR Y9, Y10, Y3
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y4
- VPXOR Y9, Y10, Y5
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y11
- VMOVDQU 32(SI), Y13
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y11
- VMOVDQU 32(DI), Y13
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 3 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (R9)
- VMOVDQU Y1, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y2, (R10)
- VMOVDQU Y3, 32(R10)
- ADDQ $0x40, R10
- VMOVDQU Y4, (R8)
- VMOVDQU Y5, 32(R8)
- ADDQ $0x40, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x3_64_loop
- VZEROUPPER
-
-mulAvxTwo_4x3_64_end:
- RET
-
-// func mulAvxTwo_4x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x3Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 32 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x3Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R8
- MOVQ start+72(FP), R11
-
- // Add start offset to output
- ADDQ R11, R9
- ADDQ R11, R10
- ADDQ R11, R8
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X3
- VPBROADCASTB X3, Y3
-
-mulAvxTwo_4x3Xor_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (R9), Y0
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU (R10), Y1
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU (R8), Y2
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y6
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y6
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y1, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y2, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x3Xor_loop
- VZEROUPPER
-
-mulAvxTwo_4x3Xor_end:
- RET
-
-// func mulAvxTwo_4x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x3_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 58 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x3_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R8
- MOVQ start+72(FP), R11
-
- // Add start offset to output
- ADDQ R11, R9
- ADDQ R11, R10
- ADDQ R11, R8
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_4x3_64Xor_loop:
- // Load 3 outputs
- VMOVDQU (R9), Y0
- VMOVDQU 32(R9), Y1
- VMOVDQU (R10), Y2
- VMOVDQU 32(R10), Y3
- VMOVDQU (R8), Y4
- VMOVDQU 32(R8), Y5
-
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y11
- VMOVDQU 32(BX), Y13
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y11
- VMOVDQU 32(SI), Y13
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y11
- VMOVDQU 32(DI), Y13
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 3 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (R9)
- VMOVDQU Y1, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y2, (R10)
- VMOVDQU Y3, 32(R10)
- ADDQ $0x40, R10
- VMOVDQU Y4, (R8)
- VMOVDQU Y5, 32(R8)
- ADDQ $0x40, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x3_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_4x3_64Xor_end:
- RET
-
-// func mulAvxTwo_4x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x4(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 41 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x4_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R11
- MOVQ 72(R8), R8
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, R11
- ADDQ R12, R8
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_4x4_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y0
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y1
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y2
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y3
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (SI), Y7
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (DI), Y7
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- VMOVDQU Y0, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y1, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y2, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y3, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x4_loop
- VZEROUPPER
-
-mulAvxTwo_4x4_end:
- RET
-
-// func mulAvxTwo_4x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x4Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 41 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x4Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R11
- MOVQ 72(R8), R8
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, R11
- ADDQ R12, R8
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_4x4Xor_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (R9), Y0
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU (R10), Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU (R11), Y2
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU (R8), Y3
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (SI), Y7
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (DI), Y7
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- VMOVDQU Y0, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y1, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y2, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y3, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x4Xor_loop
- VZEROUPPER
-
-mulAvxTwo_4x4Xor_end:
- RET
-
-// func mulAvxTwo_4x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x5(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 50 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x5_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R11
- MOVQ 72(R8), R12
- MOVQ 96(R8), R8
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, R12
- ADDQ R13, R8
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_4x5_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y0
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y1
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y2
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y3
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y4
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (SI), Y8
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (DI), Y8
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- VMOVDQU Y0, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y1, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y2, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y3, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y4, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x5_loop
- VZEROUPPER
-
-mulAvxTwo_4x5_end:
- RET
-
-// func mulAvxTwo_4x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x5Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 50 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x5Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R11
- MOVQ 72(R8), R12
- MOVQ 96(R8), R8
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, R12
- ADDQ R13, R8
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_4x5Xor_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (R9), Y0
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU (R10), Y1
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU (R11), Y2
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU (R12), Y3
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU (R8), Y4
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (SI), Y8
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (DI), Y8
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- VMOVDQU Y0, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y1, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y2, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y3, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y4, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x5Xor_loop
- VZEROUPPER
-
-mulAvxTwo_4x5Xor_end:
- RET
-
-// func mulAvxTwo_4x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x6(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 59 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x6_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R11
- MOVQ 72(R8), R12
- MOVQ 96(R8), R13
- MOVQ 120(R8), R8
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, R13
- ADDQ R14, R8
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_4x6_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y0
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y1
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y2
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y3
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y4
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y5
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI), Y9
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DI), Y9
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- VMOVDQU Y0, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y1, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y2, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y3, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y4, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y5, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x6_loop
- VZEROUPPER
-
-mulAvxTwo_4x6_end:
- RET
-
-// func mulAvxTwo_4x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x6Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 59 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x6Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R11
- MOVQ 72(R8), R12
- MOVQ 96(R8), R13
- MOVQ 120(R8), R8
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, R13
- ADDQ R14, R8
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_4x6Xor_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (R9), Y0
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU (R10), Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU (R11), Y2
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU (R12), Y3
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU (R13), Y4
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU (R8), Y5
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI), Y9
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DI), Y9
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- VMOVDQU Y0, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y1, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y2, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y3, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y4, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y5, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x6Xor_loop
- VZEROUPPER
-
-mulAvxTwo_4x6Xor_end:
- RET
-
-// func mulAvxTwo_4x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x7(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 68 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x7_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R11
- MOVQ 72(R8), R12
- MOVQ 96(R8), R13
- MOVQ 120(R8), R14
- MOVQ 144(R8), R8
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, R8
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_4x7_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y0
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y1
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y2
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y3
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y4
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y5
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y6
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DI), Y10
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- VMOVDQU Y0, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y1, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y2, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y3, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y4, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y5, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y6, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x7_loop
- VZEROUPPER
-
-mulAvxTwo_4x7_end:
- RET
-
-// func mulAvxTwo_4x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x7Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 68 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x7Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R11
- MOVQ 72(R8), R12
- MOVQ 96(R8), R13
- MOVQ 120(R8), R14
- MOVQ 144(R8), R8
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, R8
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_4x7Xor_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (R9), Y0
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU (R10), Y1
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU (R11), Y2
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU (R12), Y3
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU (R13), Y4
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU (R14), Y5
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU (R8), Y6
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DI), Y10
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- VMOVDQU Y0, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y1, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y2, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y3, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y4, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y5, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y6, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x7Xor_loop
- VZEROUPPER
-
-mulAvxTwo_4x7Xor_end:
- RET
-
-// func mulAvxTwo_4x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x8(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 77 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x8_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R11
- MOVQ 72(R8), R12
- MOVQ 96(R8), R13
- MOVQ 120(R8), R14
- MOVQ 144(R8), R15
- MOVQ 168(R8), R8
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R8
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X8
- VPBROADCASTB X8, Y8
-
-mulAvxTwo_4x8_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y0
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y1
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y2
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y3
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y4
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y5
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y6
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y7
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI), Y11
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DI), Y11
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- VMOVDQU Y0, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y1, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y2, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y3, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y4, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y5, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y6, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y7, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x8_loop
- VZEROUPPER
-
-mulAvxTwo_4x8_end:
- RET
-
-// func mulAvxTwo_4x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x8Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 77 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x8Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R11
- MOVQ 72(R8), R12
- MOVQ 96(R8), R13
- MOVQ 120(R8), R14
- MOVQ 144(R8), R15
- MOVQ 168(R8), R8
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R8
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X8
- VPBROADCASTB X8, Y8
-
-mulAvxTwo_4x8Xor_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (R9), Y0
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU (R10), Y1
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU (R11), Y2
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU (R12), Y3
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU (R13), Y4
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU (R14), Y5
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU (R15), Y6
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU (R8), Y7
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI), Y11
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DI), Y11
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- VMOVDQU Y0, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y1, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y2, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y3, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y4, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y5, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y6, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y7, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_4x8Xor_loop
- VZEROUPPER
-
-mulAvxTwo_4x8Xor_end:
- RET
-
-// func mulAvxTwo_4x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x9(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 86 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x9_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), SI
- MOVQ 72(AX), AX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), R11
- MOVQ 96(DI), R12
- MOVQ 120(DI), R13
- MOVQ 144(DI), R14
- MOVQ 168(DI), R15
- MOVQ 192(DI), DI
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, DI
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X9
- VPBROADCASTB X9, Y9
- MOVQ n+80(FP), BP
- SHRQ $0x05, BP
-
-mulAvxTwo_4x9_loop:
- // Load and process 32 bytes from input 0 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU (CX), Y10
- VMOVDQU 32(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y0
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y1
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y2
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y3
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y4
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y5
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y6
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y7
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y8
-
- // Load and process 32 bytes from input 1 to 9 outputs
- VMOVDQU (BX), Y12
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 9 outputs
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1152(CX), Y10
- VMOVDQU 1184(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1216(CX), Y10
- VMOVDQU 1248(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1280(CX), Y10
- VMOVDQU 1312(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1344(CX), Y10
- VMOVDQU 1376(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1408(CX), Y10
- VMOVDQU 1440(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 1472(CX), Y10
- VMOVDQU 1504(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 1536(CX), Y10
- VMOVDQU 1568(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1600(CX), Y10
- VMOVDQU 1632(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1664(CX), Y10
- VMOVDQU 1696(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 3 to 9 outputs
- VMOVDQU (AX), Y12
- ADDQ $0x20, AX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1728(CX), Y10
- VMOVDQU 1760(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1792(CX), Y10
- VMOVDQU 1824(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1856(CX), Y10
- VMOVDQU 1888(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1920(CX), Y10
- VMOVDQU 1952(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1984(CX), Y10
- VMOVDQU 2016(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2048(CX), Y10
- VMOVDQU 2080(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2112(CX), Y10
- VMOVDQU 2144(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2176(CX), Y10
- VMOVDQU 2208(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2240(CX), Y10
- VMOVDQU 2272(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y2, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y3, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y4, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y5, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y6, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y7, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y8, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_4x9_loop
- VZEROUPPER
-
-mulAvxTwo_4x9_end:
- RET
-
-// func mulAvxTwo_4x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x9Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 86 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x9Xor_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), SI
- MOVQ 72(AX), AX
- MOVQ out_base+48(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), R11
- MOVQ 96(DI), R12
- MOVQ 120(DI), R13
- MOVQ 144(DI), R14
- MOVQ 168(DI), R15
- MOVQ 192(DI), DI
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, DI
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X9
- VPBROADCASTB X9, Y9
- MOVQ n+80(FP), BP
- SHRQ $0x05, BP
-
-mulAvxTwo_4x9Xor_loop:
- // Load and process 32 bytes from input 0 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU (R8), Y0
- VMOVDQU (CX), Y10
- VMOVDQU 32(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU (R9), Y1
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU (R10), Y2
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU (R11), Y3
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU (R12), Y4
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU (R13), Y5
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU (R14), Y6
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU (R15), Y7
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU (DI), Y8
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 1 to 9 outputs
- VMOVDQU (BX), Y12
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 9 outputs
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1152(CX), Y10
- VMOVDQU 1184(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1216(CX), Y10
- VMOVDQU 1248(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1280(CX), Y10
- VMOVDQU 1312(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1344(CX), Y10
- VMOVDQU 1376(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1408(CX), Y10
- VMOVDQU 1440(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 1472(CX), Y10
- VMOVDQU 1504(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 1536(CX), Y10
- VMOVDQU 1568(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1600(CX), Y10
- VMOVDQU 1632(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1664(CX), Y10
- VMOVDQU 1696(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 3 to 9 outputs
- VMOVDQU (AX), Y12
- ADDQ $0x20, AX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1728(CX), Y10
- VMOVDQU 1760(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1792(CX), Y10
- VMOVDQU 1824(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1856(CX), Y10
- VMOVDQU 1888(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1920(CX), Y10
- VMOVDQU 1952(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1984(CX), Y10
- VMOVDQU 2016(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2048(CX), Y10
- VMOVDQU 2080(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2112(CX), Y10
- VMOVDQU 2144(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2176(CX), Y10
- VMOVDQU 2208(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2240(CX), Y10
- VMOVDQU 2272(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs
- VMOVDQU Y0, (R8)
- ADDQ $0x20, R8
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y2, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y3, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y4, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y5, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y6, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y7, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y8, (DI)
- ADDQ $0x20, DI
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_4x9Xor_loop
- VZEROUPPER
-
-mulAvxTwo_4x9Xor_end:
- RET
-
-// func mulAvxTwo_4x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x10(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 95 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x10_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ start+72(FP), R9
-
- // Add start offset to input
- ADDQ R9, BX
- ADDQ R9, SI
- ADDQ R9, DI
- ADDQ R9, DX
- MOVQ $0x0000000f, R10
- MOVQ R10, X10
- VPBROADCASTB X10, Y10
-
-mulAvxTwo_4x10_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (BX), Y13
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU (CX), Y11
- VMOVDQU 32(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y0
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y1
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y2
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y3
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y4
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y5
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y6
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y7
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y8
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y9
-
- // Load and process 32 bytes from input 1 to 10 outputs
- VMOVDQU (SI), Y13
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 2 to 10 outputs
- VMOVDQU (DI), Y13
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1280(CX), Y11
- VMOVDQU 1312(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1344(CX), Y11
- VMOVDQU 1376(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 1408(CX), Y11
- VMOVDQU 1440(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 1472(CX), Y11
- VMOVDQU 1504(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 1536(CX), Y11
- VMOVDQU 1568(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 1600(CX), Y11
- VMOVDQU 1632(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1664(CX), Y11
- VMOVDQU 1696(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1728(CX), Y11
- VMOVDQU 1760(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1792(CX), Y11
- VMOVDQU 1824(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1856(CX), Y11
- VMOVDQU 1888(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 3 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1920(CX), Y11
- VMOVDQU 1952(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1984(CX), Y11
- VMOVDQU 2016(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2048(CX), Y11
- VMOVDQU 2080(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2112(CX), Y11
- VMOVDQU 2144(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2176(CX), Y11
- VMOVDQU 2208(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2240(CX), Y11
- VMOVDQU 2272(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2304(CX), Y11
- VMOVDQU 2336(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 2368(CX), Y11
- VMOVDQU 2400(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 2432(CX), Y11
- VMOVDQU 2464(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 2496(CX), Y11
- VMOVDQU 2528(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs
- MOVQ (R8), R10
- VMOVDQU Y0, (R10)(R9*1)
- MOVQ 24(R8), R10
- VMOVDQU Y1, (R10)(R9*1)
- MOVQ 48(R8), R10
- VMOVDQU Y2, (R10)(R9*1)
- MOVQ 72(R8), R10
- VMOVDQU Y3, (R10)(R9*1)
- MOVQ 96(R8), R10
- VMOVDQU Y4, (R10)(R9*1)
- MOVQ 120(R8), R10
- VMOVDQU Y5, (R10)(R9*1)
- MOVQ 144(R8), R10
- VMOVDQU Y6, (R10)(R9*1)
- MOVQ 168(R8), R10
- VMOVDQU Y7, (R10)(R9*1)
- MOVQ 192(R8), R10
- VMOVDQU Y8, (R10)(R9*1)
- MOVQ 216(R8), R10
- VMOVDQU Y9, (R10)(R9*1)
-
- // Prepare for next loop
- ADDQ $0x20, R9
- DECQ AX
- JNZ mulAvxTwo_4x10_loop
- VZEROUPPER
-
-mulAvxTwo_4x10_end:
- RET
-
-// func mulAvxTwo_4x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_4x10Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 95 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x10Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), DX
- MOVQ out_base+48(FP), R8
- MOVQ start+72(FP), R9
-
- // Add start offset to input
- ADDQ R9, BX
- ADDQ R9, SI
- ADDQ R9, DI
- ADDQ R9, DX
- MOVQ $0x0000000f, R10
- MOVQ R10, X10
- VPBROADCASTB X10, Y10
-
-mulAvxTwo_4x10Xor_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (BX), Y13
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- MOVQ (R8), R10
- VMOVDQU (R10)(R9*1), Y0
- VMOVDQU (CX), Y11
- VMOVDQU 32(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- MOVQ 24(R8), R10
- VMOVDQU (R10)(R9*1), Y1
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- MOVQ 48(R8), R10
- VMOVDQU (R10)(R9*1), Y2
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- MOVQ 72(R8), R10
- VMOVDQU (R10)(R9*1), Y3
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- MOVQ 96(R8), R10
- VMOVDQU (R10)(R9*1), Y4
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- MOVQ 120(R8), R10
- VMOVDQU (R10)(R9*1), Y5
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- MOVQ 144(R8), R10
- VMOVDQU (R10)(R9*1), Y6
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- MOVQ 168(R8), R10
- VMOVDQU (R10)(R9*1), Y7
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- MOVQ 192(R8), R10
- VMOVDQU (R10)(R9*1), Y8
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- MOVQ 216(R8), R10
- VMOVDQU (R10)(R9*1), Y9
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 1 to 10 outputs
- VMOVDQU (SI), Y13
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 2 to 10 outputs
- VMOVDQU (DI), Y13
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1280(CX), Y11
- VMOVDQU 1312(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1344(CX), Y11
- VMOVDQU 1376(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 1408(CX), Y11
- VMOVDQU 1440(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 1472(CX), Y11
- VMOVDQU 1504(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 1536(CX), Y11
- VMOVDQU 1568(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 1600(CX), Y11
- VMOVDQU 1632(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1664(CX), Y11
- VMOVDQU 1696(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1728(CX), Y11
- VMOVDQU 1760(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1792(CX), Y11
- VMOVDQU 1824(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1856(CX), Y11
- VMOVDQU 1888(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 3 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1920(CX), Y11
- VMOVDQU 1952(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1984(CX), Y11
- VMOVDQU 2016(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2048(CX), Y11
- VMOVDQU 2080(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2112(CX), Y11
- VMOVDQU 2144(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2176(CX), Y11
- VMOVDQU 2208(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2240(CX), Y11
- VMOVDQU 2272(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2304(CX), Y11
- VMOVDQU 2336(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 2368(CX), Y11
- VMOVDQU 2400(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 2432(CX), Y11
- VMOVDQU 2464(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 2496(CX), Y11
- VMOVDQU 2528(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs
- MOVQ (R8), R10
- VMOVDQU Y0, (R10)(R9*1)
- MOVQ 24(R8), R10
- VMOVDQU Y1, (R10)(R9*1)
- MOVQ 48(R8), R10
- VMOVDQU Y2, (R10)(R9*1)
- MOVQ 72(R8), R10
- VMOVDQU Y3, (R10)(R9*1)
- MOVQ 96(R8), R10
- VMOVDQU Y4, (R10)(R9*1)
- MOVQ 120(R8), R10
- VMOVDQU Y5, (R10)(R9*1)
- MOVQ 144(R8), R10
- VMOVDQU Y6, (R10)(R9*1)
- MOVQ 168(R8), R10
- VMOVDQU Y7, (R10)(R9*1)
- MOVQ 192(R8), R10
- VMOVDQU Y8, (R10)(R9*1)
- MOVQ 216(R8), R10
- VMOVDQU Y9, (R10)(R9*1)
-
- // Prepare for next loop
- ADDQ $0x20, R9
- DECQ AX
- JNZ mulAvxTwo_4x10Xor_loop
- VZEROUPPER
-
-mulAvxTwo_4x10Xor_end:
- RET
-
-// func mulAvxTwo_5x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x1(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 14 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x1_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- MOVQ in_base+24(FP), CX
- MOVQ (CX), DX
- MOVQ 24(CX), BX
- MOVQ 48(CX), SI
- MOVQ 72(CX), DI
- MOVQ 96(CX), CX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R8
- MOVQ start+72(FP), R9
-
- // Add start offset to output
- ADDQ R9, R8
-
- // Add start offset to input
- ADDQ R9, DX
- ADDQ R9, BX
- ADDQ R9, SI
- ADDQ R9, DI
- ADDQ R9, CX
- MOVQ $0x0000000f, R9
- MOVQ R9, X11
- VPBROADCASTB X11, Y11
-
-mulAvxTwo_5x1_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y11, Y12, Y12
- VPAND Y11, Y13, Y13
- VPSHUFB Y12, Y0, Y12
- VPSHUFB Y13, Y1, Y13
- VPXOR Y12, Y13, Y10
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (BX), Y12
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y11, Y12, Y12
- VPAND Y11, Y13, Y13
- VPSHUFB Y12, Y2, Y12
- VPSHUFB Y13, Y3, Y13
- XOR3WAY( $0x00, Y12, Y13, Y10)
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y11, Y12, Y12
- VPAND Y11, Y13, Y13
- VPSHUFB Y12, Y4, Y12
- VPSHUFB Y13, Y5, Y13
- XOR3WAY( $0x00, Y12, Y13, Y10)
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (DI), Y12
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y11, Y12, Y12
- VPAND Y11, Y13, Y13
- VPSHUFB Y12, Y6, Y12
- VPSHUFB Y13, Y7, Y13
- XOR3WAY( $0x00, Y12, Y13, Y10)
-
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (CX), Y12
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y11, Y12, Y12
- VPAND Y11, Y13, Y13
- VPSHUFB Y12, Y8, Y12
- VPSHUFB Y13, Y9, Y13
- XOR3WAY( $0x00, Y12, Y13, Y10)
-
- // Store 1 outputs
- VMOVDQU Y10, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x1_loop
- VZEROUPPER
-
-mulAvxTwo_5x1_end:
- RET
-
-// func mulAvxTwo_5x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x1_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 26 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x1_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R9
- MOVQ start+72(FP), R10
-
- // Add start offset to output
- ADDQ R10, R9
-
- // Add start offset to input
- ADDQ R10, BX
- ADDQ R10, SI
- ADDQ R10, DI
- ADDQ R10, R8
- ADDQ R10, DX
- MOVQ $0x0000000f, R10
- MOVQ R10, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_5x1_64_loop:
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y6
- VMOVDQU 32(BX), Y5
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- VPXOR Y3, Y4, Y0
- VPXOR Y5, Y6, Y1
-
- // Load and process 64 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y6
- VMOVDQU 32(SI), Y5
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y6
- VMOVDQU 32(DI), Y5
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y6
- VMOVDQU 32(R8), Y5
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 4 to 1 outputs
- VMOVDQU (DX), Y6
- VMOVDQU 32(DX), Y5
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Store 1 outputs
- VMOVDQU Y0, (R9)
- VMOVDQU Y1, 32(R9)
- ADDQ $0x40, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x1_64_loop
- VZEROUPPER
-
-mulAvxTwo_5x1_64_end:
- RET
-
-// func mulAvxTwo_5x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x1Xor(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 14 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x1Xor_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- MOVQ in_base+24(FP), CX
- MOVQ (CX), DX
- MOVQ 24(CX), BX
- MOVQ 48(CX), SI
- MOVQ 72(CX), DI
- MOVQ 96(CX), CX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R8
- MOVQ start+72(FP), R9
-
- // Add start offset to output
- ADDQ R9, R8
-
- // Add start offset to input
- ADDQ R9, DX
- ADDQ R9, BX
- ADDQ R9, SI
- ADDQ R9, DI
- ADDQ R9, CX
- MOVQ $0x0000000f, R9
- MOVQ R9, X11
- VPBROADCASTB X11, Y11
-
-mulAvxTwo_5x1Xor_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y11, Y12, Y12
- VPAND Y11, Y13, Y13
- VMOVDQU (R8), Y10
- VPSHUFB Y12, Y0, Y12
- VPSHUFB Y13, Y1, Y13
- XOR3WAY( $0x00, Y12, Y13, Y10)
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (BX), Y12
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y11, Y12, Y12
- VPAND Y11, Y13, Y13
- VPSHUFB Y12, Y2, Y12
- VPSHUFB Y13, Y3, Y13
- XOR3WAY( $0x00, Y12, Y13, Y10)
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y11, Y12, Y12
- VPAND Y11, Y13, Y13
- VPSHUFB Y12, Y4, Y12
- VPSHUFB Y13, Y5, Y13
- XOR3WAY( $0x00, Y12, Y13, Y10)
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (DI), Y12
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y11, Y12, Y12
- VPAND Y11, Y13, Y13
- VPSHUFB Y12, Y6, Y12
- VPSHUFB Y13, Y7, Y13
- XOR3WAY( $0x00, Y12, Y13, Y10)
-
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (CX), Y12
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y11, Y12, Y12
- VPAND Y11, Y13, Y13
- VPSHUFB Y12, Y8, Y12
- VPSHUFB Y13, Y9, Y13
- XOR3WAY( $0x00, Y12, Y13, Y10)
-
- // Store 1 outputs
- VMOVDQU Y10, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x1Xor_loop
- VZEROUPPER
-
-mulAvxTwo_5x1Xor_end:
- RET
-
-// func mulAvxTwo_5x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x1_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 26 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x1_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R9
- MOVQ start+72(FP), R10
-
- // Add start offset to output
- ADDQ R10, R9
-
- // Add start offset to input
- ADDQ R10, BX
- ADDQ R10, SI
- ADDQ R10, DI
- ADDQ R10, R8
- ADDQ R10, DX
- MOVQ $0x0000000f, R10
- MOVQ R10, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_5x1_64Xor_loop:
- // Load 1 outputs
- VMOVDQU (R9), Y0
- VMOVDQU 32(R9), Y1
-
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y6
- VMOVDQU 32(BX), Y5
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y6
- VMOVDQU 32(SI), Y5
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y6
- VMOVDQU 32(DI), Y5
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y6
- VMOVDQU 32(R8), Y5
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 4 to 1 outputs
- VMOVDQU (DX), Y6
- VMOVDQU 32(DX), Y5
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Store 1 outputs
- VMOVDQU Y0, (R9)
- VMOVDQU Y1, 32(R9)
- ADDQ $0x40, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x1_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_5x1_64Xor_end:
- RET
-
-// func mulAvxTwo_5x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x2(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 27 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x2_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R9
- MOVQ start+72(FP), R11
-
- // Add start offset to output
- ADDQ R11, R10
- ADDQ R11, R9
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, R8
- ADDQ R11, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_5x2_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y5
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y0
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y1
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y5
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y5
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y5
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (DX), Y5
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Store 2 outputs
- VMOVDQU Y0, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x2_loop
- VZEROUPPER
-
-mulAvxTwo_5x2_end:
- RET
-
-// func mulAvxTwo_5x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x2_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 49 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x2_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R9
- MOVQ start+72(FP), R11
-
- // Add start offset to output
- ADDQ R11, R10
- ADDQ R11, R9
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, R8
- ADDQ R11, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_5x2_64_loop:
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9
- VMOVDQU 32(BX), Y11
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y0
- VPXOR Y7, Y8, Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y2
- VPXOR Y7, Y8, Y3
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y9
- VMOVDQU 32(SI), Y11
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y9
- VMOVDQU 32(DI), Y11
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y9
- VMOVDQU 32(R8), Y11
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 4 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (R10)
- VMOVDQU Y1, 32(R10)
- ADDQ $0x40, R10
- VMOVDQU Y2, (R9)
- VMOVDQU Y3, 32(R9)
- ADDQ $0x40, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x2_64_loop
- VZEROUPPER
-
-mulAvxTwo_5x2_64_end:
- RET
-
-// func mulAvxTwo_5x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x2Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 27 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x2Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R9
- MOVQ start+72(FP), R11
-
- // Add start offset to output
- ADDQ R11, R10
- ADDQ R11, R9
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, R8
- ADDQ R11, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_5x2Xor_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y5
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (R10), Y0
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU (R9), Y1
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y5
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y5
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y5
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (DX), Y5
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Store 2 outputs
- VMOVDQU Y0, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y1, (R9)
- ADDQ $0x20, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x2Xor_loop
- VZEROUPPER
-
-mulAvxTwo_5x2Xor_end:
- RET
-
-// func mulAvxTwo_5x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x2_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 49 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x2_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R9
- MOVQ start+72(FP), R11
-
- // Add start offset to output
- ADDQ R11, R10
- ADDQ R11, R9
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, R8
- ADDQ R11, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_5x2_64Xor_loop:
- // Load 2 outputs
- VMOVDQU (R10), Y0
- VMOVDQU 32(R10), Y1
- VMOVDQU (R9), Y2
- VMOVDQU 32(R9), Y3
-
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9
- VMOVDQU 32(BX), Y11
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y9
- VMOVDQU 32(SI), Y11
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y9
- VMOVDQU 32(DI), Y11
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y9
- VMOVDQU 32(R8), Y11
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 4 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (R10)
- VMOVDQU Y1, 32(R10)
- ADDQ $0x40, R10
- VMOVDQU Y2, (R9)
- VMOVDQU Y3, 32(R9)
- ADDQ $0x40, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x2_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_5x2_64Xor_end:
- RET
-
-// func mulAvxTwo_5x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x3(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 38 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x3_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R9
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, R10
- ADDQ R12, R11
- ADDQ R12, R9
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X3
- VPBROADCASTB X3, Y3
-
-mulAvxTwo_5x3_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y0
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y1
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y2
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y6
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y6
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y6
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 768(CX), Y4
- VMOVDQU 800(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 832(CX), Y4
- VMOVDQU 864(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 896(CX), Y4
- VMOVDQU 928(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y1, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x3_loop
- VZEROUPPER
-
-mulAvxTwo_5x3_end:
- RET
-
-// func mulAvxTwo_5x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x3_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 70 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x3_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R9
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, R10
- ADDQ R12, R11
- ADDQ R12, R9
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_5x3_64_loop:
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y11
- VMOVDQU 32(BX), Y13
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y0
- VPXOR Y9, Y10, Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y2
- VPXOR Y9, Y10, Y3
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y4
- VPXOR Y9, Y10, Y5
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y11
- VMOVDQU 32(SI), Y13
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y11
- VMOVDQU 32(DI), Y13
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y11
- VMOVDQU 32(R8), Y13
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 4 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (R10)
- VMOVDQU Y1, 32(R10)
- ADDQ $0x40, R10
- VMOVDQU Y2, (R11)
- VMOVDQU Y3, 32(R11)
- ADDQ $0x40, R11
- VMOVDQU Y4, (R9)
- VMOVDQU Y5, 32(R9)
- ADDQ $0x40, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x3_64_loop
- VZEROUPPER
-
-mulAvxTwo_5x3_64_end:
- RET
-
-// func mulAvxTwo_5x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x3Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 38 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x3Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R9
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, R10
- ADDQ R12, R11
- ADDQ R12, R9
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X3
- VPBROADCASTB X3, Y3
-
-mulAvxTwo_5x3Xor_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (R10), Y0
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU (R11), Y1
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU (R9), Y2
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y6
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y6
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y6
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 768(CX), Y4
- VMOVDQU 800(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 832(CX), Y4
- VMOVDQU 864(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 896(CX), Y4
- VMOVDQU 928(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y1, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y2, (R9)
- ADDQ $0x20, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x3Xor_loop
- VZEROUPPER
-
-mulAvxTwo_5x3Xor_end:
- RET
-
-// func mulAvxTwo_5x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x3_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 70 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x3_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R9
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, R10
- ADDQ R12, R11
- ADDQ R12, R9
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_5x3_64Xor_loop:
- // Load 3 outputs
- VMOVDQU (R10), Y0
- VMOVDQU 32(R10), Y1
- VMOVDQU (R11), Y2
- VMOVDQU 32(R11), Y3
- VMOVDQU (R9), Y4
- VMOVDQU 32(R9), Y5
-
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y11
- VMOVDQU 32(BX), Y13
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y11
- VMOVDQU 32(SI), Y13
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y11
- VMOVDQU 32(DI), Y13
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y11
- VMOVDQU 32(R8), Y13
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 4 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (R10)
- VMOVDQU Y1, 32(R10)
- ADDQ $0x40, R10
- VMOVDQU Y2, (R11)
- VMOVDQU Y3, 32(R11)
- ADDQ $0x40, R11
- VMOVDQU Y4, (R9)
- VMOVDQU Y5, 32(R9)
- ADDQ $0x40, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x3_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_5x3_64Xor_end:
- RET
-
-// func mulAvxTwo_5x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x4(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 49 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x4_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R12
- MOVQ 72(R9), R9
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, R12
- ADDQ R13, R9
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_5x4_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y0
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y1
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y2
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y3
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (SI), Y7
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (DI), Y7
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (R8), Y7
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- VMOVDQU Y0, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y1, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y2, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y3, (R9)
- ADDQ $0x20, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x4_loop
- VZEROUPPER
-
-mulAvxTwo_5x4_end:
- RET
-
-// func mulAvxTwo_5x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x4Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 49 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x4Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R12
- MOVQ 72(R9), R9
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, R12
- ADDQ R13, R9
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_5x4Xor_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (R10), Y0
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU (R11), Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU (R12), Y2
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU (R9), Y3
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (SI), Y7
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (DI), Y7
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (R8), Y7
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- VMOVDQU Y0, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y1, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y2, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y3, (R9)
- ADDQ $0x20, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x4Xor_loop
- VZEROUPPER
-
-mulAvxTwo_5x4Xor_end:
- RET
-
-// func mulAvxTwo_5x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x5(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 60 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x5_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R12
- MOVQ 72(R9), R13
- MOVQ 96(R9), R9
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, R13
- ADDQ R14, R9
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_5x5_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y0
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y1
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y2
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y3
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y4
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (SI), Y8
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (DI), Y8
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (R8), Y8
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- VMOVDQU Y0, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y1, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y2, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y3, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y4, (R9)
- ADDQ $0x20, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x5_loop
- VZEROUPPER
-
-mulAvxTwo_5x5_end:
- RET
-
-// func mulAvxTwo_5x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x5Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 60 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x5Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R12
- MOVQ 72(R9), R13
- MOVQ 96(R9), R9
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, R13
- ADDQ R14, R9
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_5x5Xor_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (R10), Y0
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU (R11), Y1
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU (R12), Y2
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU (R13), Y3
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU (R9), Y4
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (SI), Y8
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (DI), Y8
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (R8), Y8
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- VMOVDQU Y0, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y1, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y2, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y3, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y4, (R9)
- ADDQ $0x20, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x5Xor_loop
- VZEROUPPER
-
-mulAvxTwo_5x5Xor_end:
- RET
-
-// func mulAvxTwo_5x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x6(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 71 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x6_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R12
- MOVQ 72(R9), R13
- MOVQ 96(R9), R14
- MOVQ 120(R9), R9
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, R9
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_5x6_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y0
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y1
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y2
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y3
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y4
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y5
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI), Y9
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DI), Y9
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (R8), Y9
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- VMOVDQU Y0, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y1, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y2, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y3, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y4, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y5, (R9)
- ADDQ $0x20, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x6_loop
- VZEROUPPER
-
-mulAvxTwo_5x6_end:
- RET
-
-// func mulAvxTwo_5x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x6Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 71 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x6Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R12
- MOVQ 72(R9), R13
- MOVQ 96(R9), R14
- MOVQ 120(R9), R9
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, R9
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_5x6Xor_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (R10), Y0
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU (R11), Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU (R12), Y2
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU (R13), Y3
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU (R14), Y4
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU (R9), Y5
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI), Y9
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DI), Y9
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (R8), Y9
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- VMOVDQU Y0, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y1, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y2, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y3, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y4, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y5, (R9)
- ADDQ $0x20, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x6Xor_loop
- VZEROUPPER
-
-mulAvxTwo_5x6Xor_end:
- RET
-
-// func mulAvxTwo_5x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x7(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 82 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x7_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R12
- MOVQ 72(R9), R13
- MOVQ 96(R9), R14
- MOVQ 120(R9), R15
- MOVQ 144(R9), R9
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R9
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_5x7_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y0
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y1
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y2
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y3
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y4
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y5
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y6
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DI), Y10
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (R8), Y10
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- VMOVDQU Y0, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y1, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y2, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y3, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y4, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y5, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y6, (R9)
- ADDQ $0x20, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x7_loop
- VZEROUPPER
-
-mulAvxTwo_5x7_end:
- RET
-
-// func mulAvxTwo_5x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x7Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 82 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x7Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R12
- MOVQ 72(R9), R13
- MOVQ 96(R9), R14
- MOVQ 120(R9), R15
- MOVQ 144(R9), R9
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R9
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_5x7Xor_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (R10), Y0
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU (R11), Y1
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU (R12), Y2
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU (R13), Y3
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU (R14), Y4
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU (R15), Y5
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU (R9), Y6
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DI), Y10
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (R8), Y10
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- VMOVDQU Y0, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y1, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y2, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y3, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y4, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y5, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y6, (R9)
- ADDQ $0x20, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_5x7Xor_loop
- VZEROUPPER
-
-mulAvxTwo_5x7Xor_end:
- RET
-
-// func mulAvxTwo_5x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x8(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 93 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x8_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), SI
- MOVQ 72(AX), DI
- MOVQ 96(AX), AX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R11
- MOVQ 72(R8), R12
- MOVQ 96(R8), R13
- MOVQ 120(R8), R14
- MOVQ 144(R8), R15
- MOVQ 168(R8), R8
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R8
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X8
- VPBROADCASTB X8, Y8
- MOVQ n+80(FP), BP
- SHRQ $0x05, BP
-
-mulAvxTwo_5x8_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y0
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y1
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y2
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y3
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y4
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y5
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y6
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y7
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (SI), Y11
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (DI), Y11
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 4 to 8 outputs
- VMOVDQU (AX), Y11
- ADDQ $0x20, AX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- VMOVDQU Y0, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y1, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y2, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y3, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y4, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y5, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y6, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y7, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_5x8_loop
- VZEROUPPER
-
-mulAvxTwo_5x8_end:
- RET
-
-// func mulAvxTwo_5x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x8Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 93 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x8Xor_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), SI
- MOVQ 72(AX), DI
- MOVQ 96(AX), AX
- MOVQ out_base+48(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R11
- MOVQ 72(R8), R12
- MOVQ 96(R8), R13
- MOVQ 120(R8), R14
- MOVQ 144(R8), R15
- MOVQ 168(R8), R8
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R8
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X8
- VPBROADCASTB X8, Y8
- MOVQ n+80(FP), BP
- SHRQ $0x05, BP
-
-mulAvxTwo_5x8Xor_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (R9), Y0
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU (R10), Y1
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU (R11), Y2
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU (R12), Y3
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU (R13), Y4
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU (R14), Y5
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU (R15), Y6
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU (R8), Y7
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (SI), Y11
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (DI), Y11
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 4 to 8 outputs
- VMOVDQU (AX), Y11
- ADDQ $0x20, AX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- VMOVDQU Y0, (R9)
- ADDQ $0x20, R9
- VMOVDQU Y1, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y2, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y3, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y4, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y5, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y6, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y7, (R8)
- ADDQ $0x20, R8
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_5x8Xor_loop
- VZEROUPPER
-
-mulAvxTwo_5x8Xor_end:
- RET
-
-// func mulAvxTwo_5x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x9(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 104 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x9_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ start+72(FP), R10
-
- // Add start offset to input
- ADDQ R10, BX
- ADDQ R10, SI
- ADDQ R10, DI
- ADDQ R10, R8
- ADDQ R10, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_5x9_loop:
- // Load and process 32 bytes from input 0 to 9 outputs
- VMOVDQU (BX), Y12
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU (CX), Y10
- VMOVDQU 32(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y0
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y1
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y2
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y3
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y4
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y5
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y6
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y7
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y8
-
- // Load and process 32 bytes from input 1 to 9 outputs
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 9 outputs
- VMOVDQU (DI), Y12
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1152(CX), Y10
- VMOVDQU 1184(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1216(CX), Y10
- VMOVDQU 1248(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1280(CX), Y10
- VMOVDQU 1312(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1344(CX), Y10
- VMOVDQU 1376(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1408(CX), Y10
- VMOVDQU 1440(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 1472(CX), Y10
- VMOVDQU 1504(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 1536(CX), Y10
- VMOVDQU 1568(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1600(CX), Y10
- VMOVDQU 1632(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1664(CX), Y10
- VMOVDQU 1696(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 3 to 9 outputs
- VMOVDQU (R8), Y12
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1728(CX), Y10
- VMOVDQU 1760(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1792(CX), Y10
- VMOVDQU 1824(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1856(CX), Y10
- VMOVDQU 1888(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1920(CX), Y10
- VMOVDQU 1952(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1984(CX), Y10
- VMOVDQU 2016(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2048(CX), Y10
- VMOVDQU 2080(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2112(CX), Y10
- VMOVDQU 2144(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2176(CX), Y10
- VMOVDQU 2208(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2240(CX), Y10
- VMOVDQU 2272(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 4 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2304(CX), Y10
- VMOVDQU 2336(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2368(CX), Y10
- VMOVDQU 2400(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 2432(CX), Y10
- VMOVDQU 2464(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 2496(CX), Y10
- VMOVDQU 2528(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 2560(CX), Y10
- VMOVDQU 2592(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2624(CX), Y10
- VMOVDQU 2656(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2688(CX), Y10
- VMOVDQU 2720(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2752(CX), Y10
- VMOVDQU 2784(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2816(CX), Y10
- VMOVDQU 2848(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs
- MOVQ (R9), R11
- VMOVDQU Y0, (R11)(R10*1)
- MOVQ 24(R9), R11
- VMOVDQU Y1, (R11)(R10*1)
- MOVQ 48(R9), R11
- VMOVDQU Y2, (R11)(R10*1)
- MOVQ 72(R9), R11
- VMOVDQU Y3, (R11)(R10*1)
- MOVQ 96(R9), R11
- VMOVDQU Y4, (R11)(R10*1)
- MOVQ 120(R9), R11
- VMOVDQU Y5, (R11)(R10*1)
- MOVQ 144(R9), R11
- VMOVDQU Y6, (R11)(R10*1)
- MOVQ 168(R9), R11
- VMOVDQU Y7, (R11)(R10*1)
- MOVQ 192(R9), R11
- VMOVDQU Y8, (R11)(R10*1)
-
- // Prepare for next loop
- ADDQ $0x20, R10
- DECQ AX
- JNZ mulAvxTwo_5x9_loop
- VZEROUPPER
-
-mulAvxTwo_5x9_end:
- RET
-
-// func mulAvxTwo_5x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x9Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 104 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x9Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ start+72(FP), R10
-
- // Add start offset to input
- ADDQ R10, BX
- ADDQ R10, SI
- ADDQ R10, DI
- ADDQ R10, R8
- ADDQ R10, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_5x9Xor_loop:
- // Load and process 32 bytes from input 0 to 9 outputs
- VMOVDQU (BX), Y12
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- MOVQ (R9), R11
- VMOVDQU (R11)(R10*1), Y0
- VMOVDQU (CX), Y10
- VMOVDQU 32(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- MOVQ 24(R9), R11
- VMOVDQU (R11)(R10*1), Y1
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- MOVQ 48(R9), R11
- VMOVDQU (R11)(R10*1), Y2
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- MOVQ 72(R9), R11
- VMOVDQU (R11)(R10*1), Y3
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- MOVQ 96(R9), R11
- VMOVDQU (R11)(R10*1), Y4
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- MOVQ 120(R9), R11
- VMOVDQU (R11)(R10*1), Y5
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- MOVQ 144(R9), R11
- VMOVDQU (R11)(R10*1), Y6
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- MOVQ 168(R9), R11
- VMOVDQU (R11)(R10*1), Y7
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- MOVQ 192(R9), R11
- VMOVDQU (R11)(R10*1), Y8
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 1 to 9 outputs
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 9 outputs
- VMOVDQU (DI), Y12
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1152(CX), Y10
- VMOVDQU 1184(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1216(CX), Y10
- VMOVDQU 1248(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1280(CX), Y10
- VMOVDQU 1312(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1344(CX), Y10
- VMOVDQU 1376(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1408(CX), Y10
- VMOVDQU 1440(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 1472(CX), Y10
- VMOVDQU 1504(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 1536(CX), Y10
- VMOVDQU 1568(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1600(CX), Y10
- VMOVDQU 1632(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1664(CX), Y10
- VMOVDQU 1696(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 3 to 9 outputs
- VMOVDQU (R8), Y12
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1728(CX), Y10
- VMOVDQU 1760(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1792(CX), Y10
- VMOVDQU 1824(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1856(CX), Y10
- VMOVDQU 1888(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1920(CX), Y10
- VMOVDQU 1952(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1984(CX), Y10
- VMOVDQU 2016(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2048(CX), Y10
- VMOVDQU 2080(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2112(CX), Y10
- VMOVDQU 2144(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2176(CX), Y10
- VMOVDQU 2208(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2240(CX), Y10
- VMOVDQU 2272(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 4 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2304(CX), Y10
- VMOVDQU 2336(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2368(CX), Y10
- VMOVDQU 2400(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 2432(CX), Y10
- VMOVDQU 2464(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 2496(CX), Y10
- VMOVDQU 2528(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 2560(CX), Y10
- VMOVDQU 2592(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2624(CX), Y10
- VMOVDQU 2656(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2688(CX), Y10
- VMOVDQU 2720(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2752(CX), Y10
- VMOVDQU 2784(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2816(CX), Y10
- VMOVDQU 2848(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs
- MOVQ (R9), R11
- VMOVDQU Y0, (R11)(R10*1)
- MOVQ 24(R9), R11
- VMOVDQU Y1, (R11)(R10*1)
- MOVQ 48(R9), R11
- VMOVDQU Y2, (R11)(R10*1)
- MOVQ 72(R9), R11
- VMOVDQU Y3, (R11)(R10*1)
- MOVQ 96(R9), R11
- VMOVDQU Y4, (R11)(R10*1)
- MOVQ 120(R9), R11
- VMOVDQU Y5, (R11)(R10*1)
- MOVQ 144(R9), R11
- VMOVDQU Y6, (R11)(R10*1)
- MOVQ 168(R9), R11
- VMOVDQU Y7, (R11)(R10*1)
- MOVQ 192(R9), R11
- VMOVDQU Y8, (R11)(R10*1)
-
- // Prepare for next loop
- ADDQ $0x20, R10
- DECQ AX
- JNZ mulAvxTwo_5x9Xor_loop
- VZEROUPPER
-
-mulAvxTwo_5x9Xor_end:
- RET
-
-// func mulAvxTwo_5x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x10(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 115 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x10_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ start+72(FP), R10
-
- // Add start offset to input
- ADDQ R10, BX
- ADDQ R10, SI
- ADDQ R10, DI
- ADDQ R10, R8
- ADDQ R10, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X10
- VPBROADCASTB X10, Y10
-
-mulAvxTwo_5x10_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (BX), Y13
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU (CX), Y11
- VMOVDQU 32(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y0
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y1
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y2
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y3
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y4
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y5
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y6
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y7
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y8
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y9
-
- // Load and process 32 bytes from input 1 to 10 outputs
- VMOVDQU (SI), Y13
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 2 to 10 outputs
- VMOVDQU (DI), Y13
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1280(CX), Y11
- VMOVDQU 1312(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1344(CX), Y11
- VMOVDQU 1376(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 1408(CX), Y11
- VMOVDQU 1440(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 1472(CX), Y11
- VMOVDQU 1504(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 1536(CX), Y11
- VMOVDQU 1568(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 1600(CX), Y11
- VMOVDQU 1632(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1664(CX), Y11
- VMOVDQU 1696(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1728(CX), Y11
- VMOVDQU 1760(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1792(CX), Y11
- VMOVDQU 1824(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1856(CX), Y11
- VMOVDQU 1888(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 3 to 10 outputs
- VMOVDQU (R8), Y13
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1920(CX), Y11
- VMOVDQU 1952(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1984(CX), Y11
- VMOVDQU 2016(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2048(CX), Y11
- VMOVDQU 2080(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2112(CX), Y11
- VMOVDQU 2144(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2176(CX), Y11
- VMOVDQU 2208(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2240(CX), Y11
- VMOVDQU 2272(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2304(CX), Y11
- VMOVDQU 2336(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 2368(CX), Y11
- VMOVDQU 2400(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 2432(CX), Y11
- VMOVDQU 2464(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 2496(CX), Y11
- VMOVDQU 2528(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 4 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 2560(CX), Y11
- VMOVDQU 2592(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 2624(CX), Y11
- VMOVDQU 2656(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2688(CX), Y11
- VMOVDQU 2720(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2752(CX), Y11
- VMOVDQU 2784(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2816(CX), Y11
- VMOVDQU 2848(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2880(CX), Y11
- VMOVDQU 2912(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2944(CX), Y11
- VMOVDQU 2976(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3008(CX), Y11
- VMOVDQU 3040(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3072(CX), Y11
- VMOVDQU 3104(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3136(CX), Y11
- VMOVDQU 3168(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs
- MOVQ (R9), R11
- VMOVDQU Y0, (R11)(R10*1)
- MOVQ 24(R9), R11
- VMOVDQU Y1, (R11)(R10*1)
- MOVQ 48(R9), R11
- VMOVDQU Y2, (R11)(R10*1)
- MOVQ 72(R9), R11
- VMOVDQU Y3, (R11)(R10*1)
- MOVQ 96(R9), R11
- VMOVDQU Y4, (R11)(R10*1)
- MOVQ 120(R9), R11
- VMOVDQU Y5, (R11)(R10*1)
- MOVQ 144(R9), R11
- VMOVDQU Y6, (R11)(R10*1)
- MOVQ 168(R9), R11
- VMOVDQU Y7, (R11)(R10*1)
- MOVQ 192(R9), R11
- VMOVDQU Y8, (R11)(R10*1)
- MOVQ 216(R9), R11
- VMOVDQU Y9, (R11)(R10*1)
-
- // Prepare for next loop
- ADDQ $0x20, R10
- DECQ AX
- JNZ mulAvxTwo_5x10_loop
- VZEROUPPER
-
-mulAvxTwo_5x10_end:
- RET
-
-// func mulAvxTwo_5x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_5x10Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 115 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x10Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), DX
- MOVQ out_base+48(FP), R9
- MOVQ start+72(FP), R10
-
- // Add start offset to input
- ADDQ R10, BX
- ADDQ R10, SI
- ADDQ R10, DI
- ADDQ R10, R8
- ADDQ R10, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X10
- VPBROADCASTB X10, Y10
-
-mulAvxTwo_5x10Xor_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (BX), Y13
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- MOVQ (R9), R11
- VMOVDQU (R11)(R10*1), Y0
- VMOVDQU (CX), Y11
- VMOVDQU 32(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- MOVQ 24(R9), R11
- VMOVDQU (R11)(R10*1), Y1
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- MOVQ 48(R9), R11
- VMOVDQU (R11)(R10*1), Y2
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- MOVQ 72(R9), R11
- VMOVDQU (R11)(R10*1), Y3
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- MOVQ 96(R9), R11
- VMOVDQU (R11)(R10*1), Y4
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- MOVQ 120(R9), R11
- VMOVDQU (R11)(R10*1), Y5
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- MOVQ 144(R9), R11
- VMOVDQU (R11)(R10*1), Y6
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- MOVQ 168(R9), R11
- VMOVDQU (R11)(R10*1), Y7
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- MOVQ 192(R9), R11
- VMOVDQU (R11)(R10*1), Y8
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- MOVQ 216(R9), R11
- VMOVDQU (R11)(R10*1), Y9
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 1 to 10 outputs
- VMOVDQU (SI), Y13
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 2 to 10 outputs
- VMOVDQU (DI), Y13
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1280(CX), Y11
- VMOVDQU 1312(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1344(CX), Y11
- VMOVDQU 1376(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 1408(CX), Y11
- VMOVDQU 1440(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 1472(CX), Y11
- VMOVDQU 1504(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 1536(CX), Y11
- VMOVDQU 1568(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 1600(CX), Y11
- VMOVDQU 1632(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1664(CX), Y11
- VMOVDQU 1696(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1728(CX), Y11
- VMOVDQU 1760(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1792(CX), Y11
- VMOVDQU 1824(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1856(CX), Y11
- VMOVDQU 1888(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 3 to 10 outputs
- VMOVDQU (R8), Y13
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1920(CX), Y11
- VMOVDQU 1952(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1984(CX), Y11
- VMOVDQU 2016(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2048(CX), Y11
- VMOVDQU 2080(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2112(CX), Y11
- VMOVDQU 2144(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2176(CX), Y11
- VMOVDQU 2208(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2240(CX), Y11
- VMOVDQU 2272(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2304(CX), Y11
- VMOVDQU 2336(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 2368(CX), Y11
- VMOVDQU 2400(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 2432(CX), Y11
- VMOVDQU 2464(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 2496(CX), Y11
- VMOVDQU 2528(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 4 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 2560(CX), Y11
- VMOVDQU 2592(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 2624(CX), Y11
- VMOVDQU 2656(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2688(CX), Y11
- VMOVDQU 2720(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2752(CX), Y11
- VMOVDQU 2784(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2816(CX), Y11
- VMOVDQU 2848(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2880(CX), Y11
- VMOVDQU 2912(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2944(CX), Y11
- VMOVDQU 2976(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3008(CX), Y11
- VMOVDQU 3040(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3072(CX), Y11
- VMOVDQU 3104(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3136(CX), Y11
- VMOVDQU 3168(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs
- MOVQ (R9), R11
- VMOVDQU Y0, (R11)(R10*1)
- MOVQ 24(R9), R11
- VMOVDQU Y1, (R11)(R10*1)
- MOVQ 48(R9), R11
- VMOVDQU Y2, (R11)(R10*1)
- MOVQ 72(R9), R11
- VMOVDQU Y3, (R11)(R10*1)
- MOVQ 96(R9), R11
- VMOVDQU Y4, (R11)(R10*1)
- MOVQ 120(R9), R11
- VMOVDQU Y5, (R11)(R10*1)
- MOVQ 144(R9), R11
- VMOVDQU Y6, (R11)(R10*1)
- MOVQ 168(R9), R11
- VMOVDQU Y7, (R11)(R10*1)
- MOVQ 192(R9), R11
- VMOVDQU Y8, (R11)(R10*1)
- MOVQ 216(R9), R11
- VMOVDQU Y9, (R11)(R10*1)
-
- // Prepare for next loop
- ADDQ $0x20, R10
- DECQ AX
- JNZ mulAvxTwo_5x10Xor_loop
- VZEROUPPER
-
-mulAvxTwo_5x10Xor_end:
- RET
-
-// func mulAvxTwo_6x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x1(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 16 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x1_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- MOVQ in_base+24(FP), CX
- MOVQ (CX), DX
- MOVQ 24(CX), BX
- MOVQ 48(CX), SI
- MOVQ 72(CX), DI
- MOVQ 96(CX), R8
- MOVQ 120(CX), CX
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R9
- MOVQ start+72(FP), R10
-
- // Add start offset to output
- ADDQ R10, R9
-
- // Add start offset to input
- ADDQ R10, DX
- ADDQ R10, BX
- ADDQ R10, SI
- ADDQ R10, DI
- ADDQ R10, R8
- ADDQ R10, CX
- MOVQ $0x0000000f, R10
- MOVQ R10, X13
- VPBROADCASTB X13, Y13
-
-mulAvxTwo_6x1_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (DX), Y14
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VPSHUFB Y14, Y0, Y14
- VPSHUFB Y15, Y1, Y15
- VPXOR Y14, Y15, Y12
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (BX), Y14
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VPSHUFB Y14, Y2, Y14
- VPSHUFB Y15, Y3, Y15
- XOR3WAY( $0x00, Y14, Y15, Y12)
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (SI), Y14
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VPSHUFB Y14, Y4, Y14
- VPSHUFB Y15, Y5, Y15
- XOR3WAY( $0x00, Y14, Y15, Y12)
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (DI), Y14
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VPSHUFB Y14, Y6, Y14
- VPSHUFB Y15, Y7, Y15
- XOR3WAY( $0x00, Y14, Y15, Y12)
-
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (R8), Y14
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VPSHUFB Y14, Y8, Y14
- VPSHUFB Y15, Y9, Y15
- XOR3WAY( $0x00, Y14, Y15, Y12)
-
- // Load and process 32 bytes from input 5 to 1 outputs
- VMOVDQU (CX), Y14
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VPSHUFB Y14, Y10, Y14
- VPSHUFB Y15, Y11, Y15
- XOR3WAY( $0x00, Y14, Y15, Y12)
-
- // Store 1 outputs
- VMOVDQU Y12, (R9)
- ADDQ $0x20, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x1_loop
- VZEROUPPER
-
-mulAvxTwo_6x1_end:
- RET
-
-// func mulAvxTwo_6x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x1_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 30 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x1_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R10
- MOVQ start+72(FP), R11
-
- // Add start offset to output
- ADDQ R11, R10
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, R8
- ADDQ R11, R9
- ADDQ R11, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_6x1_64_loop:
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y6
- VMOVDQU 32(BX), Y5
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- VPXOR Y3, Y4, Y0
- VPXOR Y5, Y6, Y1
-
- // Load and process 64 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y6
- VMOVDQU 32(SI), Y5
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y6
- VMOVDQU 32(DI), Y5
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y6
- VMOVDQU 32(R8), Y5
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y6
- VMOVDQU 32(R9), Y5
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 5 to 1 outputs
- VMOVDQU (DX), Y6
- VMOVDQU 32(DX), Y5
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Store 1 outputs
- VMOVDQU Y0, (R10)
- VMOVDQU Y1, 32(R10)
- ADDQ $0x40, R10
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x1_64_loop
- VZEROUPPER
-
-mulAvxTwo_6x1_64_end:
- RET
-
-// func mulAvxTwo_6x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x1Xor(SB), NOSPLIT, $0-88
- // Loading all tables to registers
- // Destination kept in GP registers
- // Full registers estimated 16 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x1Xor_end
- VMOVDQU (CX), Y0
- VMOVDQU 32(CX), Y1
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- MOVQ in_base+24(FP), CX
- MOVQ (CX), DX
- MOVQ 24(CX), BX
- MOVQ 48(CX), SI
- MOVQ 72(CX), DI
- MOVQ 96(CX), R8
- MOVQ 120(CX), CX
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R9
- MOVQ start+72(FP), R10
-
- // Add start offset to output
- ADDQ R10, R9
-
- // Add start offset to input
- ADDQ R10, DX
- ADDQ R10, BX
- ADDQ R10, SI
- ADDQ R10, DI
- ADDQ R10, R8
- ADDQ R10, CX
- MOVQ $0x0000000f, R10
- MOVQ R10, X13
- VPBROADCASTB X13, Y13
-
-mulAvxTwo_6x1Xor_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (DX), Y14
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VMOVDQU (R9), Y12
- VPSHUFB Y14, Y0, Y14
- VPSHUFB Y15, Y1, Y15
- XOR3WAY( $0x00, Y14, Y15, Y12)
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (BX), Y14
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VPSHUFB Y14, Y2, Y14
- VPSHUFB Y15, Y3, Y15
- XOR3WAY( $0x00, Y14, Y15, Y12)
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (SI), Y14
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VPSHUFB Y14, Y4, Y14
- VPSHUFB Y15, Y5, Y15
- XOR3WAY( $0x00, Y14, Y15, Y12)
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (DI), Y14
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VPSHUFB Y14, Y6, Y14
- VPSHUFB Y15, Y7, Y15
- XOR3WAY( $0x00, Y14, Y15, Y12)
-
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (R8), Y14
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VPSHUFB Y14, Y8, Y14
- VPSHUFB Y15, Y9, Y15
- XOR3WAY( $0x00, Y14, Y15, Y12)
-
- // Load and process 32 bytes from input 5 to 1 outputs
- VMOVDQU (CX), Y14
- ADDQ $0x20, CX
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VPSHUFB Y14, Y10, Y14
- VPSHUFB Y15, Y11, Y15
- XOR3WAY( $0x00, Y14, Y15, Y12)
-
- // Store 1 outputs
- VMOVDQU Y12, (R9)
- ADDQ $0x20, R9
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x1Xor_loop
- VZEROUPPER
-
-mulAvxTwo_6x1Xor_end:
- RET
-
-// func mulAvxTwo_6x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x1_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 30 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x1_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R10
- MOVQ start+72(FP), R11
-
- // Add start offset to output
- ADDQ R11, R10
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, R8
- ADDQ R11, R9
- ADDQ R11, DX
- MOVQ $0x0000000f, R11
- MOVQ R11, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_6x1_64Xor_loop:
- // Load 1 outputs
- VMOVDQU (R10), Y0
- VMOVDQU 32(R10), Y1
-
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y6
- VMOVDQU 32(BX), Y5
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y6
- VMOVDQU 32(SI), Y5
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y6
- VMOVDQU 32(DI), Y5
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y6
- VMOVDQU 32(R8), Y5
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y6
- VMOVDQU 32(R9), Y5
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 5 to 1 outputs
- VMOVDQU (DX), Y6
- VMOVDQU 32(DX), Y5
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Store 1 outputs
- VMOVDQU Y0, (R10)
- VMOVDQU Y1, 32(R10)
- ADDQ $0x40, R10
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x1_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_6x1_64Xor_end:
- RET
-
-// func mulAvxTwo_6x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x2(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 31 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x2_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R10
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, R11
- ADDQ R12, R10
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_6x2_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y5
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y0
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y1
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y5
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y5
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y5
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y5
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 5 to 2 outputs
- VMOVDQU (DX), Y5
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 640(CX), Y3
- VMOVDQU 672(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 704(CX), Y3
- VMOVDQU 736(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Store 2 outputs
- VMOVDQU Y0, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y1, (R10)
- ADDQ $0x20, R10
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x2_loop
- VZEROUPPER
-
-mulAvxTwo_6x2_end:
- RET
-
-// func mulAvxTwo_6x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x2_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 57 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x2_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R10
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, R11
- ADDQ R12, R10
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_6x2_64_loop:
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9
- VMOVDQU 32(BX), Y11
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y0
- VPXOR Y7, Y8, Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y2
- VPXOR Y7, Y8, Y3
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y9
- VMOVDQU 32(SI), Y11
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y9
- VMOVDQU 32(DI), Y11
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y9
- VMOVDQU 32(R8), Y11
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y9
- VMOVDQU 32(R9), Y11
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 5 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (R11)
- VMOVDQU Y1, 32(R11)
- ADDQ $0x40, R11
- VMOVDQU Y2, (R10)
- VMOVDQU Y3, 32(R10)
- ADDQ $0x40, R10
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x2_64_loop
- VZEROUPPER
-
-mulAvxTwo_6x2_64_end:
- RET
-
-// func mulAvxTwo_6x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x2Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 31 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x2Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R10
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, R11
- ADDQ R12, R10
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_6x2Xor_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y5
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (R11), Y0
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU (R10), Y1
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y5
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y5
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y5
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y5
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 5 to 2 outputs
- VMOVDQU (DX), Y5
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 640(CX), Y3
- VMOVDQU 672(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 704(CX), Y3
- VMOVDQU 736(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Store 2 outputs
- VMOVDQU Y0, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y1, (R10)
- ADDQ $0x20, R10
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x2Xor_loop
- VZEROUPPER
-
-mulAvxTwo_6x2Xor_end:
- RET
-
-// func mulAvxTwo_6x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x2_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 57 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x2_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R10
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, R11
- ADDQ R12, R10
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_6x2_64Xor_loop:
- // Load 2 outputs
- VMOVDQU (R11), Y0
- VMOVDQU 32(R11), Y1
- VMOVDQU (R10), Y2
- VMOVDQU 32(R10), Y3
-
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9
- VMOVDQU 32(BX), Y11
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y9
- VMOVDQU 32(SI), Y11
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y9
- VMOVDQU 32(DI), Y11
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y9
- VMOVDQU 32(R8), Y11
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y9
- VMOVDQU 32(R9), Y11
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 5 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (R11)
- VMOVDQU Y1, 32(R11)
- ADDQ $0x40, R11
- VMOVDQU Y2, (R10)
- VMOVDQU Y3, 32(R10)
- ADDQ $0x40, R10
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x2_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_6x2_64Xor_end:
- RET
-
-// func mulAvxTwo_6x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x3(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 44 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x3_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R12
- MOVQ 48(R10), R10
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R11
- ADDQ R13, R12
- ADDQ R13, R10
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X3
- VPBROADCASTB X3, Y3
-
-mulAvxTwo_6x3_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y0
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y1
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y2
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y6
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y6
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y6
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (R9), Y6
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 768(CX), Y4
- VMOVDQU 800(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 832(CX), Y4
- VMOVDQU 864(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 896(CX), Y4
- VMOVDQU 928(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 5 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 960(CX), Y4
- VMOVDQU 992(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1024(CX), Y4
- VMOVDQU 1056(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1088(CX), Y4
- VMOVDQU 1120(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y1, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y2, (R10)
- ADDQ $0x20, R10
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x3_loop
- VZEROUPPER
-
-mulAvxTwo_6x3_end:
- RET
-
-// func mulAvxTwo_6x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x3_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 82 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x3_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R12
- MOVQ 48(R10), R10
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R11
- ADDQ R13, R12
- ADDQ R13, R10
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_6x3_64_loop:
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y11
- VMOVDQU 32(BX), Y13
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y0
- VPXOR Y9, Y10, Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y2
- VPXOR Y9, Y10, Y3
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y4
- VPXOR Y9, Y10, Y5
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y11
- VMOVDQU 32(SI), Y13
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y11
- VMOVDQU 32(DI), Y13
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y11
- VMOVDQU 32(R8), Y13
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 4 to 3 outputs
- VMOVDQU (R9), Y11
- VMOVDQU 32(R9), Y13
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 5 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (R11)
- VMOVDQU Y1, 32(R11)
- ADDQ $0x40, R11
- VMOVDQU Y2, (R12)
- VMOVDQU Y3, 32(R12)
- ADDQ $0x40, R12
- VMOVDQU Y4, (R10)
- VMOVDQU Y5, 32(R10)
- ADDQ $0x40, R10
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x3_64_loop
- VZEROUPPER
-
-mulAvxTwo_6x3_64_end:
- RET
-
-// func mulAvxTwo_6x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x3Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 44 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x3Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R12
- MOVQ 48(R10), R10
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R11
- ADDQ R13, R12
- ADDQ R13, R10
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X3
- VPBROADCASTB X3, Y3
-
-mulAvxTwo_6x3Xor_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (R11), Y0
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU (R12), Y1
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU (R10), Y2
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y6
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y6
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y6
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (R9), Y6
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 768(CX), Y4
- VMOVDQU 800(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 832(CX), Y4
- VMOVDQU 864(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 896(CX), Y4
- VMOVDQU 928(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 5 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 960(CX), Y4
- VMOVDQU 992(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1024(CX), Y4
- VMOVDQU 1056(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1088(CX), Y4
- VMOVDQU 1120(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y1, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y2, (R10)
- ADDQ $0x20, R10
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x3Xor_loop
- VZEROUPPER
-
-mulAvxTwo_6x3Xor_end:
- RET
-
-// func mulAvxTwo_6x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x3_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 82 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x3_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R12
- MOVQ 48(R10), R10
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R11
- ADDQ R13, R12
- ADDQ R13, R10
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_6x3_64Xor_loop:
- // Load 3 outputs
- VMOVDQU (R11), Y0
- VMOVDQU 32(R11), Y1
- VMOVDQU (R12), Y2
- VMOVDQU 32(R12), Y3
- VMOVDQU (R10), Y4
- VMOVDQU 32(R10), Y5
-
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y11
- VMOVDQU 32(BX), Y13
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y11
- VMOVDQU 32(SI), Y13
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y11
- VMOVDQU 32(DI), Y13
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y11
- VMOVDQU 32(R8), Y13
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 4 to 3 outputs
- VMOVDQU (R9), Y11
- VMOVDQU 32(R9), Y13
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 5 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (R11)
- VMOVDQU Y1, 32(R11)
- ADDQ $0x40, R11
- VMOVDQU Y2, (R12)
- VMOVDQU Y3, 32(R12)
- ADDQ $0x40, R12
- VMOVDQU Y4, (R10)
- VMOVDQU Y5, 32(R10)
- ADDQ $0x40, R10
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x3_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_6x3_64Xor_end:
- RET
-
-// func mulAvxTwo_6x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x4(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 57 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x4_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R12
- MOVQ 48(R10), R13
- MOVQ 72(R10), R10
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, R13
- ADDQ R14, R10
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_6x4_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y0
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y1
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y2
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y3
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (SI), Y7
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (DI), Y7
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (R8), Y7
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (R9), Y7
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 5 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1280(CX), Y5
- VMOVDQU 1312(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1344(CX), Y5
- VMOVDQU 1376(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1408(CX), Y5
- VMOVDQU 1440(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1472(CX), Y5
- VMOVDQU 1504(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- VMOVDQU Y0, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y1, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y2, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y3, (R10)
- ADDQ $0x20, R10
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x4_loop
- VZEROUPPER
-
-mulAvxTwo_6x4_end:
- RET
-
-// func mulAvxTwo_6x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x4Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 57 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x4Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R12
- MOVQ 48(R10), R13
- MOVQ 72(R10), R10
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, R13
- ADDQ R14, R10
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_6x4Xor_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (R11), Y0
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU (R12), Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU (R13), Y2
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU (R10), Y3
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (SI), Y7
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (DI), Y7
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (R8), Y7
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (R9), Y7
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 5 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1280(CX), Y5
- VMOVDQU 1312(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1344(CX), Y5
- VMOVDQU 1376(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1408(CX), Y5
- VMOVDQU 1440(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1472(CX), Y5
- VMOVDQU 1504(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- VMOVDQU Y0, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y1, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y2, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y3, (R10)
- ADDQ $0x20, R10
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x4Xor_loop
- VZEROUPPER
-
-mulAvxTwo_6x4Xor_end:
- RET
-
-// func mulAvxTwo_6x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x5(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 70 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x5_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R12
- MOVQ 48(R10), R13
- MOVQ 72(R10), R14
- MOVQ 96(R10), R10
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, R10
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_6x5_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y0
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y1
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y2
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y3
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y4
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (SI), Y8
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (DI), Y8
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (R8), Y8
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (R9), Y8
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 5 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1600(CX), Y6
- VMOVDQU 1632(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1664(CX), Y6
- VMOVDQU 1696(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1728(CX), Y6
- VMOVDQU 1760(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1792(CX), Y6
- VMOVDQU 1824(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1856(CX), Y6
- VMOVDQU 1888(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- VMOVDQU Y0, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y1, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y2, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y3, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y4, (R10)
- ADDQ $0x20, R10
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x5_loop
- VZEROUPPER
-
-mulAvxTwo_6x5_end:
- RET
-
-// func mulAvxTwo_6x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x5Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 70 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x5Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R12
- MOVQ 48(R10), R13
- MOVQ 72(R10), R14
- MOVQ 96(R10), R10
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, R10
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_6x5Xor_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (R11), Y0
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU (R12), Y1
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU (R13), Y2
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU (R14), Y3
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU (R10), Y4
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (SI), Y8
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (DI), Y8
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (R8), Y8
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (R9), Y8
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 5 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1600(CX), Y6
- VMOVDQU 1632(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1664(CX), Y6
- VMOVDQU 1696(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1728(CX), Y6
- VMOVDQU 1760(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1792(CX), Y6
- VMOVDQU 1824(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1856(CX), Y6
- VMOVDQU 1888(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- VMOVDQU Y0, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y1, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y2, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y3, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y4, (R10)
- ADDQ $0x20, R10
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x5Xor_loop
- VZEROUPPER
-
-mulAvxTwo_6x5Xor_end:
- RET
-
-// func mulAvxTwo_6x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x6(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 83 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x6_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R12
- MOVQ 48(R10), R13
- MOVQ 72(R10), R14
- MOVQ 96(R10), R15
- MOVQ 120(R10), R10
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R10
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_6x6_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y0
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y1
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y2
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y3
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y4
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y5
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI), Y9
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DI), Y9
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (R8), Y9
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (R9), Y9
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 5 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1920(CX), Y7
- VMOVDQU 1952(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1984(CX), Y7
- VMOVDQU 2016(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2048(CX), Y7
- VMOVDQU 2080(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2112(CX), Y7
- VMOVDQU 2144(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2176(CX), Y7
- VMOVDQU 2208(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2240(CX), Y7
- VMOVDQU 2272(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- VMOVDQU Y0, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y1, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y2, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y3, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y4, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y5, (R10)
- ADDQ $0x20, R10
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x6_loop
- VZEROUPPER
-
-mulAvxTwo_6x6_end:
- RET
-
-// func mulAvxTwo_6x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x6Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 83 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x6Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R12
- MOVQ 48(R10), R13
- MOVQ 72(R10), R14
- MOVQ 96(R10), R15
- MOVQ 120(R10), R10
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R10
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_6x6Xor_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (R11), Y0
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU (R12), Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU (R13), Y2
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU (R14), Y3
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU (R15), Y4
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU (R10), Y5
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI), Y9
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DI), Y9
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (R8), Y9
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (R9), Y9
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 5 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1920(CX), Y7
- VMOVDQU 1952(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1984(CX), Y7
- VMOVDQU 2016(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2048(CX), Y7
- VMOVDQU 2080(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2112(CX), Y7
- VMOVDQU 2144(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2176(CX), Y7
- VMOVDQU 2208(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2240(CX), Y7
- VMOVDQU 2272(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- VMOVDQU Y0, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y1, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y2, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y3, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y4, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y5, (R10)
- ADDQ $0x20, R10
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_6x6Xor_loop
- VZEROUPPER
-
-mulAvxTwo_6x6Xor_end:
- RET
-
-// func mulAvxTwo_6x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x7(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 96 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x7_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), SI
- MOVQ 72(AX), DI
- MOVQ 96(AX), R8
- MOVQ 120(AX), AX
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R12
- MOVQ 72(R9), R13
- MOVQ 96(R9), R14
- MOVQ 120(R9), R15
- MOVQ 144(R9), R9
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R9
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X7
- VPBROADCASTB X7, Y7
- MOVQ n+80(FP), BP
- SHRQ $0x05, BP
-
-mulAvxTwo_6x7_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y0
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y1
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y2
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y3
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y4
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y5
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y6
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (DI), Y10
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (R8), Y10
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 5 to 7 outputs
- VMOVDQU (AX), Y10
- ADDQ $0x20, AX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2240(CX), Y8
- VMOVDQU 2272(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2304(CX), Y8
- VMOVDQU 2336(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2368(CX), Y8
- VMOVDQU 2400(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2432(CX), Y8
- VMOVDQU 2464(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2496(CX), Y8
- VMOVDQU 2528(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2560(CX), Y8
- VMOVDQU 2592(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2624(CX), Y8
- VMOVDQU 2656(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- VMOVDQU Y0, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y1, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y2, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y3, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y4, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y5, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y6, (R9)
- ADDQ $0x20, R9
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_6x7_loop
- VZEROUPPER
-
-mulAvxTwo_6x7_end:
- RET
-
-// func mulAvxTwo_6x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x7Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 96 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x7Xor_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), SI
- MOVQ 72(AX), DI
- MOVQ 96(AX), R8
- MOVQ 120(AX), AX
- MOVQ out_base+48(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R12
- MOVQ 72(R9), R13
- MOVQ 96(R9), R14
- MOVQ 120(R9), R15
- MOVQ 144(R9), R9
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R9
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X7
- VPBROADCASTB X7, Y7
- MOVQ n+80(FP), BP
- SHRQ $0x05, BP
-
-mulAvxTwo_6x7Xor_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (R10), Y0
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU (R11), Y1
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU (R12), Y2
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU (R13), Y3
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU (R14), Y4
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU (R15), Y5
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU (R9), Y6
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (DI), Y10
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (R8), Y10
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 5 to 7 outputs
- VMOVDQU (AX), Y10
- ADDQ $0x20, AX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2240(CX), Y8
- VMOVDQU 2272(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2304(CX), Y8
- VMOVDQU 2336(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2368(CX), Y8
- VMOVDQU 2400(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2432(CX), Y8
- VMOVDQU 2464(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2496(CX), Y8
- VMOVDQU 2528(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2560(CX), Y8
- VMOVDQU 2592(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2624(CX), Y8
- VMOVDQU 2656(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- VMOVDQU Y0, (R10)
- ADDQ $0x20, R10
- VMOVDQU Y1, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y2, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y3, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y4, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y5, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y6, (R9)
- ADDQ $0x20, R9
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_6x7Xor_loop
- VZEROUPPER
-
-mulAvxTwo_6x7Xor_end:
- RET
-
-// func mulAvxTwo_6x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x8(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 109 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x8_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ start+72(FP), R11
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, R8
- ADDQ R11, R9
- ADDQ R11, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X8
- VPBROADCASTB X8, Y8
-
-mulAvxTwo_6x8_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y0
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y1
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y2
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y3
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y4
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y5
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y6
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y7
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI), Y11
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DI), Y11
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (R8), Y11
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 4 to 8 outputs
- VMOVDQU (R9), Y11
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 5 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2560(CX), Y9
- VMOVDQU 2592(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2624(CX), Y9
- VMOVDQU 2656(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2688(CX), Y9
- VMOVDQU 2720(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2752(CX), Y9
- VMOVDQU 2784(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2816(CX), Y9
- VMOVDQU 2848(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2880(CX), Y9
- VMOVDQU 2912(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2944(CX), Y9
- VMOVDQU 2976(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3008(CX), Y9
- VMOVDQU 3040(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- MOVQ (R10), R12
- VMOVDQU Y0, (R12)(R11*1)
- MOVQ 24(R10), R12
- VMOVDQU Y1, (R12)(R11*1)
- MOVQ 48(R10), R12
- VMOVDQU Y2, (R12)(R11*1)
- MOVQ 72(R10), R12
- VMOVDQU Y3, (R12)(R11*1)
- MOVQ 96(R10), R12
- VMOVDQU Y4, (R12)(R11*1)
- MOVQ 120(R10), R12
- VMOVDQU Y5, (R12)(R11*1)
- MOVQ 144(R10), R12
- VMOVDQU Y6, (R12)(R11*1)
- MOVQ 168(R10), R12
- VMOVDQU Y7, (R12)(R11*1)
-
- // Prepare for next loop
- ADDQ $0x20, R11
- DECQ AX
- JNZ mulAvxTwo_6x8_loop
- VZEROUPPER
-
-mulAvxTwo_6x8_end:
- RET
-
-// func mulAvxTwo_6x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x8Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 109 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x8Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX
- MOVQ out_base+48(FP), R10
- MOVQ start+72(FP), R11
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, R8
- ADDQ R11, R9
- ADDQ R11, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X8
- VPBROADCASTB X8, Y8
-
-mulAvxTwo_6x8Xor_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- MOVQ (R10), R12
- VMOVDQU (R12)(R11*1), Y0
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- MOVQ 24(R10), R12
- VMOVDQU (R12)(R11*1), Y1
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- MOVQ 48(R10), R12
- VMOVDQU (R12)(R11*1), Y2
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- MOVQ 72(R10), R12
- VMOVDQU (R12)(R11*1), Y3
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- MOVQ 96(R10), R12
- VMOVDQU (R12)(R11*1), Y4
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- MOVQ 120(R10), R12
- VMOVDQU (R12)(R11*1), Y5
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- MOVQ 144(R10), R12
- VMOVDQU (R12)(R11*1), Y6
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- MOVQ 168(R10), R12
- VMOVDQU (R12)(R11*1), Y7
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI), Y11
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DI), Y11
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (R8), Y11
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 4 to 8 outputs
- VMOVDQU (R9), Y11
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 5 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2560(CX), Y9
- VMOVDQU 2592(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2624(CX), Y9
- VMOVDQU 2656(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2688(CX), Y9
- VMOVDQU 2720(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2752(CX), Y9
- VMOVDQU 2784(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2816(CX), Y9
- VMOVDQU 2848(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2880(CX), Y9
- VMOVDQU 2912(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2944(CX), Y9
- VMOVDQU 2976(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3008(CX), Y9
- VMOVDQU 3040(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- MOVQ (R10), R12
- VMOVDQU Y0, (R12)(R11*1)
- MOVQ 24(R10), R12
- VMOVDQU Y1, (R12)(R11*1)
- MOVQ 48(R10), R12
- VMOVDQU Y2, (R12)(R11*1)
- MOVQ 72(R10), R12
- VMOVDQU Y3, (R12)(R11*1)
- MOVQ 96(R10), R12
- VMOVDQU Y4, (R12)(R11*1)
- MOVQ 120(R10), R12
- VMOVDQU Y5, (R12)(R11*1)
- MOVQ 144(R10), R12
- VMOVDQU Y6, (R12)(R11*1)
- MOVQ 168(R10), R12
- VMOVDQU Y7, (R12)(R11*1)
-
- // Prepare for next loop
- ADDQ $0x20, R11
- DECQ AX
- JNZ mulAvxTwo_6x8Xor_loop
- VZEROUPPER
-
-mulAvxTwo_6x8Xor_end:
- RET
-
-// func mulAvxTwo_6x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x9(SB), NOSPLIT, $0-88
- // Loading no tables to registers: each 32-byte table is read from the matrix pointer (CX) right where it is used
- // Destination kept on stack: output pointers are re-read from the out slice headers (R10) at store time
- // Full registers estimated 122 YMM used
- MOVQ n+80(FP), AX // AX = n
- MOVQ matrix_base+0(FP), CX // CX = base address of the lookup tables
- SHRQ $0x05, AX // AX = n / 32 = number of 32-byte loop iterations
- TESTQ AX, AX
- JZ mulAvxTwo_6x9_end // fewer than 32 bytes requested: nothing is written
- MOVQ in_base+24(FP), DX // DX = address of the first in[] slice header (headers are 24 bytes apart)
- MOVQ (DX), BX // BX = data pointer of in[0]
- MOVQ 24(DX), SI // SI = data pointer of in[1]
- MOVQ 48(DX), DI // DI = data pointer of in[2]
- MOVQ 72(DX), R8 // R8 = data pointer of in[3]
- MOVQ 96(DX), R9 // R9 = data pointer of in[4]
- MOVQ 120(DX), DX // DX = data pointer of in[5]; the header base is no longer needed
- MOVQ out_base+48(FP), R10 // R10 = address of the first out[] slice header
- MOVQ start+72(FP), R11 // R11 = start byte offset, also used as the running output offset
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, R8
- ADDQ R11, R9
- ADDQ R11, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X9
- VPBROADCASTB X9, Y9 // Y9 = 0x0f in every byte lane: the low-nibble mask
-
-mulAvxTwo_6x9_loop:
- // Load and process 32 bytes from input 0 to 9 outputs (this first input initialises Y0-Y8 with a plain VPXOR)
- VMOVDQU (BX), Y12 // Y12 = next 32 bytes of in[0]
- ADDQ $0x20, BX // advance the in[0] pointer by 32 bytes
- VPSRLQ $0x04, Y12, Y13 // shift right by 4 so each byte's high nibble lands in its low 4 bits
- VPAND Y9, Y12, Y12 // Y12 = low nibble of every input byte
- VPAND Y9, Y13, Y13 // Y13 = high nibble of every input byte
- VMOVDQU (CX), Y10 // table indexed by the low nibbles
- VMOVDQU 32(CX), Y11 // table indexed by the high nibbles
- VPSHUFB Y12, Y10, Y10 // per-byte table lookup using the low nibbles
- VPSHUFB Y13, Y11, Y11 // per-byte table lookup using the high nibbles
- VPXOR Y10, Y11, Y0 // Y0 = XOR of both lookups: initial value of output 0
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y1
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y2
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y3
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y4
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y5
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y6
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y7
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y8
-
- // Load and process 32 bytes from input 1 to 9 outputs (XOR3WAY is a macro defined outside this view; presumably it XORs both lookups into the accumulator - TODO confirm)
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 9 outputs
- VMOVDQU (DI), Y12
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1152(CX), Y10
- VMOVDQU 1184(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1216(CX), Y10
- VMOVDQU 1248(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1280(CX), Y10
- VMOVDQU 1312(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1344(CX), Y10
- VMOVDQU 1376(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1408(CX), Y10
- VMOVDQU 1440(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 1472(CX), Y10
- VMOVDQU 1504(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 1536(CX), Y10
- VMOVDQU 1568(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1600(CX), Y10
- VMOVDQU 1632(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1664(CX), Y10
- VMOVDQU 1696(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 3 to 9 outputs
- VMOVDQU (R8), Y12
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1728(CX), Y10
- VMOVDQU 1760(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1792(CX), Y10
- VMOVDQU 1824(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1856(CX), Y10
- VMOVDQU 1888(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1920(CX), Y10
- VMOVDQU 1952(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1984(CX), Y10
- VMOVDQU 2016(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2048(CX), Y10
- VMOVDQU 2080(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2112(CX), Y10
- VMOVDQU 2144(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2176(CX), Y10
- VMOVDQU 2208(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2240(CX), Y10
- VMOVDQU 2272(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 4 to 9 outputs
- VMOVDQU (R9), Y12
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2304(CX), Y10
- VMOVDQU 2336(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2368(CX), Y10
- VMOVDQU 2400(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 2432(CX), Y10
- VMOVDQU 2464(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 2496(CX), Y10
- VMOVDQU 2528(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 2560(CX), Y10
- VMOVDQU 2592(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2624(CX), Y10
- VMOVDQU 2656(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2688(CX), Y10
- VMOVDQU 2720(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2752(CX), Y10
- VMOVDQU 2784(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2816(CX), Y10
- VMOVDQU 2848(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 5 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2880(CX), Y10
- VMOVDQU 2912(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2944(CX), Y10
- VMOVDQU 2976(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3008(CX), Y10
- VMOVDQU 3040(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3072(CX), Y10
- VMOVDQU 3104(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3136(CX), Y10
- VMOVDQU 3168(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3200(CX), Y10
- VMOVDQU 3232(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3264(CX), Y10
- VMOVDQU 3296(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3328(CX), Y10
- VMOVDQU 3360(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3392(CX), Y10
- VMOVDQU 3424(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs (each out[i] data pointer is read from its slice header, then 32 bytes are written at offset R11)
- MOVQ (R10), R12 // R12 = data pointer of out[0]
- VMOVDQU Y0, (R12)(R11*1) // write 32 bytes of output 0 at byte offset R11
- MOVQ 24(R10), R12
- VMOVDQU Y1, (R12)(R11*1)
- MOVQ 48(R10), R12
- VMOVDQU Y2, (R12)(R11*1)
- MOVQ 72(R10), R12
- VMOVDQU Y3, (R12)(R11*1)
- MOVQ 96(R10), R12
- VMOVDQU Y4, (R12)(R11*1)
- MOVQ 120(R10), R12
- VMOVDQU Y5, (R12)(R11*1)
- MOVQ 144(R10), R12
- VMOVDQU Y6, (R12)(R11*1)
- MOVQ 168(R10), R12
- VMOVDQU Y7, (R12)(R11*1)
- MOVQ 192(R10), R12
- VMOVDQU Y8, (R12)(R11*1)
-
- // Prepare for next loop
- ADDQ $0x20, R11 // advance the output offset by 32 bytes
- DECQ AX // one fewer 32-byte chunk remaining
- JNZ mulAvxTwo_6x9_loop
- VZEROUPPER // zero the upper halves of the YMM registers before returning
-
-mulAvxTwo_6x9_end:
- RET
-
-// func mulAvxTwo_6x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x9Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers: each 32-byte table is read from the matrix pointer (CX) right where it is used
- // Destination kept on stack: output pointers are re-read from the out slice headers (R10) for both the initial load and the store
- // Full registers estimated 122 YMM used
- MOVQ n+80(FP), AX // AX = n
- MOVQ matrix_base+0(FP), CX // CX = base address of the lookup tables
- SHRQ $0x05, AX // AX = n / 32 = number of 32-byte loop iterations
- TESTQ AX, AX
- JZ mulAvxTwo_6x9Xor_end // fewer than 32 bytes requested: out is left untouched
- MOVQ in_base+24(FP), DX // DX = address of the first in[] slice header (headers are 24 bytes apart)
- MOVQ (DX), BX // BX = data pointer of in[0]
- MOVQ 24(DX), SI // SI = data pointer of in[1]
- MOVQ 48(DX), DI // DI = data pointer of in[2]
- MOVQ 72(DX), R8 // R8 = data pointer of in[3]
- MOVQ 96(DX), R9 // R9 = data pointer of in[4]
- MOVQ 120(DX), DX // DX = data pointer of in[5]; the header base is no longer needed
- MOVQ out_base+48(FP), R10 // R10 = address of the first out[] slice header
- MOVQ start+72(FP), R11 // R11 = start byte offset, also used as the running output offset
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, R8
- ADDQ R11, R9
- ADDQ R11, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X9
- VPBROADCASTB X9, Y9 // Y9 = 0x0f in every byte lane: the low-nibble mask
-
-mulAvxTwo_6x9Xor_loop:
- // Load and process 32 bytes from input 0 to 9 outputs (unlike the non-Xor variant, Y0-Y8 start from the bytes already in out[]; XOR3WAY is a macro defined outside this view, presumably XORing both lookups into the accumulator - TODO confirm)
- VMOVDQU (BX), Y12 // Y12 = next 32 bytes of in[0]
- ADDQ $0x20, BX // advance the in[0] pointer by 32 bytes
- VPSRLQ $0x04, Y12, Y13 // shift right by 4 so each byte's high nibble lands in its low 4 bits
- VPAND Y9, Y12, Y12 // Y12 = low nibble of every input byte
- VPAND Y9, Y13, Y13 // Y13 = high nibble of every input byte
- MOVQ (R10), R12 // R12 = data pointer of out[0]
- VMOVDQU (R12)(R11*1), Y0 // Y0 = existing 32 bytes of out[0] at offset R11
- VMOVDQU (CX), Y10 // table indexed by the low nibbles
- VMOVDQU 32(CX), Y11 // table indexed by the high nibbles
- VPSHUFB Y12, Y10, Y10 // per-byte table lookup using the low nibbles
- VPSHUFB Y13, Y11, Y11 // per-byte table lookup using the high nibbles
- XOR3WAY( $0x00, Y10, Y11, Y0)
- MOVQ 24(R10), R12
- VMOVDQU (R12)(R11*1), Y1
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- MOVQ 48(R10), R12
- VMOVDQU (R12)(R11*1), Y2
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- MOVQ 72(R10), R12
- VMOVDQU (R12)(R11*1), Y3
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- MOVQ 96(R10), R12
- VMOVDQU (R12)(R11*1), Y4
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- MOVQ 120(R10), R12
- VMOVDQU (R12)(R11*1), Y5
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- MOVQ 144(R10), R12
- VMOVDQU (R12)(R11*1), Y6
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- MOVQ 168(R10), R12
- VMOVDQU (R12)(R11*1), Y7
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- MOVQ 192(R10), R12
- VMOVDQU (R12)(R11*1), Y8
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 1 to 9 outputs
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 9 outputs
- VMOVDQU (DI), Y12
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1152(CX), Y10
- VMOVDQU 1184(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1216(CX), Y10
- VMOVDQU 1248(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1280(CX), Y10
- VMOVDQU 1312(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1344(CX), Y10
- VMOVDQU 1376(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1408(CX), Y10
- VMOVDQU 1440(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 1472(CX), Y10
- VMOVDQU 1504(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 1536(CX), Y10
- VMOVDQU 1568(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1600(CX), Y10
- VMOVDQU 1632(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1664(CX), Y10
- VMOVDQU 1696(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 3 to 9 outputs
- VMOVDQU (R8), Y12
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1728(CX), Y10
- VMOVDQU 1760(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1792(CX), Y10
- VMOVDQU 1824(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1856(CX), Y10
- VMOVDQU 1888(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1920(CX), Y10
- VMOVDQU 1952(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1984(CX), Y10
- VMOVDQU 2016(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2048(CX), Y10
- VMOVDQU 2080(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2112(CX), Y10
- VMOVDQU 2144(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2176(CX), Y10
- VMOVDQU 2208(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2240(CX), Y10
- VMOVDQU 2272(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 4 to 9 outputs
- VMOVDQU (R9), Y12
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2304(CX), Y10
- VMOVDQU 2336(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2368(CX), Y10
- VMOVDQU 2400(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 2432(CX), Y10
- VMOVDQU 2464(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 2496(CX), Y10
- VMOVDQU 2528(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 2560(CX), Y10
- VMOVDQU 2592(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2624(CX), Y10
- VMOVDQU 2656(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2688(CX), Y10
- VMOVDQU 2720(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2752(CX), Y10
- VMOVDQU 2784(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2816(CX), Y10
- VMOVDQU 2848(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 5 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2880(CX), Y10
- VMOVDQU 2912(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2944(CX), Y10
- VMOVDQU 2976(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3008(CX), Y10
- VMOVDQU 3040(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3072(CX), Y10
- VMOVDQU 3104(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3136(CX), Y10
- VMOVDQU 3168(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3200(CX), Y10
- VMOVDQU 3232(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3264(CX), Y10
- VMOVDQU 3296(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3328(CX), Y10
- VMOVDQU 3360(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3392(CX), Y10
- VMOVDQU 3424(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs (each out[i] data pointer is read from its slice header, then 32 bytes are written at offset R11)
- MOVQ (R10), R12 // R12 = data pointer of out[0]
- VMOVDQU Y0, (R12)(R11*1) // write 32 bytes of output 0 back at byte offset R11
- MOVQ 24(R10), R12
- VMOVDQU Y1, (R12)(R11*1)
- MOVQ 48(R10), R12
- VMOVDQU Y2, (R12)(R11*1)
- MOVQ 72(R10), R12
- VMOVDQU Y3, (R12)(R11*1)
- MOVQ 96(R10), R12
- VMOVDQU Y4, (R12)(R11*1)
- MOVQ 120(R10), R12
- VMOVDQU Y5, (R12)(R11*1)
- MOVQ 144(R10), R12
- VMOVDQU Y6, (R12)(R11*1)
- MOVQ 168(R10), R12
- VMOVDQU Y7, (R12)(R11*1)
- MOVQ 192(R10), R12
- VMOVDQU Y8, (R12)(R11*1)
-
- // Prepare for next loop
- ADDQ $0x20, R11 // advance the output offset by 32 bytes
- DECQ AX // one fewer 32-byte chunk remaining
- JNZ mulAvxTwo_6x9Xor_loop
- VZEROUPPER // zero the upper halves of the YMM registers before returning
-
-mulAvxTwo_6x9Xor_end:
- RET
-
-// func mulAvxTwo_6x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x10(SB), NOSPLIT, $0-88
- // Loading no tables to registers: each 32-byte table is read from the matrix pointer (CX) right where it is used
- // Destination kept on stack: output pointers are re-read from the out slice headers (R10) at store time
- // Full registers estimated 135 YMM used
- MOVQ n+80(FP), AX // AX = n
- MOVQ matrix_base+0(FP), CX // CX = base address of the lookup tables
- SHRQ $0x05, AX // AX = n / 32 = number of 32-byte loop iterations
- TESTQ AX, AX
- JZ mulAvxTwo_6x10_end // fewer than 32 bytes requested: nothing is written
- MOVQ in_base+24(FP), DX // DX = address of the first in[] slice header (headers are 24 bytes apart)
- MOVQ (DX), BX // BX = data pointer of in[0]
- MOVQ 24(DX), SI // SI = data pointer of in[1]
- MOVQ 48(DX), DI // DI = data pointer of in[2]
- MOVQ 72(DX), R8 // R8 = data pointer of in[3]
- MOVQ 96(DX), R9 // R9 = data pointer of in[4]
- MOVQ 120(DX), DX // DX = data pointer of in[5]; the header base is no longer needed
- MOVQ out_base+48(FP), R10 // R10 = address of the first out[] slice header
- MOVQ start+72(FP), R11 // R11 = start byte offset, also used as the running output offset
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, R8
- ADDQ R11, R9
- ADDQ R11, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X10
- VPBROADCASTB X10, Y10 // Y10 = 0x0f in every byte lane: the low-nibble mask
-
-mulAvxTwo_6x10_loop:
- // Load and process 32 bytes from input 0 to 10 outputs (this first input initialises Y0-Y9 with a plain VPXOR)
- VMOVDQU (BX), Y13 // Y13 = next 32 bytes of in[0]
- ADDQ $0x20, BX // advance the in[0] pointer by 32 bytes
- VPSRLQ $0x04, Y13, Y14 // shift right by 4 so each byte's high nibble lands in its low 4 bits
- VPAND Y10, Y13, Y13 // Y13 = low nibble of every input byte
- VPAND Y10, Y14, Y14 // Y14 = high nibble of every input byte
- VMOVDQU (CX), Y11 // table indexed by the low nibbles
- VMOVDQU 32(CX), Y12 // table indexed by the high nibbles
- VPSHUFB Y13, Y11, Y11 // per-byte table lookup using the low nibbles
- VPSHUFB Y14, Y12, Y12 // per-byte table lookup using the high nibbles
- VPXOR Y11, Y12, Y0 // Y0 = XOR of both lookups: initial value of output 0
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y1
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y2
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y3
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y4
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y5
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y6
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y7
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y8
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y9
-
- // Load and process 32 bytes from input 1 to 10 outputs (XOR3WAY is a macro defined outside this view; presumably it XORs both lookups into the accumulator - TODO confirm)
- VMOVDQU (SI), Y13
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 2 to 10 outputs
- VMOVDQU (DI), Y13
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1280(CX), Y11
- VMOVDQU 1312(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1344(CX), Y11
- VMOVDQU 1376(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 1408(CX), Y11
- VMOVDQU 1440(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 1472(CX), Y11
- VMOVDQU 1504(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 1536(CX), Y11
- VMOVDQU 1568(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 1600(CX), Y11
- VMOVDQU 1632(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1664(CX), Y11
- VMOVDQU 1696(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1728(CX), Y11
- VMOVDQU 1760(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1792(CX), Y11
- VMOVDQU 1824(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1856(CX), Y11
- VMOVDQU 1888(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 3 to 10 outputs
- VMOVDQU (R8), Y13
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1920(CX), Y11
- VMOVDQU 1952(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1984(CX), Y11
- VMOVDQU 2016(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2048(CX), Y11
- VMOVDQU 2080(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2112(CX), Y11
- VMOVDQU 2144(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2176(CX), Y11
- VMOVDQU 2208(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2240(CX), Y11
- VMOVDQU 2272(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2304(CX), Y11
- VMOVDQU 2336(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 2368(CX), Y11
- VMOVDQU 2400(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 2432(CX), Y11
- VMOVDQU 2464(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 2496(CX), Y11
- VMOVDQU 2528(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 4 to 10 outputs
- VMOVDQU (R9), Y13
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 2560(CX), Y11
- VMOVDQU 2592(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 2624(CX), Y11
- VMOVDQU 2656(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2688(CX), Y11
- VMOVDQU 2720(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2752(CX), Y11
- VMOVDQU 2784(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2816(CX), Y11
- VMOVDQU 2848(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2880(CX), Y11
- VMOVDQU 2912(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2944(CX), Y11
- VMOVDQU 2976(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3008(CX), Y11
- VMOVDQU 3040(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3072(CX), Y11
- VMOVDQU 3104(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3136(CX), Y11
- VMOVDQU 3168(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 5 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3200(CX), Y11
- VMOVDQU 3232(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3264(CX), Y11
- VMOVDQU 3296(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3328(CX), Y11
- VMOVDQU 3360(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 3392(CX), Y11
- VMOVDQU 3424(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 3456(CX), Y11
- VMOVDQU 3488(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 3520(CX), Y11
- VMOVDQU 3552(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 3584(CX), Y11
- VMOVDQU 3616(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3648(CX), Y11
- VMOVDQU 3680(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3712(CX), Y11
- VMOVDQU 3744(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3776(CX), Y11
- VMOVDQU 3808(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs (each out[i] data pointer is read from its slice header, then 32 bytes are written at offset R11)
- MOVQ (R10), R12 // R12 = data pointer of out[0]
- VMOVDQU Y0, (R12)(R11*1) // write 32 bytes of output 0 at byte offset R11
- MOVQ 24(R10), R12
- VMOVDQU Y1, (R12)(R11*1)
- MOVQ 48(R10), R12
- VMOVDQU Y2, (R12)(R11*1)
- MOVQ 72(R10), R12
- VMOVDQU Y3, (R12)(R11*1)
- MOVQ 96(R10), R12
- VMOVDQU Y4, (R12)(R11*1)
- MOVQ 120(R10), R12
- VMOVDQU Y5, (R12)(R11*1)
- MOVQ 144(R10), R12
- VMOVDQU Y6, (R12)(R11*1)
- MOVQ 168(R10), R12
- VMOVDQU Y7, (R12)(R11*1)
- MOVQ 192(R10), R12
- VMOVDQU Y8, (R12)(R11*1)
- MOVQ 216(R10), R12
- VMOVDQU Y9, (R12)(R11*1)
-
- // Prepare for next loop
- ADDQ $0x20, R11 // advance the output offset by 32 bytes
- DECQ AX // one fewer 32-byte chunk remaining
- JNZ mulAvxTwo_6x10_loop
- VZEROUPPER // zero the upper halves of the YMM registers before returning
-
-mulAvxTwo_6x10_end:
- RET
-
-// func mulAvxTwo_6x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_6x10Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers: each 32-byte table is re-read from matrix (CX) inside the loop; the table pair for input i, output j starts at byte offset (i*10+j)*64
- // Destination kept on stack (generator wording): output pointers are not held in registers but reloaded through R10 on every use
- // Full registers estimated 135 YMM used
- MOVQ n+80(FP), AX // AX = n (byte count)
- MOVQ matrix_base+0(FP), CX // CX = base of the lookup tables
- SHRQ $0x05, AX // AX = n / 32 = number of 32-byte iterations
- TESTQ AX, AX
- JZ mulAvxTwo_6x10Xor_end // fewer than 32 bytes requested: nothing to do
- MOVQ in_base+24(FP), DX // DX = address of the first input slice header
- MOVQ (DX), BX // data pointer of in[0]; successive headers are 24 bytes apart
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), DX // in[5]; DX is overwritten last because it held the header base
- MOVQ out_base+48(FP), R10 // R10 = address of the first output slice header
- MOVQ start+72(FP), R11 // R11 = start, used as the running byte offset into every output
-
- // Add start offset to input
- ADDQ R11, BX
- ADDQ R11, SI
- ADDQ R11, DI
- ADDQ R11, R8
- ADDQ R11, R9
- ADDQ R11, DX
- MOVQ $0x0000000f, R12
- MOVQ R12, X10
- VPBROADCASTB X10, Y10 // Y10 = 0x0f in every byte (nibble mask)
-
-mulAvxTwo_6x10Xor_loop:
- // Load and process 32 bytes from input 0 to 10 outputs; in this first pass each accumulator Y0-Y9 is loaded with the current contents of its output before the lookups are combined into it. XOR3WAY is a macro defined outside this view - presumably dst ^= a ^ b; confirm against its definition
- VMOVDQU (BX), Y13
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y13, Y14 // shift right by 4 within 64-bit lanes; the mask below leaves the high nibble of each byte
- VPAND Y10, Y13, Y13 // Y13 = low nibble of each input byte
- VPAND Y10, Y14, Y14 // Y14 = high nibble of each input byte
- MOVQ (R10), R12 // R12 = data pointer of out[0]
- VMOVDQU (R12)(R11*1), Y0 // Y0 = existing 32 bytes of out[0] at the current offset
- VMOVDQU (CX), Y11 // table indexed by the low nibbles
- VMOVDQU 32(CX), Y12 // table indexed by the high nibbles
- VPSHUFB Y13, Y11, Y11 // per-byte table lookup using the low nibbles
- VPSHUFB Y14, Y12, Y12 // per-byte table lookup using the high nibbles
- XOR3WAY( $0x00, Y11, Y12, Y0)
- MOVQ 24(R10), R12
- VMOVDQU (R12)(R11*1), Y1
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- MOVQ 48(R10), R12
- VMOVDQU (R12)(R11*1), Y2
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- MOVQ 72(R10), R12
- VMOVDQU (R12)(R11*1), Y3
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- MOVQ 96(R10), R12
- VMOVDQU (R12)(R11*1), Y4
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- MOVQ 120(R10), R12
- VMOVDQU (R12)(R11*1), Y5
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- MOVQ 144(R10), R12
- VMOVDQU (R12)(R11*1), Y6
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- MOVQ 168(R10), R12
- VMOVDQU (R12)(R11*1), Y7
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- MOVQ 192(R10), R12
- VMOVDQU (R12)(R11*1), Y8
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- MOVQ 216(R10), R12
- VMOVDQU (R12)(R11*1), Y9
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 1 to 10 outputs
- VMOVDQU (SI), Y13
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 2 to 10 outputs
- VMOVDQU (DI), Y13
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1280(CX), Y11
- VMOVDQU 1312(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1344(CX), Y11
- VMOVDQU 1376(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 1408(CX), Y11
- VMOVDQU 1440(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 1472(CX), Y11
- VMOVDQU 1504(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 1536(CX), Y11
- VMOVDQU 1568(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 1600(CX), Y11
- VMOVDQU 1632(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1664(CX), Y11
- VMOVDQU 1696(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1728(CX), Y11
- VMOVDQU 1760(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1792(CX), Y11
- VMOVDQU 1824(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1856(CX), Y11
- VMOVDQU 1888(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 3 to 10 outputs
- VMOVDQU (R8), Y13
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1920(CX), Y11
- VMOVDQU 1952(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1984(CX), Y11
- VMOVDQU 2016(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2048(CX), Y11
- VMOVDQU 2080(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2112(CX), Y11
- VMOVDQU 2144(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2176(CX), Y11
- VMOVDQU 2208(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2240(CX), Y11
- VMOVDQU 2272(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2304(CX), Y11
- VMOVDQU 2336(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 2368(CX), Y11
- VMOVDQU 2400(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 2432(CX), Y11
- VMOVDQU 2464(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 2496(CX), Y11
- VMOVDQU 2528(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 4 to 10 outputs
- VMOVDQU (R9), Y13
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 2560(CX), Y11
- VMOVDQU 2592(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 2624(CX), Y11
- VMOVDQU 2656(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2688(CX), Y11
- VMOVDQU 2720(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2752(CX), Y11
- VMOVDQU 2784(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2816(CX), Y11
- VMOVDQU 2848(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2880(CX), Y11
- VMOVDQU 2912(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2944(CX), Y11
- VMOVDQU 2976(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3008(CX), Y11
- VMOVDQU 3040(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3072(CX), Y11
- VMOVDQU 3104(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3136(CX), Y11
- VMOVDQU 3168(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 5 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3200(CX), Y11
- VMOVDQU 3232(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3264(CX), Y11
- VMOVDQU 3296(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3328(CX), Y11
- VMOVDQU 3360(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 3392(CX), Y11
- VMOVDQU 3424(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 3456(CX), Y11
- VMOVDQU 3488(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 3520(CX), Y11
- VMOVDQU 3552(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 3584(CX), Y11
- VMOVDQU 3616(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3648(CX), Y11
- VMOVDQU 3680(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3712(CX), Y11
- VMOVDQU 3744(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3776(CX), Y11
- VMOVDQU 3808(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs back to the same out[j] + R11 locations that were loaded at the top of the loop
- MOVQ (R10), R12
- VMOVDQU Y0, (R12)(R11*1)
- MOVQ 24(R10), R12
- VMOVDQU Y1, (R12)(R11*1)
- MOVQ 48(R10), R12
- VMOVDQU Y2, (R12)(R11*1)
- MOVQ 72(R10), R12
- VMOVDQU Y3, (R12)(R11*1)
- MOVQ 96(R10), R12
- VMOVDQU Y4, (R12)(R11*1)
- MOVQ 120(R10), R12
- VMOVDQU Y5, (R12)(R11*1)
- MOVQ 144(R10), R12
- VMOVDQU Y6, (R12)(R11*1)
- MOVQ 168(R10), R12
- VMOVDQU Y7, (R12)(R11*1)
- MOVQ 192(R10), R12
- VMOVDQU Y8, (R12)(R11*1)
- MOVQ 216(R10), R12
- VMOVDQU Y9, (R12)(R11*1)
-
- // Prepare for next loop
- ADDQ $0x20, R11 // advance the shared output offset by 32 bytes
- DECQ AX
- JNZ mulAvxTwo_6x10Xor_loop
- VZEROUPPER // zero the upper halves of the YMM registers before returning
-
-mulAvxTwo_6x10Xor_end:
- RET
-
-// func mulAvxTwo_7x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x1(SB), NOSPLIT, $0-88
- // Loading no tables to registers: each 32-byte table is re-read from matrix (CX) inside the loop; the table pair for input i starts at byte offset i*64
- // Destination kept in GP registers: R11 holds the single output pointer and is advanced in the loop
- // Full registers estimated 18 YMM used
- MOVQ n+80(FP), AX // AX = n (byte count)
- MOVQ matrix_base+0(FP), CX // CX = base of the lookup tables
- SHRQ $0x05, AX // AX = n / 32 = number of 32-byte iterations
- TESTQ AX, AX
- JZ mulAvxTwo_7x1_end // fewer than 32 bytes requested: nothing to do
- MOVQ in_base+24(FP), DX // DX = address of the first input slice header
- MOVQ (DX), BX // data pointer of in[0]; successive headers are 24 bytes apart
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX // in[6]; DX is overwritten last because it held the header base
- MOVQ out_base+48(FP), R11
- MOVQ (R11), R11 // R11 = data pointer of out[0]
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, R11
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, DX
- MOVQ $0x0000000f, R12 // R12 is free again once start has been applied
- MOVQ R12, X1
- VPBROADCASTB X1, Y1 // Y1 = 0x0f in every byte (nibble mask)
-
-mulAvxTwo_7x1_loop:
- // Load and process 32 bytes from input 0 to 1 outputs; the accumulator Y0 is initialised here with a plain VPXOR, so the previous output contents are not read
- VMOVDQU (BX), Y4
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y4, Y5 // shift right by 4 within 64-bit lanes; the mask below leaves the high nibble of each byte
- VPAND Y1, Y4, Y4 // Y4 = low nibble of each input byte
- VPAND Y1, Y5, Y5 // Y5 = high nibble of each input byte
- VMOVDQU (CX), Y2 // table indexed by the low nibbles
- VMOVDQU 32(CX), Y3 // table indexed by the high nibbles
- VPSHUFB Y4, Y2, Y2 // per-byte table lookup using the low nibbles
- VPSHUFB Y5, Y3, Y3 // per-byte table lookup using the high nibbles
- VPXOR Y2, Y3, Y0 // Y0 = low lookup XOR high lookup
-
- // Load and process 32 bytes from input 1 to 1 outputs; XOR3WAY is a macro defined outside this view - presumably dst ^= a ^ b; confirm against its definition
- VMOVDQU (SI), Y4
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y4
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 128(CX), Y2
- VMOVDQU 160(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y4
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 192(CX), Y2
- VMOVDQU 224(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y4
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 256(CX), Y2
- VMOVDQU 288(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 5 to 1 outputs
- VMOVDQU (R10), Y4
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 320(CX), Y2
- VMOVDQU 352(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 6 to 1 outputs
- VMOVDQU (DX), Y4
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 384(CX), Y2
- VMOVDQU 416(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Store 1 outputs
- VMOVDQU Y0, (R11)
- ADDQ $0x20, R11 // advance the output pointer by 32 bytes
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_7x1_loop
- VZEROUPPER // zero the upper halves of the YMM registers before returning
-
-mulAvxTwo_7x1_end:
- RET
-
-// func mulAvxTwo_7x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x1_64(SB), $0-88 // NOTE(review): no NOSPLIT flag here, unlike mulAvxTwo_7x1 - confirm this is intended
- // Loading no tables to registers: each 32-byte table is re-read from matrix (CX) inside the loop; the table pair for input i starts at byte offset i*64
- // Destination kept in GP registers: R11 holds the single output pointer and is advanced in the loop
- // Full registers estimated 34 YMM used
- MOVQ n+80(FP), AX // AX = n (byte count)
- MOVQ matrix_base+0(FP), CX // CX = base of the lookup tables
- SHRQ $0x06, AX // AX = n / 64 = number of 64-byte iterations
- TESTQ AX, AX
- JZ mulAvxTwo_7x1_64_end // fewer than 64 bytes requested: nothing to do
- MOVQ in_base+24(FP), DX // DX = address of the first input slice header
- MOVQ (DX), BX // data pointer of in[0]; successive headers are 24 bytes apart
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX // in[6]; DX is overwritten last because it held the header base
- MOVQ out_base+48(FP), R11
- MOVQ out_base+48(FP), R11 // NOTE(review): repeats the previous load; redundant but harmless
- MOVQ (R11), R11 // R11 = data pointer of out[0]
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, R11
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, DX
- MOVQ $0x0000000f, R12 // R12 is free again once start has been applied
- MOVQ R12, X2
- VPBROADCASTB X2, Y2 // Y2 = 0x0f in every byte (nibble mask)
-
-mulAvxTwo_7x1_64_loop:
- // Load and process 64 bytes from input 0 to 1 outputs; Y0 accumulates the first 32 bytes of the result and Y1 the second 32, both initialised here with plain VPXOR so the previous output contents are not read
- VMOVDQU (BX), Y6 // first 32 input bytes
- VMOVDQU 32(BX), Y5 // second 32 input bytes
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6 // low nibbles, first half
- VPAND Y2, Y5, Y5 // low nibbles, second half
- VPAND Y2, Y7, Y7 // high nibbles, first half
- VPAND Y2, Y8, Y8 // high nibbles, second half
- VMOVDQU (CX), Y3 // table indexed by the low nibbles
- VMOVDQU 32(CX), Y4 // table indexed by the high nibbles
- VPSHUFB Y5, Y3, Y5 // low lookup, second half (must run before Y3 is overwritten)
- VPSHUFB Y6, Y3, Y3 // low lookup, first half
- VPSHUFB Y8, Y4, Y6 // high lookup, second half (Y6 is reused as scratch)
- VPSHUFB Y7, Y4, Y4 // high lookup, first half
- VPXOR Y3, Y4, Y0
- VPXOR Y5, Y6, Y1
-
- // Load and process 64 bytes from input 1 to 1 outputs; XOR3WAY is a macro defined outside this view - presumably dst ^= a ^ b; confirm against its definition
- VMOVDQU (SI), Y6
- VMOVDQU 32(SI), Y5
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y6
- VMOVDQU 32(DI), Y5
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y6
- VMOVDQU 32(R8), Y5
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y6
- VMOVDQU 32(R9), Y5
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 5 to 1 outputs
- VMOVDQU (R10), Y6
- VMOVDQU 32(R10), Y5
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 6 to 1 outputs
- VMOVDQU (DX), Y6
- VMOVDQU 32(DX), Y5
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Store 1 outputs (64 bytes, written as two 32-byte halves)
- VMOVDQU Y0, (R11)
- VMOVDQU Y1, 32(R11)
- ADDQ $0x40, R11 // advance the output pointer by 64 bytes
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_7x1_64_loop
- VZEROUPPER // zero the upper halves of the YMM registers before returning
-
-mulAvxTwo_7x1_64_end:
- RET
-
-// func mulAvxTwo_7x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x1Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers: each 32-byte table is re-read from matrix (CX) inside the loop; the table pair for input i starts at byte offset i*64
- // Destination kept in GP registers: R11 holds the single output pointer and is advanced in the loop
- // Full registers estimated 18 YMM used
- MOVQ n+80(FP), AX // AX = n (byte count)
- MOVQ matrix_base+0(FP), CX // CX = base of the lookup tables
- SHRQ $0x05, AX // AX = n / 32 = number of 32-byte iterations
- TESTQ AX, AX
- JZ mulAvxTwo_7x1Xor_end // fewer than 32 bytes requested: nothing to do
- MOVQ in_base+24(FP), DX // DX = address of the first input slice header
- MOVQ (DX), BX // data pointer of in[0]; successive headers are 24 bytes apart
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX // in[6]; DX is overwritten last because it held the header base
- MOVQ out_base+48(FP), R11
- MOVQ (R11), R11 // R11 = data pointer of out[0]
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, R11
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, DX
- MOVQ $0x0000000f, R12 // R12 is free again once start has been applied
- MOVQ R12, X1
- VPBROADCASTB X1, Y1 // Y1 = 0x0f in every byte (nibble mask)
-
-mulAvxTwo_7x1Xor_loop:
- // Load and process 32 bytes from input 0 to 1 outputs; the accumulator Y0 is first loaded with the current output bytes so the lookups are combined with existing data. XOR3WAY is a macro defined outside this view - presumably dst ^= a ^ b; confirm against its definition
- VMOVDQU (BX), Y4
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y4, Y5 // shift right by 4 within 64-bit lanes; the mask below leaves the high nibble of each byte
- VPAND Y1, Y4, Y4 // Y4 = low nibble of each input byte
- VPAND Y1, Y5, Y5 // Y5 = high nibble of each input byte
- VMOVDQU (R11), Y0 // Y0 = existing 32 bytes of the output
- VMOVDQU (CX), Y2 // table indexed by the low nibbles
- VMOVDQU 32(CX), Y3 // table indexed by the high nibbles
- VPSHUFB Y4, Y2, Y2 // per-byte table lookup using the low nibbles
- VPSHUFB Y5, Y3, Y3 // per-byte table lookup using the high nibbles
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y4
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y4
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 128(CX), Y2
- VMOVDQU 160(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y4
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 192(CX), Y2
- VMOVDQU 224(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y4
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 256(CX), Y2
- VMOVDQU 288(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 5 to 1 outputs
- VMOVDQU (R10), Y4
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 320(CX), Y2
- VMOVDQU 352(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 6 to 1 outputs
- VMOVDQU (DX), Y4
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 384(CX), Y2
- VMOVDQU 416(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Store 1 outputs (same location that was loaded at the top of the loop)
- VMOVDQU Y0, (R11)
- ADDQ $0x20, R11 // advance the output pointer by 32 bytes
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_7x1Xor_loop
- VZEROUPPER // zero the upper halves of the YMM registers before returning
-
-mulAvxTwo_7x1Xor_end:
- RET
-
-// func mulAvxTwo_7x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x1_64Xor(SB), $0-88 // NOTE(review): no NOSPLIT flag here, unlike mulAvxTwo_7x1Xor - confirm this is intended
- // Loading no tables to registers: each 32-byte table is re-read from matrix (CX) inside the loop; the table pair for input i starts at byte offset i*64
- // Destination kept in GP registers: R11 holds the single output pointer and is advanced in the loop
- // Full registers estimated 34 YMM used
- MOVQ n+80(FP), AX // AX = n (byte count)
- MOVQ matrix_base+0(FP), CX // CX = base of the lookup tables
- SHRQ $0x06, AX // AX = n / 64 = number of 64-byte iterations
- TESTQ AX, AX
- JZ mulAvxTwo_7x1_64Xor_end // fewer than 64 bytes requested: nothing to do
- MOVQ in_base+24(FP), DX // DX = address of the first input slice header
- MOVQ (DX), BX // data pointer of in[0]; successive headers are 24 bytes apart
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX // in[6]; DX is overwritten last because it held the header base
- MOVQ out_base+48(FP), R11
- MOVQ out_base+48(FP), R11 // NOTE(review): repeats the previous load; redundant but harmless
- MOVQ (R11), R11 // R11 = data pointer of out[0]
- MOVQ start+72(FP), R12
-
- // Add start offset to output
- ADDQ R12, R11
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, DX
- MOVQ $0x0000000f, R12 // R12 is free again once start has been applied
- MOVQ R12, X2
- VPBROADCASTB X2, Y2 // Y2 = 0x0f in every byte (nibble mask)
-
-mulAvxTwo_7x1_64Xor_loop:
- // Load 1 outputs: Y0/Y1 start from the current 64 output bytes, so the lookups below are combined with existing data
- VMOVDQU (R11), Y0
- VMOVDQU 32(R11), Y1
-
- // Load and process 64 bytes from input 0 to 1 outputs; XOR3WAY is a macro defined outside this view - presumably dst ^= a ^ b; confirm against its definition
- VMOVDQU (BX), Y6 // first 32 input bytes
- VMOVDQU 32(BX), Y5 // second 32 input bytes
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6 // low nibbles, first half
- VPAND Y2, Y5, Y5 // low nibbles, second half
- VPAND Y2, Y7, Y7 // high nibbles, first half
- VPAND Y2, Y8, Y8 // high nibbles, second half
- VMOVDQU (CX), Y3 // table indexed by the low nibbles
- VMOVDQU 32(CX), Y4 // table indexed by the high nibbles
- VPSHUFB Y5, Y3, Y5 // low lookup, second half (must run before Y3 is overwritten)
- VPSHUFB Y6, Y3, Y3 // low lookup, first half
- VPSHUFB Y8, Y4, Y6 // high lookup, second half (Y6 is reused as scratch)
- VPSHUFB Y7, Y4, Y4 // high lookup, first half
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y6
- VMOVDQU 32(SI), Y5
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y6
- VMOVDQU 32(DI), Y5
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y6
- VMOVDQU 32(R8), Y5
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y6
- VMOVDQU 32(R9), Y5
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 5 to 1 outputs
- VMOVDQU (R10), Y6
- VMOVDQU 32(R10), Y5
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 6 to 1 outputs
- VMOVDQU (DX), Y6
- VMOVDQU 32(DX), Y5
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Store 1 outputs (64 bytes, written back to the location loaded at the top of the loop)
- VMOVDQU Y0, (R11)
- VMOVDQU Y1, 32(R11)
- ADDQ $0x40, R11 // advance the output pointer by 64 bytes
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_7x1_64Xor_loop
- VZEROUPPER // zero the upper halves of the YMM registers before returning
-
-mulAvxTwo_7x1_64Xor_end:
- RET
-
-// func mulAvxTwo_7x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x2(SB), NOSPLIT, $0-88
- // Loading no tables to registers: each 32-byte table is re-read from matrix (CX) inside the loop; the table pair for input i, output j starts at byte offset (i*2+j)*64
- // Destination kept in GP registers: R12 and R11 hold the two output pointers and are advanced in the loop
- // Full registers estimated 35 YMM used
- MOVQ n+80(FP), AX // AX = n (byte count)
- MOVQ matrix_base+0(FP), CX // CX = base of the lookup tables
- SHRQ $0x05, AX // AX = n / 32 = number of 32-byte iterations
- TESTQ AX, AX
- JZ mulAvxTwo_7x2_end // fewer than 32 bytes requested: nothing to do
- MOVQ in_base+24(FP), DX // DX = address of the first input slice header
- MOVQ (DX), BX // data pointer of in[0]; successive headers are 24 bytes apart
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX // in[6]; DX is overwritten last because it held the header base
- MOVQ out_base+48(FP), R11
- MOVQ (R11), R12 // R12 = data pointer of out[0]
- MOVQ 24(R11), R11 // R11 = data pointer of out[1]; overwrites the header base, so it is loaded last
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R12
- ADDQ R13, R11
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, DX
- MOVQ $0x0000000f, R13 // R13 is free again once start has been applied
- MOVQ R13, X2
- VPBROADCASTB X2, Y2 // Y2 = 0x0f in every byte (nibble mask)
-
-mulAvxTwo_7x2_loop:
- // Load and process 32 bytes from input 0 to 2 outputs; the accumulators Y0 (output 0) and Y1 (output 1) are initialised here with plain VPXOR, so the previous output contents are not read
- VMOVDQU (BX), Y5
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y5, Y6 // shift right by 4 within 64-bit lanes; the mask below leaves the high nibble of each byte
- VPAND Y2, Y5, Y5 // Y5 = low nibble of each input byte
- VPAND Y2, Y6, Y6 // Y6 = high nibble of each input byte
- VMOVDQU (CX), Y3 // table indexed by the low nibbles
- VMOVDQU 32(CX), Y4 // table indexed by the high nibbles
- VPSHUFB Y5, Y3, Y3 // per-byte table lookup using the low nibbles
- VPSHUFB Y6, Y4, Y4 // per-byte table lookup using the high nibbles
- VPXOR Y3, Y4, Y0
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y1
-
- // Load and process 32 bytes from input 1 to 2 outputs; XOR3WAY is a macro defined outside this view - presumably dst ^= a ^ b; confirm against its definition
- VMOVDQU (SI), Y5
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y5
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y5
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y5
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 5 to 2 outputs
- VMOVDQU (R10), Y5
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 640(CX), Y3
- VMOVDQU 672(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 704(CX), Y3
- VMOVDQU 736(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 6 to 2 outputs
- VMOVDQU (DX), Y5
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 768(CX), Y3
- VMOVDQU 800(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 832(CX), Y3
- VMOVDQU 864(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Store 2 outputs, advancing each output pointer by 32 bytes
- VMOVDQU Y0, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y1, (R11)
- ADDQ $0x20, R11
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_7x2_loop
- VZEROUPPER // zero the upper halves of the YMM registers before returning
-
-mulAvxTwo_7x2_end:
- RET
-
-// func mulAvxTwo_7x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x2_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 65 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x2_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX
- MOVQ out_base+48(FP), R11
- MOVQ out_base+48(FP), R11
- MOVQ (R11), R12
- MOVQ 24(R11), R11
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R12
- ADDQ R13, R11
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_7x2_64_loop:
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9
- VMOVDQU 32(BX), Y11
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y0
- VPXOR Y7, Y8, Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y2
- VPXOR Y7, Y8, Y3
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y9
- VMOVDQU 32(SI), Y11
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y9
- VMOVDQU 32(DI), Y11
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y9
- VMOVDQU 32(R8), Y11
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y9
- VMOVDQU 32(R9), Y11
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 5 to 2 outputs
- VMOVDQU (R10), Y9
- VMOVDQU 32(R10), Y11
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 6 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (R12)
- VMOVDQU Y1, 32(R12)
- ADDQ $0x40, R12
- VMOVDQU Y2, (R11)
- VMOVDQU Y3, 32(R11)
- ADDQ $0x40, R11
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_7x2_64_loop
- VZEROUPPER
-
-mulAvxTwo_7x2_64_end:
- RET
-
-// func mulAvxTwo_7x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x2Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 35 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x2Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX
- MOVQ out_base+48(FP), R11
- MOVQ (R11), R12
- MOVQ 24(R11), R11
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R12
- ADDQ R13, R11
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_7x2Xor_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y5
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (R12), Y0
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU (R11), Y1
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y5
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y5
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y5
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y5
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 5 to 2 outputs
- VMOVDQU (R10), Y5
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 640(CX), Y3
- VMOVDQU 672(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 704(CX), Y3
- VMOVDQU 736(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 6 to 2 outputs
- VMOVDQU (DX), Y5
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 768(CX), Y3
- VMOVDQU 800(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 832(CX), Y3
- VMOVDQU 864(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Store 2 outputs
- VMOVDQU Y0, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y1, (R11)
- ADDQ $0x20, R11
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_7x2Xor_loop
- VZEROUPPER
-
-mulAvxTwo_7x2Xor_end:
- RET
-
-// func mulAvxTwo_7x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x2_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 65 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x2_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX
- MOVQ out_base+48(FP), R11
- MOVQ out_base+48(FP), R11
- MOVQ (R11), R12
- MOVQ 24(R11), R11
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R12
- ADDQ R13, R11
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_7x2_64Xor_loop:
- // Load 2 outputs
- VMOVDQU (R12), Y0
- VMOVDQU 32(R12), Y1
- VMOVDQU (R11), Y2
- VMOVDQU 32(R11), Y3
-
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9
- VMOVDQU 32(BX), Y11
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y9
- VMOVDQU 32(SI), Y11
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y9
- VMOVDQU 32(DI), Y11
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y9
- VMOVDQU 32(R8), Y11
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y9
- VMOVDQU 32(R9), Y11
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 5 to 2 outputs
- VMOVDQU (R10), Y9
- VMOVDQU 32(R10), Y11
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 6 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (R12)
- VMOVDQU Y1, 32(R12)
- ADDQ $0x40, R12
- VMOVDQU Y2, (R11)
- VMOVDQU Y3, 32(R11)
- ADDQ $0x40, R11
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_7x2_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_7x2_64Xor_end:
- RET
-
-// func mulAvxTwo_7x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x3(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 50 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x3_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX
- MOVQ out_base+48(FP), R11
- MOVQ (R11), R12
- MOVQ 24(R11), R13
- MOVQ 48(R11), R11
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R12
- ADDQ R14, R13
- ADDQ R14, R11
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X3
- VPBROADCASTB X3, Y3
-
-mulAvxTwo_7x3_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y0
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y1
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y2
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y6
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y6
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y6
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (R9), Y6
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 768(CX), Y4
- VMOVDQU 800(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 832(CX), Y4
- VMOVDQU 864(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 896(CX), Y4
- VMOVDQU 928(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 5 to 3 outputs
- VMOVDQU (R10), Y6
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 960(CX), Y4
- VMOVDQU 992(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1024(CX), Y4
- VMOVDQU 1056(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1088(CX), Y4
- VMOVDQU 1120(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 6 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1152(CX), Y4
- VMOVDQU 1184(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1216(CX), Y4
- VMOVDQU 1248(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1280(CX), Y4
- VMOVDQU 1312(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y1, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y2, (R11)
- ADDQ $0x20, R11
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_7x3_loop
- VZEROUPPER
-
-mulAvxTwo_7x3_end:
- RET
-
-// func mulAvxTwo_7x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x3_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 94 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x3_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX
- MOVQ out_base+48(FP), R11
- MOVQ out_base+48(FP), R11
- MOVQ (R11), R12
- MOVQ 24(R11), R13
- MOVQ 48(R11), R11
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R12
- ADDQ R14, R13
- ADDQ R14, R11
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_7x3_64_loop:
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y11
- VMOVDQU 32(BX), Y13
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y0
- VPXOR Y9, Y10, Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y2
- VPXOR Y9, Y10, Y3
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y4
- VPXOR Y9, Y10, Y5
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y11
- VMOVDQU 32(SI), Y13
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y11
- VMOVDQU 32(DI), Y13
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y11
- VMOVDQU 32(R8), Y13
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 4 to 3 outputs
- VMOVDQU (R9), Y11
- VMOVDQU 32(R9), Y13
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 5 to 3 outputs
- VMOVDQU (R10), Y11
- VMOVDQU 32(R10), Y13
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 6 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (R12)
- VMOVDQU Y1, 32(R12)
- ADDQ $0x40, R12
- VMOVDQU Y2, (R13)
- VMOVDQU Y3, 32(R13)
- ADDQ $0x40, R13
- VMOVDQU Y4, (R11)
- VMOVDQU Y5, 32(R11)
- ADDQ $0x40, R11
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_7x3_64_loop
- VZEROUPPER
-
-mulAvxTwo_7x3_64_end:
- RET
-
-// func mulAvxTwo_7x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x3Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 50 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x3Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX
- MOVQ out_base+48(FP), R11
- MOVQ (R11), R12
- MOVQ 24(R11), R13
- MOVQ 48(R11), R11
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R12
- ADDQ R14, R13
- ADDQ R14, R11
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X3
- VPBROADCASTB X3, Y3
-
-mulAvxTwo_7x3Xor_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (R12), Y0
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU (R13), Y1
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU (R11), Y2
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y6
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y6
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y6
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (R9), Y6
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 768(CX), Y4
- VMOVDQU 800(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 832(CX), Y4
- VMOVDQU 864(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 896(CX), Y4
- VMOVDQU 928(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 5 to 3 outputs
- VMOVDQU (R10), Y6
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 960(CX), Y4
- VMOVDQU 992(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1024(CX), Y4
- VMOVDQU 1056(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1088(CX), Y4
- VMOVDQU 1120(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 6 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1152(CX), Y4
- VMOVDQU 1184(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1216(CX), Y4
- VMOVDQU 1248(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1280(CX), Y4
- VMOVDQU 1312(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y1, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y2, (R11)
- ADDQ $0x20, R11
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_7x3Xor_loop
- VZEROUPPER
-
-mulAvxTwo_7x3Xor_end:
- RET
-
-// func mulAvxTwo_7x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x3_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 94 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x3_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX
- MOVQ out_base+48(FP), R11
- MOVQ out_base+48(FP), R11
- MOVQ (R11), R12
- MOVQ 24(R11), R13
- MOVQ 48(R11), R11
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R12
- ADDQ R14, R13
- ADDQ R14, R11
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_7x3_64Xor_loop:
- // Load 3 outputs
- VMOVDQU (R12), Y0
- VMOVDQU 32(R12), Y1
- VMOVDQU (R13), Y2
- VMOVDQU 32(R13), Y3
- VMOVDQU (R11), Y4
- VMOVDQU 32(R11), Y5
-
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y11
- VMOVDQU 32(BX), Y13
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y11
- VMOVDQU 32(SI), Y13
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y11
- VMOVDQU 32(DI), Y13
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y11
- VMOVDQU 32(R8), Y13
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 4 to 3 outputs
- VMOVDQU (R9), Y11
- VMOVDQU 32(R9), Y13
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 5 to 3 outputs
- VMOVDQU (R10), Y11
- VMOVDQU 32(R10), Y13
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 6 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (R12)
- VMOVDQU Y1, 32(R12)
- ADDQ $0x40, R12
- VMOVDQU Y2, (R13)
- VMOVDQU Y3, 32(R13)
- ADDQ $0x40, R13
- VMOVDQU Y4, (R11)
- VMOVDQU Y5, 32(R11)
- ADDQ $0x40, R11
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_7x3_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_7x3_64Xor_end:
- RET
-
-// func mulAvxTwo_7x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x4(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 65 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x4_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX
- MOVQ out_base+48(FP), R11
- MOVQ (R11), R12
- MOVQ 24(R11), R13
- MOVQ 48(R11), R14
- MOVQ 72(R11), R11
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, R11
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_7x4_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y0
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y1
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y2
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y3
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (SI), Y7
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (DI), Y7
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (R8), Y7
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (R9), Y7
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 5 to 4 outputs
- VMOVDQU (R10), Y7
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1280(CX), Y5
- VMOVDQU 1312(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1344(CX), Y5
- VMOVDQU 1376(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1408(CX), Y5
- VMOVDQU 1440(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1472(CX), Y5
- VMOVDQU 1504(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 6 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1536(CX), Y5
- VMOVDQU 1568(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1600(CX), Y5
- VMOVDQU 1632(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1664(CX), Y5
- VMOVDQU 1696(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1728(CX), Y5
- VMOVDQU 1760(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- VMOVDQU Y0, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y1, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y2, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y3, (R11)
- ADDQ $0x20, R11
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_7x4_loop
- VZEROUPPER
-
-mulAvxTwo_7x4_end:
- RET
-
-// func mulAvxTwo_7x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x4Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 65 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x4Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX
- MOVQ out_base+48(FP), R11
- MOVQ (R11), R12
- MOVQ 24(R11), R13
- MOVQ 48(R11), R14
- MOVQ 72(R11), R11
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, R11
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_7x4Xor_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (R12), Y0
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU (R13), Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU (R14), Y2
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU (R11), Y3
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (SI), Y7
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (DI), Y7
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (R8), Y7
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (R9), Y7
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 5 to 4 outputs
- VMOVDQU (R10), Y7
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1280(CX), Y5
- VMOVDQU 1312(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1344(CX), Y5
- VMOVDQU 1376(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1408(CX), Y5
- VMOVDQU 1440(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1472(CX), Y5
- VMOVDQU 1504(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 6 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1536(CX), Y5
- VMOVDQU 1568(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1600(CX), Y5
- VMOVDQU 1632(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1664(CX), Y5
- VMOVDQU 1696(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1728(CX), Y5
- VMOVDQU 1760(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- VMOVDQU Y0, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y1, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y2, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y3, (R11)
- ADDQ $0x20, R11
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_7x4Xor_loop
- VZEROUPPER
-
-mulAvxTwo_7x4Xor_end:
- RET
-
-// func mulAvxTwo_7x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x5(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 80 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x5_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX
- MOVQ out_base+48(FP), R11
- MOVQ (R11), R12
- MOVQ 24(R11), R13
- MOVQ 48(R11), R14
- MOVQ 72(R11), R15
- MOVQ 96(R11), R11
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R11
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_7x5_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y0
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y1
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y2
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y3
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y4
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (SI), Y8
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (DI), Y8
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (R8), Y8
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (R9), Y8
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 5 to 5 outputs
- VMOVDQU (R10), Y8
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1600(CX), Y6
- VMOVDQU 1632(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1664(CX), Y6
- VMOVDQU 1696(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1728(CX), Y6
- VMOVDQU 1760(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1792(CX), Y6
- VMOVDQU 1824(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1856(CX), Y6
- VMOVDQU 1888(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 6 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1920(CX), Y6
- VMOVDQU 1952(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1984(CX), Y6
- VMOVDQU 2016(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2048(CX), Y6
- VMOVDQU 2080(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2112(CX), Y6
- VMOVDQU 2144(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2176(CX), Y6
- VMOVDQU 2208(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- VMOVDQU Y0, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y1, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y2, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y3, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y4, (R11)
- ADDQ $0x20, R11
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_7x5_loop
- VZEROUPPER
-
-mulAvxTwo_7x5_end:
- RET
-
-// func mulAvxTwo_7x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x5Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 80 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x5Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX
- MOVQ out_base+48(FP), R11
- MOVQ (R11), R12
- MOVQ 24(R11), R13
- MOVQ 48(R11), R14
- MOVQ 72(R11), R15
- MOVQ 96(R11), R11
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R11
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_7x5Xor_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (R12), Y0
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU (R13), Y1
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU (R14), Y2
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU (R15), Y3
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU (R11), Y4
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (SI), Y8
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (DI), Y8
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (R8), Y8
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (R9), Y8
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 5 to 5 outputs
- VMOVDQU (R10), Y8
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1600(CX), Y6
- VMOVDQU 1632(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1664(CX), Y6
- VMOVDQU 1696(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1728(CX), Y6
- VMOVDQU 1760(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1792(CX), Y6
- VMOVDQU 1824(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1856(CX), Y6
- VMOVDQU 1888(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 6 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1920(CX), Y6
- VMOVDQU 1952(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1984(CX), Y6
- VMOVDQU 2016(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2048(CX), Y6
- VMOVDQU 2080(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2112(CX), Y6
- VMOVDQU 2144(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2176(CX), Y6
- VMOVDQU 2208(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- VMOVDQU Y0, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y1, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y2, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y3, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y4, (R11)
- ADDQ $0x20, R11
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_7x5Xor_loop
- VZEROUPPER
-
-mulAvxTwo_7x5Xor_end:
- RET
-
-// func mulAvxTwo_7x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x6(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 95 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x6_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), SI
- MOVQ 72(AX), DI
- MOVQ 96(AX), R8
- MOVQ 120(AX), R9
- MOVQ 144(AX), AX
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R12
- MOVQ 48(R10), R13
- MOVQ 72(R10), R14
- MOVQ 96(R10), R15
- MOVQ 120(R10), R10
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R10
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X6
- VPBROADCASTB X6, Y6
- MOVQ n+80(FP), BP
- SHRQ $0x05, BP
-
-mulAvxTwo_7x6_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y0
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y1
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y2
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y3
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y4
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y5
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (SI), Y9
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (DI), Y9
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (R8), Y9
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 5 to 6 outputs
- VMOVDQU (R9), Y9
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1920(CX), Y7
- VMOVDQU 1952(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1984(CX), Y7
- VMOVDQU 2016(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2048(CX), Y7
- VMOVDQU 2080(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2112(CX), Y7
- VMOVDQU 2144(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2176(CX), Y7
- VMOVDQU 2208(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2240(CX), Y7
- VMOVDQU 2272(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 6 to 6 outputs
- VMOVDQU (AX), Y9
- ADDQ $0x20, AX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2304(CX), Y7
- VMOVDQU 2336(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 2368(CX), Y7
- VMOVDQU 2400(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2432(CX), Y7
- VMOVDQU 2464(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2496(CX), Y7
- VMOVDQU 2528(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2560(CX), Y7
- VMOVDQU 2592(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2624(CX), Y7
- VMOVDQU 2656(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- VMOVDQU Y0, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y1, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y2, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y3, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y4, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y5, (R10)
- ADDQ $0x20, R10
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_7x6_loop
- VZEROUPPER
-
-mulAvxTwo_7x6_end:
- RET
-
-// func mulAvxTwo_7x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x6Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 95 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x6Xor_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), SI
- MOVQ 72(AX), DI
- MOVQ 96(AX), R8
- MOVQ 120(AX), R9
- MOVQ 144(AX), AX
- MOVQ out_base+48(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R12
- MOVQ 48(R10), R13
- MOVQ 72(R10), R14
- MOVQ 96(R10), R15
- MOVQ 120(R10), R10
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R10
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X6
- VPBROADCASTB X6, Y6
- MOVQ n+80(FP), BP
- SHRQ $0x05, BP
-
-mulAvxTwo_7x6Xor_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (R11), Y0
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU (R12), Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU (R13), Y2
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU (R14), Y3
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU (R15), Y4
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU (R10), Y5
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (SI), Y9
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (DI), Y9
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (R8), Y9
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 5 to 6 outputs
- VMOVDQU (R9), Y9
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1920(CX), Y7
- VMOVDQU 1952(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1984(CX), Y7
- VMOVDQU 2016(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2048(CX), Y7
- VMOVDQU 2080(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2112(CX), Y7
- VMOVDQU 2144(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2176(CX), Y7
- VMOVDQU 2208(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2240(CX), Y7
- VMOVDQU 2272(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 6 to 6 outputs
- VMOVDQU (AX), Y9
- ADDQ $0x20, AX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2304(CX), Y7
- VMOVDQU 2336(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 2368(CX), Y7
- VMOVDQU 2400(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2432(CX), Y7
- VMOVDQU 2464(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2496(CX), Y7
- VMOVDQU 2528(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2560(CX), Y7
- VMOVDQU 2592(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2624(CX), Y7
- VMOVDQU 2656(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- VMOVDQU Y0, (R11)
- ADDQ $0x20, R11
- VMOVDQU Y1, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y2, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y3, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y4, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y5, (R10)
- ADDQ $0x20, R10
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_7x6Xor_loop
- VZEROUPPER
-
-mulAvxTwo_7x6Xor_end:
- RET
-
-// func mulAvxTwo_7x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x7(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 110 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x7_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX
- MOVQ out_base+48(FP), R11
- MOVQ start+72(FP), R12
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_7x7_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y0
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y1
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y2
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y3
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y4
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y5
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y6
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DI), Y10
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (R8), Y10
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (R9), Y10
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 5 to 7 outputs
- VMOVDQU (R10), Y10
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2240(CX), Y8
- VMOVDQU 2272(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2304(CX), Y8
- VMOVDQU 2336(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2368(CX), Y8
- VMOVDQU 2400(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2432(CX), Y8
- VMOVDQU 2464(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2496(CX), Y8
- VMOVDQU 2528(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2560(CX), Y8
- VMOVDQU 2592(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2624(CX), Y8
- VMOVDQU 2656(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 6 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2688(CX), Y8
- VMOVDQU 2720(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2752(CX), Y8
- VMOVDQU 2784(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2816(CX), Y8
- VMOVDQU 2848(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2880(CX), Y8
- VMOVDQU 2912(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2944(CX), Y8
- VMOVDQU 2976(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3008(CX), Y8
- VMOVDQU 3040(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3072(CX), Y8
- VMOVDQU 3104(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- MOVQ (R11), R13
- VMOVDQU Y0, (R13)(R12*1)
- MOVQ 24(R11), R13
- VMOVDQU Y1, (R13)(R12*1)
- MOVQ 48(R11), R13
- VMOVDQU Y2, (R13)(R12*1)
- MOVQ 72(R11), R13
- VMOVDQU Y3, (R13)(R12*1)
- MOVQ 96(R11), R13
- VMOVDQU Y4, (R13)(R12*1)
- MOVQ 120(R11), R13
- VMOVDQU Y5, (R13)(R12*1)
- MOVQ 144(R11), R13
- VMOVDQU Y6, (R13)(R12*1)
-
- // Prepare for next loop
- ADDQ $0x20, R12
- DECQ AX
- JNZ mulAvxTwo_7x7_loop
- VZEROUPPER
-
-mulAvxTwo_7x7_end:
- RET
-
-// func mulAvxTwo_7x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x7Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 110 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x7Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX
- MOVQ out_base+48(FP), R11
- MOVQ start+72(FP), R12
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_7x7Xor_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- MOVQ (R11), R13
- VMOVDQU (R13)(R12*1), Y0
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- MOVQ 24(R11), R13
- VMOVDQU (R13)(R12*1), Y1
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- MOVQ 48(R11), R13
- VMOVDQU (R13)(R12*1), Y2
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- MOVQ 72(R11), R13
- VMOVDQU (R13)(R12*1), Y3
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- MOVQ 96(R11), R13
- VMOVDQU (R13)(R12*1), Y4
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- MOVQ 120(R11), R13
- VMOVDQU (R13)(R12*1), Y5
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- MOVQ 144(R11), R13
- VMOVDQU (R13)(R12*1), Y6
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DI), Y10
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (R8), Y10
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (R9), Y10
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 5 to 7 outputs
- VMOVDQU (R10), Y10
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2240(CX), Y8
- VMOVDQU 2272(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2304(CX), Y8
- VMOVDQU 2336(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2368(CX), Y8
- VMOVDQU 2400(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2432(CX), Y8
- VMOVDQU 2464(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2496(CX), Y8
- VMOVDQU 2528(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2560(CX), Y8
- VMOVDQU 2592(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2624(CX), Y8
- VMOVDQU 2656(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 6 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2688(CX), Y8
- VMOVDQU 2720(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2752(CX), Y8
- VMOVDQU 2784(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2816(CX), Y8
- VMOVDQU 2848(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2880(CX), Y8
- VMOVDQU 2912(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2944(CX), Y8
- VMOVDQU 2976(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3008(CX), Y8
- VMOVDQU 3040(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3072(CX), Y8
- VMOVDQU 3104(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- MOVQ (R11), R13
- VMOVDQU Y0, (R13)(R12*1)
- MOVQ 24(R11), R13
- VMOVDQU Y1, (R13)(R12*1)
- MOVQ 48(R11), R13
- VMOVDQU Y2, (R13)(R12*1)
- MOVQ 72(R11), R13
- VMOVDQU Y3, (R13)(R12*1)
- MOVQ 96(R11), R13
- VMOVDQU Y4, (R13)(R12*1)
- MOVQ 120(R11), R13
- VMOVDQU Y5, (R13)(R12*1)
- MOVQ 144(R11), R13
- VMOVDQU Y6, (R13)(R12*1)
-
- // Prepare for next loop
- ADDQ $0x20, R12
- DECQ AX
- JNZ mulAvxTwo_7x7Xor_loop
- VZEROUPPER
-
-mulAvxTwo_7x7Xor_end:
- RET
-
-// func mulAvxTwo_7x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x8(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 125 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x8_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX
- MOVQ out_base+48(FP), R11
- MOVQ start+72(FP), R12
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X8
- VPBROADCASTB X8, Y8
-
-mulAvxTwo_7x8_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y0
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y1
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y2
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y3
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y4
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y5
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y6
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y7
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI), Y11
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DI), Y11
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (R8), Y11
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 4 to 8 outputs
- VMOVDQU (R9), Y11
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 5 to 8 outputs
- VMOVDQU (R10), Y11
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2560(CX), Y9
- VMOVDQU 2592(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2624(CX), Y9
- VMOVDQU 2656(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2688(CX), Y9
- VMOVDQU 2720(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2752(CX), Y9
- VMOVDQU 2784(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2816(CX), Y9
- VMOVDQU 2848(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2880(CX), Y9
- VMOVDQU 2912(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2944(CX), Y9
- VMOVDQU 2976(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3008(CX), Y9
- VMOVDQU 3040(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 6 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3072(CX), Y9
- VMOVDQU 3104(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 3136(CX), Y9
- VMOVDQU 3168(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 3200(CX), Y9
- VMOVDQU 3232(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 3264(CX), Y9
- VMOVDQU 3296(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 3328(CX), Y9
- VMOVDQU 3360(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 3392(CX), Y9
- VMOVDQU 3424(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 3456(CX), Y9
- VMOVDQU 3488(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3520(CX), Y9
- VMOVDQU 3552(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- MOVQ (R11), R13
- VMOVDQU Y0, (R13)(R12*1)
- MOVQ 24(R11), R13
- VMOVDQU Y1, (R13)(R12*1)
- MOVQ 48(R11), R13
- VMOVDQU Y2, (R13)(R12*1)
- MOVQ 72(R11), R13
- VMOVDQU Y3, (R13)(R12*1)
- MOVQ 96(R11), R13
- VMOVDQU Y4, (R13)(R12*1)
- MOVQ 120(R11), R13
- VMOVDQU Y5, (R13)(R12*1)
- MOVQ 144(R11), R13
- VMOVDQU Y6, (R13)(R12*1)
- MOVQ 168(R11), R13
- VMOVDQU Y7, (R13)(R12*1)
-
- // Prepare for next loop
- ADDQ $0x20, R12
- DECQ AX
- JNZ mulAvxTwo_7x8_loop
- VZEROUPPER
-
-mulAvxTwo_7x8_end:
- RET
-
-// func mulAvxTwo_7x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x8Xor(SB), NOSPLIT, $0-88 // 7 inputs x 8 outputs, XORed into the existing output; no local frame, 88 bytes of arguments
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 125 YMM used
- MOVQ n+80(FP), AX // AX = n
- MOVQ matrix_base+0(FP), CX // CX = base of the lookup tables
- SHRQ $0x05, AX // AX = n / 32: number of 32-byte loop iterations
- TESTQ AX, AX
- JZ mulAvxTwo_7x8Xor_end // fewer than 32 bytes requested: nothing to do
- MOVQ in_base+24(FP), DX // DX = &in[0]; the slice headers are read at 24-byte intervals
- MOVQ (DX), BX // BX = data pointer of in[0]
- MOVQ 24(DX), SI // SI = data pointer of in[1]
- MOVQ 48(DX), DI // DI = data pointer of in[2]
- MOVQ 72(DX), R8 // R8 = data pointer of in[3]
- MOVQ 96(DX), R9 // R9 = data pointer of in[4]
- MOVQ 120(DX), R10 // R10 = data pointer of in[5]
- MOVQ 144(DX), DX // DX = data pointer of in[6]; loaded last because it overwrites the base
- MOVQ out_base+48(FP), R11 // R11 = &out[0]
- MOVQ start+72(FP), R12 // R12 = start offset, also used as the running output offset
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X8
- VPBROADCASTB X8, Y8 // Y8 = 0x0f in every byte (nibble mask)
-
-mulAvxTwo_7x8Xor_loop:
- // Load and process 32 bytes from input 0 to 8 outputs (tables at 0-511(CX))
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12 // shift each 64-bit lane right by 4 bits
- VPAND Y8, Y11, Y11 // Y11 = low nibble of every input byte
- VPAND Y8, Y12, Y12 // Y12 = high nibble of every input byte
- MOVQ (R11), R13 // R13 = data pointer of out[0]
- VMOVDQU (R13)(R12*1), Y0 // Y0 = current contents of out[0]; the products are accumulated on top of it
- VMOVDQU (CX), Y9 // 32-byte table indexed by the low nibbles
- VMOVDQU 32(CX), Y10 // 32-byte table indexed by the high nibbles
- VPSHUFB Y11, Y9, Y9 // table lookup for the low nibbles
- VPSHUFB Y12, Y10, Y10 // table lookup for the high nibbles
- XOR3WAY( $0x00, Y9, Y10, Y0) // macro defined outside this chunk; presumably Y0 ^= Y9 ^ Y10 - TODO confirm
- MOVQ 24(R11), R13
- VMOVDQU (R13)(R12*1), Y1
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- MOVQ 48(R11), R13
- VMOVDQU (R13)(R12*1), Y2
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- MOVQ 72(R11), R13
- VMOVDQU (R13)(R12*1), Y3
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- MOVQ 96(R11), R13
- VMOVDQU (R13)(R12*1), Y4
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- MOVQ 120(R11), R13
- VMOVDQU (R13)(R12*1), Y5
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- MOVQ 144(R11), R13
- VMOVDQU (R13)(R12*1), Y6
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- MOVQ 168(R11), R13
- VMOVDQU (R13)(R12*1), Y7
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 1 to 8 outputs (tables at 512-1023(CX))
- VMOVDQU (SI), Y11
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 2 to 8 outputs (tables at 1024-1535(CX))
- VMOVDQU (DI), Y11
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 3 to 8 outputs (tables at 1536-2047(CX))
- VMOVDQU (R8), Y11
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 4 to 8 outputs (tables at 2048-2559(CX))
- VMOVDQU (R9), Y11
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 5 to 8 outputs (tables at 2560-3071(CX))
- VMOVDQU (R10), Y11
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2560(CX), Y9
- VMOVDQU 2592(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2624(CX), Y9
- VMOVDQU 2656(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2688(CX), Y9
- VMOVDQU 2720(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2752(CX), Y9
- VMOVDQU 2784(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2816(CX), Y9
- VMOVDQU 2848(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2880(CX), Y9
- VMOVDQU 2912(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2944(CX), Y9
- VMOVDQU 2976(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3008(CX), Y9
- VMOVDQU 3040(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 6 to 8 outputs (tables at 3072-3583(CX))
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3072(CX), Y9
- VMOVDQU 3104(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 3136(CX), Y9
- VMOVDQU 3168(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 3200(CX), Y9
- VMOVDQU 3232(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 3264(CX), Y9
- VMOVDQU 3296(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 3328(CX), Y9
- VMOVDQU 3360(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 3392(CX), Y9
- VMOVDQU 3424(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 3456(CX), Y9
- VMOVDQU 3488(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3520(CX), Y9
- VMOVDQU 3552(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- MOVQ (R11), R13 // each out[j] data pointer is re-read from its slice header on every iteration
- VMOVDQU Y0, (R13)(R12*1) // write 32 bytes at out[0] + R12
- MOVQ 24(R11), R13
- VMOVDQU Y1, (R13)(R12*1)
- MOVQ 48(R11), R13
- VMOVDQU Y2, (R13)(R12*1)
- MOVQ 72(R11), R13
- VMOVDQU Y3, (R13)(R12*1)
- MOVQ 96(R11), R13
- VMOVDQU Y4, (R13)(R12*1)
- MOVQ 120(R11), R13
- VMOVDQU Y5, (R13)(R12*1)
- MOVQ 144(R11), R13
- VMOVDQU Y6, (R13)(R12*1)
- MOVQ 168(R11), R13
- VMOVDQU Y7, (R13)(R12*1)
-
- // Prepare for next loop
- ADDQ $0x20, R12 // advance the output offset by 32 bytes
- DECQ AX // one fewer 32-byte iteration remaining
- JNZ mulAvxTwo_7x8Xor_loop
- VZEROUPPER // executed only after the loop has run; the early-exit jump lands below it
-
-mulAvxTwo_7x8Xor_end:
- RET
-
-// func mulAvxTwo_7x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x9(SB), NOSPLIT, $0-88 // 7 inputs x 9 outputs, overwriting the output; no local frame, 88 bytes of arguments
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 140 YMM used
- MOVQ n+80(FP), AX // AX = n
- MOVQ matrix_base+0(FP), CX // CX = base of the lookup tables
- SHRQ $0x05, AX // AX = n / 32: number of 32-byte loop iterations
- TESTQ AX, AX
- JZ mulAvxTwo_7x9_end // fewer than 32 bytes requested: nothing to do
- MOVQ in_base+24(FP), DX // DX = &in[0]; the slice headers are read at 24-byte intervals
- MOVQ (DX), BX // BX = data pointer of in[0]
- MOVQ 24(DX), SI // SI = data pointer of in[1]
- MOVQ 48(DX), DI // DI = data pointer of in[2]
- MOVQ 72(DX), R8 // R8 = data pointer of in[3]
- MOVQ 96(DX), R9 // R9 = data pointer of in[4]
- MOVQ 120(DX), R10 // R10 = data pointer of in[5]
- MOVQ 144(DX), DX // DX = data pointer of in[6]; loaded last because it overwrites the base
- MOVQ out_base+48(FP), R11 // R11 = &out[0]
- MOVQ start+72(FP), R12 // R12 = start offset, also used as the running output offset
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X9
- VPBROADCASTB X9, Y9 // Y9 = 0x0f in every byte (nibble mask)
-
-mulAvxTwo_7x9_loop:
- // Load and process 32 bytes from input 0 to 9 outputs (tables at 0-575(CX))
- VMOVDQU (BX), Y12
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y12, Y13 // shift each 64-bit lane right by 4 bits
- VPAND Y9, Y12, Y12 // Y12 = low nibble of every input byte
- VPAND Y9, Y13, Y13 // Y13 = high nibble of every input byte
- VMOVDQU (CX), Y10 // 32-byte table indexed by the low nibbles
- VMOVDQU 32(CX), Y11 // 32-byte table indexed by the high nibbles
- VPSHUFB Y12, Y10, Y10 // table lookup for the low nibbles
- VPSHUFB Y13, Y11, Y11 // table lookup for the high nibbles
- VPXOR Y10, Y11, Y0 // input 0 initialises the accumulator; previous output contents are not read
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y1
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y2
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y3
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y4
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y5
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y6
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y7
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y8
-
- // Load and process 32 bytes from input 1 to 9 outputs (tables at 576-1151(CX))
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0) // macro defined outside this chunk; presumably Y0 ^= Y10 ^ Y11 - TODO confirm
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 9 outputs (tables at 1152-1727(CX))
- VMOVDQU (DI), Y12
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1152(CX), Y10
- VMOVDQU 1184(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1216(CX), Y10
- VMOVDQU 1248(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1280(CX), Y10
- VMOVDQU 1312(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1344(CX), Y10
- VMOVDQU 1376(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1408(CX), Y10
- VMOVDQU 1440(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 1472(CX), Y10
- VMOVDQU 1504(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 1536(CX), Y10
- VMOVDQU 1568(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1600(CX), Y10
- VMOVDQU 1632(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1664(CX), Y10
- VMOVDQU 1696(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 3 to 9 outputs (tables at 1728-2303(CX))
- VMOVDQU (R8), Y12
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1728(CX), Y10
- VMOVDQU 1760(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1792(CX), Y10
- VMOVDQU 1824(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1856(CX), Y10
- VMOVDQU 1888(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1920(CX), Y10
- VMOVDQU 1952(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1984(CX), Y10
- VMOVDQU 2016(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2048(CX), Y10
- VMOVDQU 2080(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2112(CX), Y10
- VMOVDQU 2144(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2176(CX), Y10
- VMOVDQU 2208(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2240(CX), Y10
- VMOVDQU 2272(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 4 to 9 outputs (tables at 2304-2879(CX))
- VMOVDQU (R9), Y12
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2304(CX), Y10
- VMOVDQU 2336(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2368(CX), Y10
- VMOVDQU 2400(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 2432(CX), Y10
- VMOVDQU 2464(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 2496(CX), Y10
- VMOVDQU 2528(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 2560(CX), Y10
- VMOVDQU 2592(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2624(CX), Y10
- VMOVDQU 2656(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2688(CX), Y10
- VMOVDQU 2720(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2752(CX), Y10
- VMOVDQU 2784(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2816(CX), Y10
- VMOVDQU 2848(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 5 to 9 outputs (tables at 2880-3455(CX))
- VMOVDQU (R10), Y12
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2880(CX), Y10
- VMOVDQU 2912(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2944(CX), Y10
- VMOVDQU 2976(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3008(CX), Y10
- VMOVDQU 3040(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3072(CX), Y10
- VMOVDQU 3104(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3136(CX), Y10
- VMOVDQU 3168(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3200(CX), Y10
- VMOVDQU 3232(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3264(CX), Y10
- VMOVDQU 3296(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3328(CX), Y10
- VMOVDQU 3360(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3392(CX), Y10
- VMOVDQU 3424(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 6 to 9 outputs (tables at 3456-4031(CX))
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 3456(CX), Y10
- VMOVDQU 3488(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 3520(CX), Y10
- VMOVDQU 3552(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3584(CX), Y10
- VMOVDQU 3616(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3648(CX), Y10
- VMOVDQU 3680(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3712(CX), Y10
- VMOVDQU 3744(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3776(CX), Y10
- VMOVDQU 3808(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3840(CX), Y10
- VMOVDQU 3872(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3904(CX), Y10
- VMOVDQU 3936(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3968(CX), Y10
- VMOVDQU 4000(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs
- MOVQ (R11), R13 // each out[j] data pointer is re-read from its slice header on every iteration
- VMOVDQU Y0, (R13)(R12*1) // write 32 bytes at out[0] + R12
- MOVQ 24(R11), R13
- VMOVDQU Y1, (R13)(R12*1)
- MOVQ 48(R11), R13
- VMOVDQU Y2, (R13)(R12*1)
- MOVQ 72(R11), R13
- VMOVDQU Y3, (R13)(R12*1)
- MOVQ 96(R11), R13
- VMOVDQU Y4, (R13)(R12*1)
- MOVQ 120(R11), R13
- VMOVDQU Y5, (R13)(R12*1)
- MOVQ 144(R11), R13
- VMOVDQU Y6, (R13)(R12*1)
- MOVQ 168(R11), R13
- VMOVDQU Y7, (R13)(R12*1)
- MOVQ 192(R11), R13
- VMOVDQU Y8, (R13)(R12*1)
-
- // Prepare for next loop
- ADDQ $0x20, R12 // advance the output offset by 32 bytes
- DECQ AX // one fewer 32-byte iteration remaining
- JNZ mulAvxTwo_7x9_loop
- VZEROUPPER // executed only after the loop has run; the early-exit jump lands below it
-
-mulAvxTwo_7x9_end:
- RET
-
-// func mulAvxTwo_7x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x9Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 140 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x9Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX
- MOVQ out_base+48(FP), R11
- MOVQ start+72(FP), R12
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_7x9Xor_loop:
- // Load and process 32 bytes from input 0 to 9 outputs
- VMOVDQU (BX), Y12
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- MOVQ (R11), R13
- VMOVDQU (R13)(R12*1), Y0
- VMOVDQU (CX), Y10
- VMOVDQU 32(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- MOVQ 24(R11), R13
- VMOVDQU (R13)(R12*1), Y1
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- MOVQ 48(R11), R13
- VMOVDQU (R13)(R12*1), Y2
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- MOVQ 72(R11), R13
- VMOVDQU (R13)(R12*1), Y3
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- MOVQ 96(R11), R13
- VMOVDQU (R13)(R12*1), Y4
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- MOVQ 120(R11), R13
- VMOVDQU (R13)(R12*1), Y5
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- MOVQ 144(R11), R13
- VMOVDQU (R13)(R12*1), Y6
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- MOVQ 168(R11), R13
- VMOVDQU (R13)(R12*1), Y7
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- MOVQ 192(R11), R13
- VMOVDQU (R13)(R12*1), Y8
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 1 to 9 outputs
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 9 outputs
- VMOVDQU (DI), Y12
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1152(CX), Y10
- VMOVDQU 1184(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1216(CX), Y10
- VMOVDQU 1248(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1280(CX), Y10
- VMOVDQU 1312(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1344(CX), Y10
- VMOVDQU 1376(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1408(CX), Y10
- VMOVDQU 1440(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 1472(CX), Y10
- VMOVDQU 1504(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 1536(CX), Y10
- VMOVDQU 1568(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1600(CX), Y10
- VMOVDQU 1632(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1664(CX), Y10
- VMOVDQU 1696(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 3 to 9 outputs
- VMOVDQU (R8), Y12
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1728(CX), Y10
- VMOVDQU 1760(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1792(CX), Y10
- VMOVDQU 1824(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1856(CX), Y10
- VMOVDQU 1888(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1920(CX), Y10
- VMOVDQU 1952(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1984(CX), Y10
- VMOVDQU 2016(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2048(CX), Y10
- VMOVDQU 2080(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2112(CX), Y10
- VMOVDQU 2144(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2176(CX), Y10
- VMOVDQU 2208(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2240(CX), Y10
- VMOVDQU 2272(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 4 to 9 outputs
- VMOVDQU (R9), Y12
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2304(CX), Y10
- VMOVDQU 2336(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2368(CX), Y10
- VMOVDQU 2400(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 2432(CX), Y10
- VMOVDQU 2464(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 2496(CX), Y10
- VMOVDQU 2528(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 2560(CX), Y10
- VMOVDQU 2592(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2624(CX), Y10
- VMOVDQU 2656(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2688(CX), Y10
- VMOVDQU 2720(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2752(CX), Y10
- VMOVDQU 2784(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2816(CX), Y10
- VMOVDQU 2848(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 5 to 9 outputs
- VMOVDQU (R10), Y12
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2880(CX), Y10
- VMOVDQU 2912(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2944(CX), Y10
- VMOVDQU 2976(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3008(CX), Y10
- VMOVDQU 3040(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3072(CX), Y10
- VMOVDQU 3104(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3136(CX), Y10
- VMOVDQU 3168(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3200(CX), Y10
- VMOVDQU 3232(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3264(CX), Y10
- VMOVDQU 3296(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3328(CX), Y10
- VMOVDQU 3360(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3392(CX), Y10
- VMOVDQU 3424(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 6 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 3456(CX), Y10
- VMOVDQU 3488(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 3520(CX), Y10
- VMOVDQU 3552(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3584(CX), Y10
- VMOVDQU 3616(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3648(CX), Y10
- VMOVDQU 3680(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3712(CX), Y10
- VMOVDQU 3744(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3776(CX), Y10
- VMOVDQU 3808(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3840(CX), Y10
- VMOVDQU 3872(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3904(CX), Y10
- VMOVDQU 3936(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3968(CX), Y10
- VMOVDQU 4000(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs
- MOVQ (R11), R13
- VMOVDQU Y0, (R13)(R12*1)
- MOVQ 24(R11), R13
- VMOVDQU Y1, (R13)(R12*1)
- MOVQ 48(R11), R13
- VMOVDQU Y2, (R13)(R12*1)
- MOVQ 72(R11), R13
- VMOVDQU Y3, (R13)(R12*1)
- MOVQ 96(R11), R13
- VMOVDQU Y4, (R13)(R12*1)
- MOVQ 120(R11), R13
- VMOVDQU Y5, (R13)(R12*1)
- MOVQ 144(R11), R13
- VMOVDQU Y6, (R13)(R12*1)
- MOVQ 168(R11), R13
- VMOVDQU Y7, (R13)(R12*1)
- MOVQ 192(R11), R13
- VMOVDQU Y8, (R13)(R12*1)
-
- // Prepare for next loop
- ADDQ $0x20, R12
- DECQ AX
- JNZ mulAvxTwo_7x9Xor_loop
- VZEROUPPER
-
-mulAvxTwo_7x9Xor_end:
- RET
-
-// func mulAvxTwo_7x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x10(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 155 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x10_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX
- MOVQ out_base+48(FP), R11
- MOVQ start+72(FP), R12
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X10
- VPBROADCASTB X10, Y10
-
-mulAvxTwo_7x10_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (BX), Y13
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU (CX), Y11
- VMOVDQU 32(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y0
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y1
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y2
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y3
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y4
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y5
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y6
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y7
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y8
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y9
-
- // Load and process 32 bytes from input 1 to 10 outputs
- VMOVDQU (SI), Y13
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 2 to 10 outputs
- VMOVDQU (DI), Y13
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1280(CX), Y11
- VMOVDQU 1312(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1344(CX), Y11
- VMOVDQU 1376(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 1408(CX), Y11
- VMOVDQU 1440(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 1472(CX), Y11
- VMOVDQU 1504(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 1536(CX), Y11
- VMOVDQU 1568(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 1600(CX), Y11
- VMOVDQU 1632(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1664(CX), Y11
- VMOVDQU 1696(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1728(CX), Y11
- VMOVDQU 1760(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1792(CX), Y11
- VMOVDQU 1824(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1856(CX), Y11
- VMOVDQU 1888(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 3 to 10 outputs
- VMOVDQU (R8), Y13
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1920(CX), Y11
- VMOVDQU 1952(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1984(CX), Y11
- VMOVDQU 2016(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2048(CX), Y11
- VMOVDQU 2080(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2112(CX), Y11
- VMOVDQU 2144(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2176(CX), Y11
- VMOVDQU 2208(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2240(CX), Y11
- VMOVDQU 2272(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2304(CX), Y11
- VMOVDQU 2336(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 2368(CX), Y11
- VMOVDQU 2400(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 2432(CX), Y11
- VMOVDQU 2464(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 2496(CX), Y11
- VMOVDQU 2528(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 4 to 10 outputs
- VMOVDQU (R9), Y13
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 2560(CX), Y11
- VMOVDQU 2592(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 2624(CX), Y11
- VMOVDQU 2656(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2688(CX), Y11
- VMOVDQU 2720(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2752(CX), Y11
- VMOVDQU 2784(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2816(CX), Y11
- VMOVDQU 2848(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2880(CX), Y11
- VMOVDQU 2912(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2944(CX), Y11
- VMOVDQU 2976(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3008(CX), Y11
- VMOVDQU 3040(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3072(CX), Y11
- VMOVDQU 3104(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3136(CX), Y11
- VMOVDQU 3168(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 5 to 10 outputs
- VMOVDQU (R10), Y13
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3200(CX), Y11
- VMOVDQU 3232(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3264(CX), Y11
- VMOVDQU 3296(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3328(CX), Y11
- VMOVDQU 3360(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 3392(CX), Y11
- VMOVDQU 3424(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 3456(CX), Y11
- VMOVDQU 3488(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 3520(CX), Y11
- VMOVDQU 3552(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 3584(CX), Y11
- VMOVDQU 3616(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3648(CX), Y11
- VMOVDQU 3680(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3712(CX), Y11
- VMOVDQU 3744(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3776(CX), Y11
- VMOVDQU 3808(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 6 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3840(CX), Y11
- VMOVDQU 3872(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3904(CX), Y11
- VMOVDQU 3936(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3968(CX), Y11
- VMOVDQU 4000(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 4032(CX), Y11
- VMOVDQU 4064(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 4096(CX), Y11
- VMOVDQU 4128(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 4160(CX), Y11
- VMOVDQU 4192(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 4224(CX), Y11
- VMOVDQU 4256(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 4288(CX), Y11
- VMOVDQU 4320(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 4352(CX), Y11
- VMOVDQU 4384(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 4416(CX), Y11
- VMOVDQU 4448(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs
- MOVQ (R11), R13
- VMOVDQU Y0, (R13)(R12*1)
- MOVQ 24(R11), R13
- VMOVDQU Y1, (R13)(R12*1)
- MOVQ 48(R11), R13
- VMOVDQU Y2, (R13)(R12*1)
- MOVQ 72(R11), R13
- VMOVDQU Y3, (R13)(R12*1)
- MOVQ 96(R11), R13
- VMOVDQU Y4, (R13)(R12*1)
- MOVQ 120(R11), R13
- VMOVDQU Y5, (R13)(R12*1)
- MOVQ 144(R11), R13
- VMOVDQU Y6, (R13)(R12*1)
- MOVQ 168(R11), R13
- VMOVDQU Y7, (R13)(R12*1)
- MOVQ 192(R11), R13
- VMOVDQU Y8, (R13)(R12*1)
- MOVQ 216(R11), R13
- VMOVDQU Y9, (R13)(R12*1)
-
- // Prepare for next loop
- ADDQ $0x20, R12
- DECQ AX
- JNZ mulAvxTwo_7x10_loop
- VZEROUPPER
-
-mulAvxTwo_7x10_end:
- RET
-
-// func mulAvxTwo_7x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_7x10Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 155 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x10Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), DX
- MOVQ out_base+48(FP), R11
- MOVQ start+72(FP), R12
-
- // Add start offset to input
- ADDQ R12, BX
- ADDQ R12, SI
- ADDQ R12, DI
- ADDQ R12, R8
- ADDQ R12, R9
- ADDQ R12, R10
- ADDQ R12, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X10
- VPBROADCASTB X10, Y10
-
-mulAvxTwo_7x10Xor_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (BX), Y13
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- MOVQ (R11), R13
- VMOVDQU (R13)(R12*1), Y0
- VMOVDQU (CX), Y11
- VMOVDQU 32(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- MOVQ 24(R11), R13
- VMOVDQU (R13)(R12*1), Y1
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- MOVQ 48(R11), R13
- VMOVDQU (R13)(R12*1), Y2
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- MOVQ 72(R11), R13
- VMOVDQU (R13)(R12*1), Y3
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- MOVQ 96(R11), R13
- VMOVDQU (R13)(R12*1), Y4
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- MOVQ 120(R11), R13
- VMOVDQU (R13)(R12*1), Y5
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- MOVQ 144(R11), R13
- VMOVDQU (R13)(R12*1), Y6
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- MOVQ 168(R11), R13
- VMOVDQU (R13)(R12*1), Y7
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- MOVQ 192(R11), R13
- VMOVDQU (R13)(R12*1), Y8
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- MOVQ 216(R11), R13
- VMOVDQU (R13)(R12*1), Y9
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 1 to 10 outputs
- VMOVDQU (SI), Y13
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 2 to 10 outputs
- VMOVDQU (DI), Y13
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1280(CX), Y11
- VMOVDQU 1312(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1344(CX), Y11
- VMOVDQU 1376(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 1408(CX), Y11
- VMOVDQU 1440(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 1472(CX), Y11
- VMOVDQU 1504(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 1536(CX), Y11
- VMOVDQU 1568(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 1600(CX), Y11
- VMOVDQU 1632(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1664(CX), Y11
- VMOVDQU 1696(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1728(CX), Y11
- VMOVDQU 1760(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1792(CX), Y11
- VMOVDQU 1824(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1856(CX), Y11
- VMOVDQU 1888(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 3 to 10 outputs
- VMOVDQU (R8), Y13
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1920(CX), Y11
- VMOVDQU 1952(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1984(CX), Y11
- VMOVDQU 2016(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2048(CX), Y11
- VMOVDQU 2080(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2112(CX), Y11
- VMOVDQU 2144(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2176(CX), Y11
- VMOVDQU 2208(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2240(CX), Y11
- VMOVDQU 2272(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2304(CX), Y11
- VMOVDQU 2336(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 2368(CX), Y11
- VMOVDQU 2400(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 2432(CX), Y11
- VMOVDQU 2464(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 2496(CX), Y11
- VMOVDQU 2528(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 4 to 10 outputs
- VMOVDQU (R9), Y13
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 2560(CX), Y11
- VMOVDQU 2592(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 2624(CX), Y11
- VMOVDQU 2656(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2688(CX), Y11
- VMOVDQU 2720(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2752(CX), Y11
- VMOVDQU 2784(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2816(CX), Y11
- VMOVDQU 2848(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2880(CX), Y11
- VMOVDQU 2912(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2944(CX), Y11
- VMOVDQU 2976(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3008(CX), Y11
- VMOVDQU 3040(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3072(CX), Y11
- VMOVDQU 3104(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3136(CX), Y11
- VMOVDQU 3168(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 5 to 10 outputs
- VMOVDQU (R10), Y13
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3200(CX), Y11
- VMOVDQU 3232(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3264(CX), Y11
- VMOVDQU 3296(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3328(CX), Y11
- VMOVDQU 3360(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 3392(CX), Y11
- VMOVDQU 3424(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 3456(CX), Y11
- VMOVDQU 3488(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 3520(CX), Y11
- VMOVDQU 3552(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 3584(CX), Y11
- VMOVDQU 3616(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3648(CX), Y11
- VMOVDQU 3680(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3712(CX), Y11
- VMOVDQU 3744(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3776(CX), Y11
- VMOVDQU 3808(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 6 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3840(CX), Y11
- VMOVDQU 3872(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3904(CX), Y11
- VMOVDQU 3936(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3968(CX), Y11
- VMOVDQU 4000(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 4032(CX), Y11
- VMOVDQU 4064(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 4096(CX), Y11
- VMOVDQU 4128(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 4160(CX), Y11
- VMOVDQU 4192(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 4224(CX), Y11
- VMOVDQU 4256(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 4288(CX), Y11
- VMOVDQU 4320(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 4352(CX), Y11
- VMOVDQU 4384(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 4416(CX), Y11
- VMOVDQU 4448(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs
- MOVQ (R11), R13
- VMOVDQU Y0, (R13)(R12*1)
- MOVQ 24(R11), R13
- VMOVDQU Y1, (R13)(R12*1)
- MOVQ 48(R11), R13
- VMOVDQU Y2, (R13)(R12*1)
- MOVQ 72(R11), R13
- VMOVDQU Y3, (R13)(R12*1)
- MOVQ 96(R11), R13
- VMOVDQU Y4, (R13)(R12*1)
- MOVQ 120(R11), R13
- VMOVDQU Y5, (R13)(R12*1)
- MOVQ 144(R11), R13
- VMOVDQU Y6, (R13)(R12*1)
- MOVQ 168(R11), R13
- VMOVDQU Y7, (R13)(R12*1)
- MOVQ 192(R11), R13
- VMOVDQU Y8, (R13)(R12*1)
- MOVQ 216(R11), R13
- VMOVDQU Y9, (R13)(R12*1)
-
- // Prepare for next loop
- ADDQ $0x20, R12
- DECQ AX
- JNZ mulAvxTwo_7x10Xor_loop
- VZEROUPPER
-
-mulAvxTwo_7x10Xor_end:
- RET
-
-// func mulAvxTwo_8x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x1(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 20 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x1_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ (R12), R12
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R12
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X1
- VPBROADCASTB X1, Y1
-
-mulAvxTwo_8x1_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y4
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU (CX), Y2
- VMOVDQU 32(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y0
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y4
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y4
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 128(CX), Y2
- VMOVDQU 160(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y4
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 192(CX), Y2
- VMOVDQU 224(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y4
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 256(CX), Y2
- VMOVDQU 288(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 5 to 1 outputs
- VMOVDQU (R10), Y4
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 320(CX), Y2
- VMOVDQU 352(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 6 to 1 outputs
- VMOVDQU (R11), Y4
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 384(CX), Y2
- VMOVDQU 416(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 7 to 1 outputs
- VMOVDQU (DX), Y4
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 448(CX), Y2
- VMOVDQU 480(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Store 1 outputs
- VMOVDQU Y0, (R12)
- ADDQ $0x20, R12
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_8x1_loop
- VZEROUPPER
-
-mulAvxTwo_8x1_end:
- RET
-
-// func mulAvxTwo_8x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x1_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 38 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x1_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ out_base+48(FP), R12
- MOVQ (R12), R12
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R12
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_8x1_64_loop:
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y6
- VMOVDQU 32(BX), Y5
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- VPXOR Y3, Y4, Y0
- VPXOR Y5, Y6, Y1
-
- // Load and process 64 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y6
- VMOVDQU 32(SI), Y5
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y6
- VMOVDQU 32(DI), Y5
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y6
- VMOVDQU 32(R8), Y5
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y6
- VMOVDQU 32(R9), Y5
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 5 to 1 outputs
- VMOVDQU (R10), Y6
- VMOVDQU 32(R10), Y5
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 6 to 1 outputs
- VMOVDQU (R11), Y6
- VMOVDQU 32(R11), Y5
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 7 to 1 outputs
- VMOVDQU (DX), Y6
- VMOVDQU 32(DX), Y5
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Store 1 outputs
- VMOVDQU Y0, (R12)
- VMOVDQU Y1, 32(R12)
- ADDQ $0x40, R12
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_8x1_64_loop
- VZEROUPPER
-
-mulAvxTwo_8x1_64_end:
- RET
-
-// func mulAvxTwo_8x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x1Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 20 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x1Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ (R12), R12
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R12
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X1
- VPBROADCASTB X1, Y1
-
-mulAvxTwo_8x1Xor_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y4
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU (R12), Y0
- VMOVDQU (CX), Y2
- VMOVDQU 32(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y4
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y4
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 128(CX), Y2
- VMOVDQU 160(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y4
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 192(CX), Y2
- VMOVDQU 224(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y4
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 256(CX), Y2
- VMOVDQU 288(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 5 to 1 outputs
- VMOVDQU (R10), Y4
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 320(CX), Y2
- VMOVDQU 352(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 6 to 1 outputs
- VMOVDQU (R11), Y4
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 384(CX), Y2
- VMOVDQU 416(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 7 to 1 outputs
- VMOVDQU (DX), Y4
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 448(CX), Y2
- VMOVDQU 480(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Store 1 outputs
- VMOVDQU Y0, (R12)
- ADDQ $0x20, R12
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_8x1Xor_loop
- VZEROUPPER
-
-mulAvxTwo_8x1Xor_end:
- RET
-
-// func mulAvxTwo_8x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x1_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 38 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x1_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ out_base+48(FP), R12
- MOVQ (R12), R12
- MOVQ start+72(FP), R13
-
- // Add start offset to output
- ADDQ R13, R12
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, DX
- MOVQ $0x0000000f, R13
- MOVQ R13, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_8x1_64Xor_loop:
- // Load 1 outputs
- VMOVDQU (R12), Y0
- VMOVDQU 32(R12), Y1
-
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y6
- VMOVDQU 32(BX), Y5
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y6
- VMOVDQU 32(SI), Y5
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y6
- VMOVDQU 32(DI), Y5
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y6
- VMOVDQU 32(R8), Y5
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y6
- VMOVDQU 32(R9), Y5
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 5 to 1 outputs
- VMOVDQU (R10), Y6
- VMOVDQU 32(R10), Y5
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 6 to 1 outputs
- VMOVDQU (R11), Y6
- VMOVDQU 32(R11), Y5
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 7 to 1 outputs
- VMOVDQU (DX), Y6
- VMOVDQU 32(DX), Y5
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Store 1 outputs
- VMOVDQU Y0, (R12)
- VMOVDQU Y1, 32(R12)
- ADDQ $0x40, R12
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_8x1_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_8x1_64Xor_end:
- RET
-
-// func mulAvxTwo_8x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x2(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 39 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x2_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ (R12), R13
- MOVQ 24(R12), R12
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R13
- ADDQ R14, R12
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_8x2_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y5
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y0
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y1
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y5
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y5
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y5
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y5
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 5 to 2 outputs
- VMOVDQU (R10), Y5
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 640(CX), Y3
- VMOVDQU 672(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 704(CX), Y3
- VMOVDQU 736(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 6 to 2 outputs
- VMOVDQU (R11), Y5
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 768(CX), Y3
- VMOVDQU 800(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 832(CX), Y3
- VMOVDQU 864(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 7 to 2 outputs
- VMOVDQU (DX), Y5
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 896(CX), Y3
- VMOVDQU 928(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 960(CX), Y3
- VMOVDQU 992(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Store 2 outputs
- VMOVDQU Y0, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y1, (R12)
- ADDQ $0x20, R12
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_8x2_loop
- VZEROUPPER
-
-mulAvxTwo_8x2_end:
- RET
-
-// func mulAvxTwo_8x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x2_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 73 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x2_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ out_base+48(FP), R12
- MOVQ (R12), R13
- MOVQ 24(R12), R12
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R13
- ADDQ R14, R12
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_8x2_64_loop:
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9
- VMOVDQU 32(BX), Y11
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y0
- VPXOR Y7, Y8, Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y2
- VPXOR Y7, Y8, Y3
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y9
- VMOVDQU 32(SI), Y11
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y9
- VMOVDQU 32(DI), Y11
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y9
- VMOVDQU 32(R8), Y11
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y9
- VMOVDQU 32(R9), Y11
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 5 to 2 outputs
- VMOVDQU (R10), Y9
- VMOVDQU 32(R10), Y11
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 6 to 2 outputs
- VMOVDQU (R11), Y9
- VMOVDQU 32(R11), Y11
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 7 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (R13)
- VMOVDQU Y1, 32(R13)
- ADDQ $0x40, R13
- VMOVDQU Y2, (R12)
- VMOVDQU Y3, 32(R12)
- ADDQ $0x40, R12
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_8x2_64_loop
- VZEROUPPER
-
-mulAvxTwo_8x2_64_end:
- RET
-
-// func mulAvxTwo_8x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x2Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 39 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x2Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ (R12), R13
- MOVQ 24(R12), R12
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R13
- ADDQ R14, R12
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_8x2Xor_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y5
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (R13), Y0
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU (R12), Y1
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y5
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y5
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y5
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y5
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 5 to 2 outputs
- VMOVDQU (R10), Y5
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 640(CX), Y3
- VMOVDQU 672(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 704(CX), Y3
- VMOVDQU 736(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 6 to 2 outputs
- VMOVDQU (R11), Y5
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 768(CX), Y3
- VMOVDQU 800(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 832(CX), Y3
- VMOVDQU 864(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 7 to 2 outputs
- VMOVDQU (DX), Y5
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 896(CX), Y3
- VMOVDQU 928(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 960(CX), Y3
- VMOVDQU 992(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Store 2 outputs
- VMOVDQU Y0, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y1, (R12)
- ADDQ $0x20, R12
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_8x2Xor_loop
- VZEROUPPER
-
-mulAvxTwo_8x2Xor_end:
- RET
-
-// func mulAvxTwo_8x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x2_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 73 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x2_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ out_base+48(FP), R12
- MOVQ (R12), R13
- MOVQ 24(R12), R12
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R13
- ADDQ R14, R12
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_8x2_64Xor_loop:
- // Load 2 outputs
- VMOVDQU (R13), Y0
- VMOVDQU 32(R13), Y1
- VMOVDQU (R12), Y2
- VMOVDQU 32(R12), Y3
-
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9
- VMOVDQU 32(BX), Y11
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y9
- VMOVDQU 32(SI), Y11
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y9
- VMOVDQU 32(DI), Y11
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y9
- VMOVDQU 32(R8), Y11
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y9
- VMOVDQU 32(R9), Y11
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 5 to 2 outputs
- VMOVDQU (R10), Y9
- VMOVDQU 32(R10), Y11
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 6 to 2 outputs
- VMOVDQU (R11), Y9
- VMOVDQU 32(R11), Y11
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 7 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (R13)
- VMOVDQU Y1, 32(R13)
- ADDQ $0x40, R13
- VMOVDQU Y2, (R12)
- VMOVDQU Y3, 32(R12)
- ADDQ $0x40, R12
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_8x2_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_8x2_64Xor_end:
- RET
-
-// func mulAvxTwo_8x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x3(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 56 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x3_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ (R12), R13
- MOVQ 24(R12), R14
- MOVQ 48(R12), R12
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, R12
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X3
- VPBROADCASTB X3, Y3
-
-mulAvxTwo_8x3_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y0
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y1
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y2
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y6
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y6
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y6
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (R9), Y6
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 768(CX), Y4
- VMOVDQU 800(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 832(CX), Y4
- VMOVDQU 864(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 896(CX), Y4
- VMOVDQU 928(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 5 to 3 outputs
- VMOVDQU (R10), Y6
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 960(CX), Y4
- VMOVDQU 992(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1024(CX), Y4
- VMOVDQU 1056(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1088(CX), Y4
- VMOVDQU 1120(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 6 to 3 outputs
- VMOVDQU (R11), Y6
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1152(CX), Y4
- VMOVDQU 1184(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1216(CX), Y4
- VMOVDQU 1248(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1280(CX), Y4
- VMOVDQU 1312(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 7 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1344(CX), Y4
- VMOVDQU 1376(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1408(CX), Y4
- VMOVDQU 1440(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1472(CX), Y4
- VMOVDQU 1504(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y1, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y2, (R12)
- ADDQ $0x20, R12
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_8x3_loop
- VZEROUPPER
-
-mulAvxTwo_8x3_end:
- RET
-
-// func mulAvxTwo_8x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x3_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 106 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x3_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ out_base+48(FP), R12
- MOVQ (R12), R13
- MOVQ 24(R12), R14
- MOVQ 48(R12), R12
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, R12
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_8x3_64_loop:
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y11
- VMOVDQU 32(BX), Y13
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y0
- VPXOR Y9, Y10, Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y2
- VPXOR Y9, Y10, Y3
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y4
- VPXOR Y9, Y10, Y5
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y11
- VMOVDQU 32(SI), Y13
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y11
- VMOVDQU 32(DI), Y13
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y11
- VMOVDQU 32(R8), Y13
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 4 to 3 outputs
- VMOVDQU (R9), Y11
- VMOVDQU 32(R9), Y13
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 5 to 3 outputs
- VMOVDQU (R10), Y11
- VMOVDQU 32(R10), Y13
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 6 to 3 outputs
- VMOVDQU (R11), Y11
- VMOVDQU 32(R11), Y13
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 7 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (R13)
- VMOVDQU Y1, 32(R13)
- ADDQ $0x40, R13
- VMOVDQU Y2, (R14)
- VMOVDQU Y3, 32(R14)
- ADDQ $0x40, R14
- VMOVDQU Y4, (R12)
- VMOVDQU Y5, 32(R12)
- ADDQ $0x40, R12
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_8x3_64_loop
- VZEROUPPER
-
-mulAvxTwo_8x3_64_end:
- RET
-
-// func mulAvxTwo_8x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x3Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 56 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x3Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ (R12), R13
- MOVQ 24(R12), R14
- MOVQ 48(R12), R12
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, R12
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X3
- VPBROADCASTB X3, Y3
-
-mulAvxTwo_8x3Xor_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (R13), Y0
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU (R14), Y1
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU (R12), Y2
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y6
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y6
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y6
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (R9), Y6
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 768(CX), Y4
- VMOVDQU 800(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 832(CX), Y4
- VMOVDQU 864(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 896(CX), Y4
- VMOVDQU 928(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 5 to 3 outputs
- VMOVDQU (R10), Y6
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 960(CX), Y4
- VMOVDQU 992(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1024(CX), Y4
- VMOVDQU 1056(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1088(CX), Y4
- VMOVDQU 1120(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 6 to 3 outputs
- VMOVDQU (R11), Y6
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1152(CX), Y4
- VMOVDQU 1184(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1216(CX), Y4
- VMOVDQU 1248(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1280(CX), Y4
- VMOVDQU 1312(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 7 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1344(CX), Y4
- VMOVDQU 1376(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1408(CX), Y4
- VMOVDQU 1440(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1472(CX), Y4
- VMOVDQU 1504(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y1, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y2, (R12)
- ADDQ $0x20, R12
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_8x3Xor_loop
- VZEROUPPER
-
-mulAvxTwo_8x3Xor_end:
- RET
-
-// func mulAvxTwo_8x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x3_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 106 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x3_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ out_base+48(FP), R12
- MOVQ (R12), R13
- MOVQ 24(R12), R14
- MOVQ 48(R12), R12
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R13
- ADDQ R15, R14
- ADDQ R15, R12
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_8x3_64Xor_loop:
- // Load 3 outputs
- VMOVDQU (R13), Y0
- VMOVDQU 32(R13), Y1
- VMOVDQU (R14), Y2
- VMOVDQU 32(R14), Y3
- VMOVDQU (R12), Y4
- VMOVDQU 32(R12), Y5
-
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y11
- VMOVDQU 32(BX), Y13
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y11
- VMOVDQU 32(SI), Y13
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y11
- VMOVDQU 32(DI), Y13
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y11
- VMOVDQU 32(R8), Y13
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 4 to 3 outputs
- VMOVDQU (R9), Y11
- VMOVDQU 32(R9), Y13
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 5 to 3 outputs
- VMOVDQU (R10), Y11
- VMOVDQU 32(R10), Y13
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 6 to 3 outputs
- VMOVDQU (R11), Y11
- VMOVDQU 32(R11), Y13
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 7 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (R13)
- VMOVDQU Y1, 32(R13)
- ADDQ $0x40, R13
- VMOVDQU Y2, (R14)
- VMOVDQU Y3, 32(R14)
- ADDQ $0x40, R14
- VMOVDQU Y4, (R12)
- VMOVDQU Y5, 32(R12)
- ADDQ $0x40, R12
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_8x3_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_8x3_64Xor_end:
- RET
-
-// func mulAvxTwo_8x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x4(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 73 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x4_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ (R12), R13
- MOVQ 24(R12), R14
- MOVQ 48(R12), R15
- MOVQ 72(R12), R12
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R12
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_8x4_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y0
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y1
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y2
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y3
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (SI), Y7
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (DI), Y7
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (R8), Y7
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (R9), Y7
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 5 to 4 outputs
- VMOVDQU (R10), Y7
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1280(CX), Y5
- VMOVDQU 1312(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1344(CX), Y5
- VMOVDQU 1376(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1408(CX), Y5
- VMOVDQU 1440(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1472(CX), Y5
- VMOVDQU 1504(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 6 to 4 outputs
- VMOVDQU (R11), Y7
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1536(CX), Y5
- VMOVDQU 1568(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1600(CX), Y5
- VMOVDQU 1632(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1664(CX), Y5
- VMOVDQU 1696(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1728(CX), Y5
- VMOVDQU 1760(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 7 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1792(CX), Y5
- VMOVDQU 1824(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1856(CX), Y5
- VMOVDQU 1888(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1920(CX), Y5
- VMOVDQU 1952(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1984(CX), Y5
- VMOVDQU 2016(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- VMOVDQU Y0, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y1, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y2, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y3, (R12)
- ADDQ $0x20, R12
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_8x4_loop
- VZEROUPPER
-
-mulAvxTwo_8x4_end:
- RET
-
-// func mulAvxTwo_8x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x4Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 73 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x4Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ (R12), R13
- MOVQ 24(R12), R14
- MOVQ 48(R12), R15
- MOVQ 72(R12), R12
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R12
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_8x4Xor_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (R13), Y0
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU (R14), Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU (R15), Y2
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU (R12), Y3
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (SI), Y7
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (DI), Y7
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (R8), Y7
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (R9), Y7
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 5 to 4 outputs
- VMOVDQU (R10), Y7
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1280(CX), Y5
- VMOVDQU 1312(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1344(CX), Y5
- VMOVDQU 1376(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1408(CX), Y5
- VMOVDQU 1440(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1472(CX), Y5
- VMOVDQU 1504(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 6 to 4 outputs
- VMOVDQU (R11), Y7
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1536(CX), Y5
- VMOVDQU 1568(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1600(CX), Y5
- VMOVDQU 1632(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1664(CX), Y5
- VMOVDQU 1696(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1728(CX), Y5
- VMOVDQU 1760(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 7 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1792(CX), Y5
- VMOVDQU 1824(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1856(CX), Y5
- VMOVDQU 1888(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1920(CX), Y5
- VMOVDQU 1952(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1984(CX), Y5
- VMOVDQU 2016(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- VMOVDQU Y0, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y1, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y2, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y3, (R12)
- ADDQ $0x20, R12
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_8x4Xor_loop
- VZEROUPPER
-
-mulAvxTwo_8x4Xor_end:
- RET
-
-// func mulAvxTwo_8x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x5(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 90 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x5_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), SI
- MOVQ 72(AX), DI
- MOVQ 96(AX), R8
- MOVQ 120(AX), R9
- MOVQ 144(AX), R10
- MOVQ 168(AX), AX
- MOVQ out_base+48(FP), R11
- MOVQ (R11), R12
- MOVQ 24(R11), R13
- MOVQ 48(R11), R14
- MOVQ 72(R11), R15
- MOVQ 96(R11), R11
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R11
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X5
- VPBROADCASTB X5, Y5
- MOVQ n+80(FP), BP
- SHRQ $0x05, BP
-
-mulAvxTwo_8x5_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y0
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y1
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y2
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y3
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y4
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (SI), Y8
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (DI), Y8
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (R8), Y8
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 5 to 5 outputs
- VMOVDQU (R9), Y8
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1600(CX), Y6
- VMOVDQU 1632(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1664(CX), Y6
- VMOVDQU 1696(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1728(CX), Y6
- VMOVDQU 1760(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1792(CX), Y6
- VMOVDQU 1824(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1856(CX), Y6
- VMOVDQU 1888(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 6 to 5 outputs
- VMOVDQU (R10), Y8
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1920(CX), Y6
- VMOVDQU 1952(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1984(CX), Y6
- VMOVDQU 2016(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2048(CX), Y6
- VMOVDQU 2080(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2112(CX), Y6
- VMOVDQU 2144(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2176(CX), Y6
- VMOVDQU 2208(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 7 to 5 outputs
- VMOVDQU (AX), Y8
- ADDQ $0x20, AX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2240(CX), Y6
- VMOVDQU 2272(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 2304(CX), Y6
- VMOVDQU 2336(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2368(CX), Y6
- VMOVDQU 2400(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2432(CX), Y6
- VMOVDQU 2464(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2496(CX), Y6
- VMOVDQU 2528(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- VMOVDQU Y0, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y1, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y2, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y3, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y4, (R11)
- ADDQ $0x20, R11
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_8x5_loop
- VZEROUPPER
-
-mulAvxTwo_8x5_end:
- RET
-
-// func mulAvxTwo_8x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x5Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 90 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x5Xor_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), SI
- MOVQ 72(AX), DI
- MOVQ 96(AX), R8
- MOVQ 120(AX), R9
- MOVQ 144(AX), R10
- MOVQ 168(AX), AX
- MOVQ out_base+48(FP), R11
- MOVQ (R11), R12
- MOVQ 24(R11), R13
- MOVQ 48(R11), R14
- MOVQ 72(R11), R15
- MOVQ 96(R11), R11
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R11
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X5
- VPBROADCASTB X5, Y5
- MOVQ n+80(FP), BP
- SHRQ $0x05, BP
-
-mulAvxTwo_8x5Xor_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (R12), Y0
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU (R13), Y1
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU (R14), Y2
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU (R15), Y3
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU (R11), Y4
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (SI), Y8
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (DI), Y8
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (R8), Y8
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 5 to 5 outputs
- VMOVDQU (R9), Y8
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1600(CX), Y6
- VMOVDQU 1632(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1664(CX), Y6
- VMOVDQU 1696(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1728(CX), Y6
- VMOVDQU 1760(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1792(CX), Y6
- VMOVDQU 1824(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1856(CX), Y6
- VMOVDQU 1888(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 6 to 5 outputs
- VMOVDQU (R10), Y8
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1920(CX), Y6
- VMOVDQU 1952(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1984(CX), Y6
- VMOVDQU 2016(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2048(CX), Y6
- VMOVDQU 2080(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2112(CX), Y6
- VMOVDQU 2144(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2176(CX), Y6
- VMOVDQU 2208(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 7 to 5 outputs
- VMOVDQU (AX), Y8
- ADDQ $0x20, AX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2240(CX), Y6
- VMOVDQU 2272(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 2304(CX), Y6
- VMOVDQU 2336(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2368(CX), Y6
- VMOVDQU 2400(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2432(CX), Y6
- VMOVDQU 2464(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2496(CX), Y6
- VMOVDQU 2528(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- VMOVDQU Y0, (R12)
- ADDQ $0x20, R12
- VMOVDQU Y1, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y2, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y3, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y4, (R11)
- ADDQ $0x20, R11
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_8x5Xor_loop
- VZEROUPPER
-
-mulAvxTwo_8x5Xor_end:
- RET
-
-// func mulAvxTwo_8x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x6(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 107 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x6_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ start+72(FP), R13
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_8x6_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y0
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y1
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y2
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y3
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y4
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y5
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI), Y9
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DI), Y9
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (R8), Y9
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (R9), Y9
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 5 to 6 outputs
- VMOVDQU (R10), Y9
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1920(CX), Y7
- VMOVDQU 1952(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1984(CX), Y7
- VMOVDQU 2016(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2048(CX), Y7
- VMOVDQU 2080(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2112(CX), Y7
- VMOVDQU 2144(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2176(CX), Y7
- VMOVDQU 2208(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2240(CX), Y7
- VMOVDQU 2272(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 6 to 6 outputs
- VMOVDQU (R11), Y9
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2304(CX), Y7
- VMOVDQU 2336(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 2368(CX), Y7
- VMOVDQU 2400(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2432(CX), Y7
- VMOVDQU 2464(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2496(CX), Y7
- VMOVDQU 2528(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2560(CX), Y7
- VMOVDQU 2592(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2624(CX), Y7
- VMOVDQU 2656(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 7 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2688(CX), Y7
- VMOVDQU 2720(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 2752(CX), Y7
- VMOVDQU 2784(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2816(CX), Y7
- VMOVDQU 2848(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2880(CX), Y7
- VMOVDQU 2912(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2944(CX), Y7
- VMOVDQU 2976(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 3008(CX), Y7
- VMOVDQU 3040(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- MOVQ (R12), R14
- VMOVDQU Y0, (R14)(R13*1)
- MOVQ 24(R12), R14
- VMOVDQU Y1, (R14)(R13*1)
- MOVQ 48(R12), R14
- VMOVDQU Y2, (R14)(R13*1)
- MOVQ 72(R12), R14
- VMOVDQU Y3, (R14)(R13*1)
- MOVQ 96(R12), R14
- VMOVDQU Y4, (R14)(R13*1)
- MOVQ 120(R12), R14
- VMOVDQU Y5, (R14)(R13*1)
-
- // Prepare for next loop
- ADDQ $0x20, R13
- DECQ AX
- JNZ mulAvxTwo_8x6_loop
- VZEROUPPER
-
-mulAvxTwo_8x6_end:
- RET
-
-// func mulAvxTwo_8x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x6Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 107 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x6Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ start+72(FP), R13
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_8x6Xor_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- MOVQ (R12), R14
- VMOVDQU (R14)(R13*1), Y0
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- MOVQ 24(R12), R14
- VMOVDQU (R14)(R13*1), Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- MOVQ 48(R12), R14
- VMOVDQU (R14)(R13*1), Y2
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- MOVQ 72(R12), R14
- VMOVDQU (R14)(R13*1), Y3
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- MOVQ 96(R12), R14
- VMOVDQU (R14)(R13*1), Y4
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- MOVQ 120(R12), R14
- VMOVDQU (R14)(R13*1), Y5
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI), Y9
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DI), Y9
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (R8), Y9
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (R9), Y9
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 5 to 6 outputs
- VMOVDQU (R10), Y9
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1920(CX), Y7
- VMOVDQU 1952(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1984(CX), Y7
- VMOVDQU 2016(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2048(CX), Y7
- VMOVDQU 2080(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2112(CX), Y7
- VMOVDQU 2144(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2176(CX), Y7
- VMOVDQU 2208(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2240(CX), Y7
- VMOVDQU 2272(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 6 to 6 outputs
- VMOVDQU (R11), Y9
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2304(CX), Y7
- VMOVDQU 2336(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 2368(CX), Y7
- VMOVDQU 2400(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2432(CX), Y7
- VMOVDQU 2464(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2496(CX), Y7
- VMOVDQU 2528(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2560(CX), Y7
- VMOVDQU 2592(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2624(CX), Y7
- VMOVDQU 2656(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 7 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2688(CX), Y7
- VMOVDQU 2720(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 2752(CX), Y7
- VMOVDQU 2784(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2816(CX), Y7
- VMOVDQU 2848(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2880(CX), Y7
- VMOVDQU 2912(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2944(CX), Y7
- VMOVDQU 2976(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 3008(CX), Y7
- VMOVDQU 3040(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- MOVQ (R12), R14
- VMOVDQU Y0, (R14)(R13*1)
- MOVQ 24(R12), R14
- VMOVDQU Y1, (R14)(R13*1)
- MOVQ 48(R12), R14
- VMOVDQU Y2, (R14)(R13*1)
- MOVQ 72(R12), R14
- VMOVDQU Y3, (R14)(R13*1)
- MOVQ 96(R12), R14
- VMOVDQU Y4, (R14)(R13*1)
- MOVQ 120(R12), R14
- VMOVDQU Y5, (R14)(R13*1)
-
- // Prepare for next loop
- ADDQ $0x20, R13
- DECQ AX
- JNZ mulAvxTwo_8x6Xor_loop
- VZEROUPPER
-
-mulAvxTwo_8x6Xor_end:
- RET
-
-// func mulAvxTwo_8x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x7(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 124 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x7_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ start+72(FP), R13
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_8x7_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y0
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y1
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y2
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y3
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y4
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y5
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y6
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DI), Y10
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (R8), Y10
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (R9), Y10
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 5 to 7 outputs
- VMOVDQU (R10), Y10
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2240(CX), Y8
- VMOVDQU 2272(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2304(CX), Y8
- VMOVDQU 2336(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2368(CX), Y8
- VMOVDQU 2400(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2432(CX), Y8
- VMOVDQU 2464(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2496(CX), Y8
- VMOVDQU 2528(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2560(CX), Y8
- VMOVDQU 2592(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2624(CX), Y8
- VMOVDQU 2656(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 6 to 7 outputs
- VMOVDQU (R11), Y10
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2688(CX), Y8
- VMOVDQU 2720(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2752(CX), Y8
- VMOVDQU 2784(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2816(CX), Y8
- VMOVDQU 2848(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2880(CX), Y8
- VMOVDQU 2912(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2944(CX), Y8
- VMOVDQU 2976(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3008(CX), Y8
- VMOVDQU 3040(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3072(CX), Y8
- VMOVDQU 3104(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 7 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 3136(CX), Y8
- VMOVDQU 3168(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 3200(CX), Y8
- VMOVDQU 3232(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 3264(CX), Y8
- VMOVDQU 3296(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 3328(CX), Y8
- VMOVDQU 3360(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 3392(CX), Y8
- VMOVDQU 3424(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3456(CX), Y8
- VMOVDQU 3488(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3520(CX), Y8
- VMOVDQU 3552(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- MOVQ (R12), R14
- VMOVDQU Y0, (R14)(R13*1)
- MOVQ 24(R12), R14
- VMOVDQU Y1, (R14)(R13*1)
- MOVQ 48(R12), R14
- VMOVDQU Y2, (R14)(R13*1)
- MOVQ 72(R12), R14
- VMOVDQU Y3, (R14)(R13*1)
- MOVQ 96(R12), R14
- VMOVDQU Y4, (R14)(R13*1)
- MOVQ 120(R12), R14
- VMOVDQU Y5, (R14)(R13*1)
- MOVQ 144(R12), R14
- VMOVDQU Y6, (R14)(R13*1)
-
- // Prepare for next loop
- ADDQ $0x20, R13
- DECQ AX
- JNZ mulAvxTwo_8x7_loop
- VZEROUPPER
-
-mulAvxTwo_8x7_end:
- RET
-
-// func mulAvxTwo_8x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x7Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 124 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x7Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ start+72(FP), R13
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_8x7Xor_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- MOVQ (R12), R14
- VMOVDQU (R14)(R13*1), Y0
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- MOVQ 24(R12), R14
- VMOVDQU (R14)(R13*1), Y1
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- MOVQ 48(R12), R14
- VMOVDQU (R14)(R13*1), Y2
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- MOVQ 72(R12), R14
- VMOVDQU (R14)(R13*1), Y3
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- MOVQ 96(R12), R14
- VMOVDQU (R14)(R13*1), Y4
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- MOVQ 120(R12), R14
- VMOVDQU (R14)(R13*1), Y5
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- MOVQ 144(R12), R14
- VMOVDQU (R14)(R13*1), Y6
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DI), Y10
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (R8), Y10
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (R9), Y10
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 5 to 7 outputs
- VMOVDQU (R10), Y10
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2240(CX), Y8
- VMOVDQU 2272(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2304(CX), Y8
- VMOVDQU 2336(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2368(CX), Y8
- VMOVDQU 2400(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2432(CX), Y8
- VMOVDQU 2464(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2496(CX), Y8
- VMOVDQU 2528(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2560(CX), Y8
- VMOVDQU 2592(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2624(CX), Y8
- VMOVDQU 2656(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 6 to 7 outputs
- VMOVDQU (R11), Y10
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2688(CX), Y8
- VMOVDQU 2720(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2752(CX), Y8
- VMOVDQU 2784(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2816(CX), Y8
- VMOVDQU 2848(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2880(CX), Y8
- VMOVDQU 2912(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2944(CX), Y8
- VMOVDQU 2976(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3008(CX), Y8
- VMOVDQU 3040(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3072(CX), Y8
- VMOVDQU 3104(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 7 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 3136(CX), Y8
- VMOVDQU 3168(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 3200(CX), Y8
- VMOVDQU 3232(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 3264(CX), Y8
- VMOVDQU 3296(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 3328(CX), Y8
- VMOVDQU 3360(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 3392(CX), Y8
- VMOVDQU 3424(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3456(CX), Y8
- VMOVDQU 3488(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3520(CX), Y8
- VMOVDQU 3552(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- MOVQ (R12), R14
- VMOVDQU Y0, (R14)(R13*1)
- MOVQ 24(R12), R14
- VMOVDQU Y1, (R14)(R13*1)
- MOVQ 48(R12), R14
- VMOVDQU Y2, (R14)(R13*1)
- MOVQ 72(R12), R14
- VMOVDQU Y3, (R14)(R13*1)
- MOVQ 96(R12), R14
- VMOVDQU Y4, (R14)(R13*1)
- MOVQ 120(R12), R14
- VMOVDQU Y5, (R14)(R13*1)
- MOVQ 144(R12), R14
- VMOVDQU Y6, (R14)(R13*1)
-
- // Prepare for next loop
- ADDQ $0x20, R13
- DECQ AX
- JNZ mulAvxTwo_8x7Xor_loop
- VZEROUPPER
-
-mulAvxTwo_8x7Xor_end:
- RET
-
-// func mulAvxTwo_8x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x8(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 141 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x8_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ start+72(FP), R13
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X8
- VPBROADCASTB X8, Y8
-
-mulAvxTwo_8x8_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y0
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y1
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y2
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y3
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y4
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y5
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y6
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y7
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI), Y11
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DI), Y11
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (R8), Y11
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 4 to 8 outputs
- VMOVDQU (R9), Y11
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 5 to 8 outputs
- VMOVDQU (R10), Y11
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2560(CX), Y9
- VMOVDQU 2592(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2624(CX), Y9
- VMOVDQU 2656(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2688(CX), Y9
- VMOVDQU 2720(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2752(CX), Y9
- VMOVDQU 2784(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2816(CX), Y9
- VMOVDQU 2848(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2880(CX), Y9
- VMOVDQU 2912(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2944(CX), Y9
- VMOVDQU 2976(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3008(CX), Y9
- VMOVDQU 3040(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 6 to 8 outputs
- VMOVDQU (R11), Y11
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3072(CX), Y9
- VMOVDQU 3104(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 3136(CX), Y9
- VMOVDQU 3168(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 3200(CX), Y9
- VMOVDQU 3232(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 3264(CX), Y9
- VMOVDQU 3296(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 3328(CX), Y9
- VMOVDQU 3360(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 3392(CX), Y9
- VMOVDQU 3424(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 3456(CX), Y9
- VMOVDQU 3488(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3520(CX), Y9
- VMOVDQU 3552(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 7 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3584(CX), Y9
- VMOVDQU 3616(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 3648(CX), Y9
- VMOVDQU 3680(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 3712(CX), Y9
- VMOVDQU 3744(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 3776(CX), Y9
- VMOVDQU 3808(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 3840(CX), Y9
- VMOVDQU 3872(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 3904(CX), Y9
- VMOVDQU 3936(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 3968(CX), Y9
- VMOVDQU 4000(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 4032(CX), Y9
- VMOVDQU 4064(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- MOVQ (R12), R14
- VMOVDQU Y0, (R14)(R13*1)
- MOVQ 24(R12), R14
- VMOVDQU Y1, (R14)(R13*1)
- MOVQ 48(R12), R14
- VMOVDQU Y2, (R14)(R13*1)
- MOVQ 72(R12), R14
- VMOVDQU Y3, (R14)(R13*1)
- MOVQ 96(R12), R14
- VMOVDQU Y4, (R14)(R13*1)
- MOVQ 120(R12), R14
- VMOVDQU Y5, (R14)(R13*1)
- MOVQ 144(R12), R14
- VMOVDQU Y6, (R14)(R13*1)
- MOVQ 168(R12), R14
- VMOVDQU Y7, (R14)(R13*1)
-
- // Prepare for next loop
- ADDQ $0x20, R13
- DECQ AX
- JNZ mulAvxTwo_8x8_loop
- VZEROUPPER
-
-mulAvxTwo_8x8_end:
- RET
-
-// func mulAvxTwo_8x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x8Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 141 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x8Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ start+72(FP), R13
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X8
- VPBROADCASTB X8, Y8
-
-mulAvxTwo_8x8Xor_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- MOVQ (R12), R14
- VMOVDQU (R14)(R13*1), Y0
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- MOVQ 24(R12), R14
- VMOVDQU (R14)(R13*1), Y1
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- MOVQ 48(R12), R14
- VMOVDQU (R14)(R13*1), Y2
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- MOVQ 72(R12), R14
- VMOVDQU (R14)(R13*1), Y3
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- MOVQ 96(R12), R14
- VMOVDQU (R14)(R13*1), Y4
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- MOVQ 120(R12), R14
- VMOVDQU (R14)(R13*1), Y5
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- MOVQ 144(R12), R14
- VMOVDQU (R14)(R13*1), Y6
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- MOVQ 168(R12), R14
- VMOVDQU (R14)(R13*1), Y7
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI), Y11
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DI), Y11
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (R8), Y11
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 4 to 8 outputs
- VMOVDQU (R9), Y11
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 5 to 8 outputs
- VMOVDQU (R10), Y11
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2560(CX), Y9
- VMOVDQU 2592(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2624(CX), Y9
- VMOVDQU 2656(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2688(CX), Y9
- VMOVDQU 2720(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2752(CX), Y9
- VMOVDQU 2784(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2816(CX), Y9
- VMOVDQU 2848(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2880(CX), Y9
- VMOVDQU 2912(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2944(CX), Y9
- VMOVDQU 2976(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3008(CX), Y9
- VMOVDQU 3040(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 6 to 8 outputs
- VMOVDQU (R11), Y11
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3072(CX), Y9
- VMOVDQU 3104(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 3136(CX), Y9
- VMOVDQU 3168(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 3200(CX), Y9
- VMOVDQU 3232(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 3264(CX), Y9
- VMOVDQU 3296(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 3328(CX), Y9
- VMOVDQU 3360(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 3392(CX), Y9
- VMOVDQU 3424(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 3456(CX), Y9
- VMOVDQU 3488(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3520(CX), Y9
- VMOVDQU 3552(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 7 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3584(CX), Y9
- VMOVDQU 3616(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 3648(CX), Y9
- VMOVDQU 3680(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 3712(CX), Y9
- VMOVDQU 3744(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 3776(CX), Y9
- VMOVDQU 3808(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 3840(CX), Y9
- VMOVDQU 3872(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 3904(CX), Y9
- VMOVDQU 3936(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 3968(CX), Y9
- VMOVDQU 4000(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 4032(CX), Y9
- VMOVDQU 4064(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- MOVQ (R12), R14
- VMOVDQU Y0, (R14)(R13*1)
- MOVQ 24(R12), R14
- VMOVDQU Y1, (R14)(R13*1)
- MOVQ 48(R12), R14
- VMOVDQU Y2, (R14)(R13*1)
- MOVQ 72(R12), R14
- VMOVDQU Y3, (R14)(R13*1)
- MOVQ 96(R12), R14
- VMOVDQU Y4, (R14)(R13*1)
- MOVQ 120(R12), R14
- VMOVDQU Y5, (R14)(R13*1)
- MOVQ 144(R12), R14
- VMOVDQU Y6, (R14)(R13*1)
- MOVQ 168(R12), R14
- VMOVDQU Y7, (R14)(R13*1)
-
- // Prepare for next loop
- ADDQ $0x20, R13
- DECQ AX
- JNZ mulAvxTwo_8x8Xor_loop
- VZEROUPPER
-
-mulAvxTwo_8x8Xor_end:
- RET
-
-// func mulAvxTwo_8x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x9(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 158 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x9_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ start+72(FP), R13
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_8x9_loop:
- // Load and process 32 bytes from input 0 to 9 outputs
- VMOVDQU (BX), Y12
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU (CX), Y10
- VMOVDQU 32(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y0
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y1
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y2
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y3
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y4
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y5
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y6
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y7
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y8
-
- // Load and process 32 bytes from input 1 to 9 outputs
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 9 outputs
- VMOVDQU (DI), Y12
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1152(CX), Y10
- VMOVDQU 1184(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1216(CX), Y10
- VMOVDQU 1248(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1280(CX), Y10
- VMOVDQU 1312(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1344(CX), Y10
- VMOVDQU 1376(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1408(CX), Y10
- VMOVDQU 1440(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 1472(CX), Y10
- VMOVDQU 1504(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 1536(CX), Y10
- VMOVDQU 1568(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1600(CX), Y10
- VMOVDQU 1632(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1664(CX), Y10
- VMOVDQU 1696(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 3 to 9 outputs
- VMOVDQU (R8), Y12
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1728(CX), Y10
- VMOVDQU 1760(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1792(CX), Y10
- VMOVDQU 1824(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1856(CX), Y10
- VMOVDQU 1888(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1920(CX), Y10
- VMOVDQU 1952(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1984(CX), Y10
- VMOVDQU 2016(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2048(CX), Y10
- VMOVDQU 2080(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2112(CX), Y10
- VMOVDQU 2144(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2176(CX), Y10
- VMOVDQU 2208(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2240(CX), Y10
- VMOVDQU 2272(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 4 to 9 outputs
- VMOVDQU (R9), Y12
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2304(CX), Y10
- VMOVDQU 2336(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2368(CX), Y10
- VMOVDQU 2400(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 2432(CX), Y10
- VMOVDQU 2464(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 2496(CX), Y10
- VMOVDQU 2528(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 2560(CX), Y10
- VMOVDQU 2592(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2624(CX), Y10
- VMOVDQU 2656(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2688(CX), Y10
- VMOVDQU 2720(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2752(CX), Y10
- VMOVDQU 2784(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2816(CX), Y10
- VMOVDQU 2848(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 5 to 9 outputs
- VMOVDQU (R10), Y12
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2880(CX), Y10
- VMOVDQU 2912(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2944(CX), Y10
- VMOVDQU 2976(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3008(CX), Y10
- VMOVDQU 3040(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3072(CX), Y10
- VMOVDQU 3104(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3136(CX), Y10
- VMOVDQU 3168(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3200(CX), Y10
- VMOVDQU 3232(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3264(CX), Y10
- VMOVDQU 3296(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3328(CX), Y10
- VMOVDQU 3360(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3392(CX), Y10
- VMOVDQU 3424(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 6 to 9 outputs
- VMOVDQU (R11), Y12
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 3456(CX), Y10
- VMOVDQU 3488(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 3520(CX), Y10
- VMOVDQU 3552(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3584(CX), Y10
- VMOVDQU 3616(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3648(CX), Y10
- VMOVDQU 3680(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3712(CX), Y10
- VMOVDQU 3744(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3776(CX), Y10
- VMOVDQU 3808(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3840(CX), Y10
- VMOVDQU 3872(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3904(CX), Y10
- VMOVDQU 3936(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3968(CX), Y10
- VMOVDQU 4000(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 7 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 4032(CX), Y10
- VMOVDQU 4064(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 4096(CX), Y10
- VMOVDQU 4128(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 4160(CX), Y10
- VMOVDQU 4192(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 4224(CX), Y10
- VMOVDQU 4256(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 4288(CX), Y10
- VMOVDQU 4320(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 4352(CX), Y10
- VMOVDQU 4384(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 4416(CX), Y10
- VMOVDQU 4448(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 4480(CX), Y10
- VMOVDQU 4512(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 4544(CX), Y10
- VMOVDQU 4576(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs
- MOVQ (R12), R14
- VMOVDQU Y0, (R14)(R13*1)
- MOVQ 24(R12), R14
- VMOVDQU Y1, (R14)(R13*1)
- MOVQ 48(R12), R14
- VMOVDQU Y2, (R14)(R13*1)
- MOVQ 72(R12), R14
- VMOVDQU Y3, (R14)(R13*1)
- MOVQ 96(R12), R14
- VMOVDQU Y4, (R14)(R13*1)
- MOVQ 120(R12), R14
- VMOVDQU Y5, (R14)(R13*1)
- MOVQ 144(R12), R14
- VMOVDQU Y6, (R14)(R13*1)
- MOVQ 168(R12), R14
- VMOVDQU Y7, (R14)(R13*1)
- MOVQ 192(R12), R14
- VMOVDQU Y8, (R14)(R13*1)
-
- // Prepare for next loop
- ADDQ $0x20, R13
- DECQ AX
- JNZ mulAvxTwo_8x9_loop
- VZEROUPPER
-
-mulAvxTwo_8x9_end:
- RET
-
-// func mulAvxTwo_8x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x9Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 158 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x9Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ start+72(FP), R13
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_8x9Xor_loop:
- // Load and process 32 bytes from input 0 to 9 outputs
- VMOVDQU (BX), Y12
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- MOVQ (R12), R14
- VMOVDQU (R14)(R13*1), Y0
- VMOVDQU (CX), Y10
- VMOVDQU 32(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- MOVQ 24(R12), R14
- VMOVDQU (R14)(R13*1), Y1
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- MOVQ 48(R12), R14
- VMOVDQU (R14)(R13*1), Y2
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- MOVQ 72(R12), R14
- VMOVDQU (R14)(R13*1), Y3
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- MOVQ 96(R12), R14
- VMOVDQU (R14)(R13*1), Y4
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- MOVQ 120(R12), R14
- VMOVDQU (R14)(R13*1), Y5
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- MOVQ 144(R12), R14
- VMOVDQU (R14)(R13*1), Y6
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- MOVQ 168(R12), R14
- VMOVDQU (R14)(R13*1), Y7
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- MOVQ 192(R12), R14
- VMOVDQU (R14)(R13*1), Y8
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 1 to 9 outputs
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 9 outputs
- VMOVDQU (DI), Y12
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1152(CX), Y10
- VMOVDQU 1184(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1216(CX), Y10
- VMOVDQU 1248(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1280(CX), Y10
- VMOVDQU 1312(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1344(CX), Y10
- VMOVDQU 1376(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1408(CX), Y10
- VMOVDQU 1440(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 1472(CX), Y10
- VMOVDQU 1504(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 1536(CX), Y10
- VMOVDQU 1568(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1600(CX), Y10
- VMOVDQU 1632(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1664(CX), Y10
- VMOVDQU 1696(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 3 to 9 outputs
- VMOVDQU (R8), Y12
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1728(CX), Y10
- VMOVDQU 1760(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1792(CX), Y10
- VMOVDQU 1824(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1856(CX), Y10
- VMOVDQU 1888(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1920(CX), Y10
- VMOVDQU 1952(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1984(CX), Y10
- VMOVDQU 2016(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2048(CX), Y10
- VMOVDQU 2080(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2112(CX), Y10
- VMOVDQU 2144(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2176(CX), Y10
- VMOVDQU 2208(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2240(CX), Y10
- VMOVDQU 2272(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 4 to 9 outputs
- VMOVDQU (R9), Y12
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2304(CX), Y10
- VMOVDQU 2336(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2368(CX), Y10
- VMOVDQU 2400(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 2432(CX), Y10
- VMOVDQU 2464(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 2496(CX), Y10
- VMOVDQU 2528(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 2560(CX), Y10
- VMOVDQU 2592(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2624(CX), Y10
- VMOVDQU 2656(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2688(CX), Y10
- VMOVDQU 2720(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2752(CX), Y10
- VMOVDQU 2784(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2816(CX), Y10
- VMOVDQU 2848(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 5 to 9 outputs
- VMOVDQU (R10), Y12
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2880(CX), Y10
- VMOVDQU 2912(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2944(CX), Y10
- VMOVDQU 2976(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3008(CX), Y10
- VMOVDQU 3040(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3072(CX), Y10
- VMOVDQU 3104(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3136(CX), Y10
- VMOVDQU 3168(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3200(CX), Y10
- VMOVDQU 3232(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3264(CX), Y10
- VMOVDQU 3296(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3328(CX), Y10
- VMOVDQU 3360(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3392(CX), Y10
- VMOVDQU 3424(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 6 to 9 outputs
- VMOVDQU (R11), Y12
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 3456(CX), Y10
- VMOVDQU 3488(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 3520(CX), Y10
- VMOVDQU 3552(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3584(CX), Y10
- VMOVDQU 3616(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3648(CX), Y10
- VMOVDQU 3680(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3712(CX), Y10
- VMOVDQU 3744(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3776(CX), Y10
- VMOVDQU 3808(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3840(CX), Y10
- VMOVDQU 3872(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3904(CX), Y10
- VMOVDQU 3936(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3968(CX), Y10
- VMOVDQU 4000(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 7 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 4032(CX), Y10
- VMOVDQU 4064(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 4096(CX), Y10
- VMOVDQU 4128(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 4160(CX), Y10
- VMOVDQU 4192(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 4224(CX), Y10
- VMOVDQU 4256(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 4288(CX), Y10
- VMOVDQU 4320(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 4352(CX), Y10
- VMOVDQU 4384(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 4416(CX), Y10
- VMOVDQU 4448(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 4480(CX), Y10
- VMOVDQU 4512(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 4544(CX), Y10
- VMOVDQU 4576(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs
- MOVQ (R12), R14
- VMOVDQU Y0, (R14)(R13*1)
- MOVQ 24(R12), R14
- VMOVDQU Y1, (R14)(R13*1)
- MOVQ 48(R12), R14
- VMOVDQU Y2, (R14)(R13*1)
- MOVQ 72(R12), R14
- VMOVDQU Y3, (R14)(R13*1)
- MOVQ 96(R12), R14
- VMOVDQU Y4, (R14)(R13*1)
- MOVQ 120(R12), R14
- VMOVDQU Y5, (R14)(R13*1)
- MOVQ 144(R12), R14
- VMOVDQU Y6, (R14)(R13*1)
- MOVQ 168(R12), R14
- VMOVDQU Y7, (R14)(R13*1)
- MOVQ 192(R12), R14
- VMOVDQU Y8, (R14)(R13*1)
-
- // Prepare for next loop
- ADDQ $0x20, R13
- DECQ AX
- JNZ mulAvxTwo_8x9Xor_loop
- VZEROUPPER
-
-mulAvxTwo_8x9Xor_end:
- RET
-
-// func mulAvxTwo_8x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x10(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 175 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x10_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ start+72(FP), R13
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X10
- VPBROADCASTB X10, Y10
-
-mulAvxTwo_8x10_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (BX), Y13
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU (CX), Y11
- VMOVDQU 32(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y0
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y1
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y2
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y3
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y4
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y5
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y6
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y7
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y8
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y9
-
- // Load and process 32 bytes from input 1 to 10 outputs
- VMOVDQU (SI), Y13
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 2 to 10 outputs
- VMOVDQU (DI), Y13
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1280(CX), Y11
- VMOVDQU 1312(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1344(CX), Y11
- VMOVDQU 1376(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 1408(CX), Y11
- VMOVDQU 1440(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 1472(CX), Y11
- VMOVDQU 1504(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 1536(CX), Y11
- VMOVDQU 1568(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 1600(CX), Y11
- VMOVDQU 1632(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1664(CX), Y11
- VMOVDQU 1696(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1728(CX), Y11
- VMOVDQU 1760(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1792(CX), Y11
- VMOVDQU 1824(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1856(CX), Y11
- VMOVDQU 1888(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 3 to 10 outputs
- VMOVDQU (R8), Y13
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1920(CX), Y11
- VMOVDQU 1952(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1984(CX), Y11
- VMOVDQU 2016(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2048(CX), Y11
- VMOVDQU 2080(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2112(CX), Y11
- VMOVDQU 2144(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2176(CX), Y11
- VMOVDQU 2208(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2240(CX), Y11
- VMOVDQU 2272(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2304(CX), Y11
- VMOVDQU 2336(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 2368(CX), Y11
- VMOVDQU 2400(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 2432(CX), Y11
- VMOVDQU 2464(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 2496(CX), Y11
- VMOVDQU 2528(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 4 to 10 outputs
- VMOVDQU (R9), Y13
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 2560(CX), Y11
- VMOVDQU 2592(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 2624(CX), Y11
- VMOVDQU 2656(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2688(CX), Y11
- VMOVDQU 2720(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2752(CX), Y11
- VMOVDQU 2784(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2816(CX), Y11
- VMOVDQU 2848(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2880(CX), Y11
- VMOVDQU 2912(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2944(CX), Y11
- VMOVDQU 2976(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3008(CX), Y11
- VMOVDQU 3040(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3072(CX), Y11
- VMOVDQU 3104(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3136(CX), Y11
- VMOVDQU 3168(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 5 to 10 outputs
- VMOVDQU (R10), Y13
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3200(CX), Y11
- VMOVDQU 3232(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3264(CX), Y11
- VMOVDQU 3296(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3328(CX), Y11
- VMOVDQU 3360(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 3392(CX), Y11
- VMOVDQU 3424(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 3456(CX), Y11
- VMOVDQU 3488(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 3520(CX), Y11
- VMOVDQU 3552(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 3584(CX), Y11
- VMOVDQU 3616(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3648(CX), Y11
- VMOVDQU 3680(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3712(CX), Y11
- VMOVDQU 3744(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3776(CX), Y11
- VMOVDQU 3808(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 6 to 10 outputs
- VMOVDQU (R11), Y13
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3840(CX), Y11
- VMOVDQU 3872(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3904(CX), Y11
- VMOVDQU 3936(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3968(CX), Y11
- VMOVDQU 4000(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 4032(CX), Y11
- VMOVDQU 4064(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 4096(CX), Y11
- VMOVDQU 4128(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 4160(CX), Y11
- VMOVDQU 4192(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 4224(CX), Y11
- VMOVDQU 4256(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 4288(CX), Y11
- VMOVDQU 4320(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 4352(CX), Y11
- VMOVDQU 4384(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 4416(CX), Y11
- VMOVDQU 4448(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 7 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 4480(CX), Y11
- VMOVDQU 4512(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 4544(CX), Y11
- VMOVDQU 4576(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 4608(CX), Y11
- VMOVDQU 4640(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 4672(CX), Y11
- VMOVDQU 4704(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 4736(CX), Y11
- VMOVDQU 4768(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 4800(CX), Y11
- VMOVDQU 4832(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 4864(CX), Y11
- VMOVDQU 4896(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 4928(CX), Y11
- VMOVDQU 4960(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 4992(CX), Y11
- VMOVDQU 5024(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 5056(CX), Y11
- VMOVDQU 5088(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs
- MOVQ (R12), R14
- VMOVDQU Y0, (R14)(R13*1)
- MOVQ 24(R12), R14
- VMOVDQU Y1, (R14)(R13*1)
- MOVQ 48(R12), R14
- VMOVDQU Y2, (R14)(R13*1)
- MOVQ 72(R12), R14
- VMOVDQU Y3, (R14)(R13*1)
- MOVQ 96(R12), R14
- VMOVDQU Y4, (R14)(R13*1)
- MOVQ 120(R12), R14
- VMOVDQU Y5, (R14)(R13*1)
- MOVQ 144(R12), R14
- VMOVDQU Y6, (R14)(R13*1)
- MOVQ 168(R12), R14
- VMOVDQU Y7, (R14)(R13*1)
- MOVQ 192(R12), R14
- VMOVDQU Y8, (R14)(R13*1)
- MOVQ 216(R12), R14
- VMOVDQU Y9, (R14)(R13*1)
-
- // Prepare for next loop
- ADDQ $0x20, R13
- DECQ AX
- JNZ mulAvxTwo_8x10_loop
- VZEROUPPER
-
-mulAvxTwo_8x10_end:
- RET
-
-// func mulAvxTwo_8x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_8x10Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 175 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x10Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), DX
- MOVQ out_base+48(FP), R12
- MOVQ start+72(FP), R13
-
- // Add start offset to input
- ADDQ R13, BX
- ADDQ R13, SI
- ADDQ R13, DI
- ADDQ R13, R8
- ADDQ R13, R9
- ADDQ R13, R10
- ADDQ R13, R11
- ADDQ R13, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X10
- VPBROADCASTB X10, Y10
-
-mulAvxTwo_8x10Xor_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (BX), Y13
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- MOVQ (R12), R14
- VMOVDQU (R14)(R13*1), Y0
- VMOVDQU (CX), Y11
- VMOVDQU 32(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- MOVQ 24(R12), R14
- VMOVDQU (R14)(R13*1), Y1
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- MOVQ 48(R12), R14
- VMOVDQU (R14)(R13*1), Y2
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- MOVQ 72(R12), R14
- VMOVDQU (R14)(R13*1), Y3
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- MOVQ 96(R12), R14
- VMOVDQU (R14)(R13*1), Y4
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- MOVQ 120(R12), R14
- VMOVDQU (R14)(R13*1), Y5
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- MOVQ 144(R12), R14
- VMOVDQU (R14)(R13*1), Y6
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- MOVQ 168(R12), R14
- VMOVDQU (R14)(R13*1), Y7
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- MOVQ 192(R12), R14
- VMOVDQU (R14)(R13*1), Y8
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- MOVQ 216(R12), R14
- VMOVDQU (R14)(R13*1), Y9
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 1 to 10 outputs
- VMOVDQU (SI), Y13
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 2 to 10 outputs
- VMOVDQU (DI), Y13
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1280(CX), Y11
- VMOVDQU 1312(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1344(CX), Y11
- VMOVDQU 1376(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 1408(CX), Y11
- VMOVDQU 1440(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 1472(CX), Y11
- VMOVDQU 1504(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 1536(CX), Y11
- VMOVDQU 1568(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 1600(CX), Y11
- VMOVDQU 1632(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1664(CX), Y11
- VMOVDQU 1696(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1728(CX), Y11
- VMOVDQU 1760(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1792(CX), Y11
- VMOVDQU 1824(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1856(CX), Y11
- VMOVDQU 1888(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 3 to 10 outputs
- VMOVDQU (R8), Y13
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1920(CX), Y11
- VMOVDQU 1952(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1984(CX), Y11
- VMOVDQU 2016(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2048(CX), Y11
- VMOVDQU 2080(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2112(CX), Y11
- VMOVDQU 2144(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2176(CX), Y11
- VMOVDQU 2208(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2240(CX), Y11
- VMOVDQU 2272(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2304(CX), Y11
- VMOVDQU 2336(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 2368(CX), Y11
- VMOVDQU 2400(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 2432(CX), Y11
- VMOVDQU 2464(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 2496(CX), Y11
- VMOVDQU 2528(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 4 to 10 outputs
- VMOVDQU (R9), Y13
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 2560(CX), Y11
- VMOVDQU 2592(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 2624(CX), Y11
- VMOVDQU 2656(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2688(CX), Y11
- VMOVDQU 2720(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2752(CX), Y11
- VMOVDQU 2784(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2816(CX), Y11
- VMOVDQU 2848(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2880(CX), Y11
- VMOVDQU 2912(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2944(CX), Y11
- VMOVDQU 2976(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3008(CX), Y11
- VMOVDQU 3040(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3072(CX), Y11
- VMOVDQU 3104(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3136(CX), Y11
- VMOVDQU 3168(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 5 to 10 outputs
- VMOVDQU (R10), Y13
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3200(CX), Y11
- VMOVDQU 3232(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3264(CX), Y11
- VMOVDQU 3296(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3328(CX), Y11
- VMOVDQU 3360(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 3392(CX), Y11
- VMOVDQU 3424(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 3456(CX), Y11
- VMOVDQU 3488(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 3520(CX), Y11
- VMOVDQU 3552(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 3584(CX), Y11
- VMOVDQU 3616(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3648(CX), Y11
- VMOVDQU 3680(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3712(CX), Y11
- VMOVDQU 3744(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3776(CX), Y11
- VMOVDQU 3808(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 6 to 10 outputs
- VMOVDQU (R11), Y13
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3840(CX), Y11
- VMOVDQU 3872(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3904(CX), Y11
- VMOVDQU 3936(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3968(CX), Y11
- VMOVDQU 4000(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 4032(CX), Y11
- VMOVDQU 4064(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 4096(CX), Y11
- VMOVDQU 4128(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 4160(CX), Y11
- VMOVDQU 4192(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 4224(CX), Y11
- VMOVDQU 4256(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 4288(CX), Y11
- VMOVDQU 4320(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 4352(CX), Y11
- VMOVDQU 4384(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 4416(CX), Y11
- VMOVDQU 4448(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 7 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 4480(CX), Y11
- VMOVDQU 4512(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 4544(CX), Y11
- VMOVDQU 4576(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 4608(CX), Y11
- VMOVDQU 4640(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 4672(CX), Y11
- VMOVDQU 4704(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 4736(CX), Y11
- VMOVDQU 4768(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 4800(CX), Y11
- VMOVDQU 4832(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 4864(CX), Y11
- VMOVDQU 4896(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 4928(CX), Y11
- VMOVDQU 4960(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 4992(CX), Y11
- VMOVDQU 5024(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 5056(CX), Y11
- VMOVDQU 5088(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs
- MOVQ (R12), R14
- VMOVDQU Y0, (R14)(R13*1)
- MOVQ 24(R12), R14
- VMOVDQU Y1, (R14)(R13*1)
- MOVQ 48(R12), R14
- VMOVDQU Y2, (R14)(R13*1)
- MOVQ 72(R12), R14
- VMOVDQU Y3, (R14)(R13*1)
- MOVQ 96(R12), R14
- VMOVDQU Y4, (R14)(R13*1)
- MOVQ 120(R12), R14
- VMOVDQU Y5, (R14)(R13*1)
- MOVQ 144(R12), R14
- VMOVDQU Y6, (R14)(R13*1)
- MOVQ 168(R12), R14
- VMOVDQU Y7, (R14)(R13*1)
- MOVQ 192(R12), R14
- VMOVDQU Y8, (R14)(R13*1)
- MOVQ 216(R12), R14
- VMOVDQU Y9, (R14)(R13*1)
-
- // Prepare for next loop
- ADDQ $0x20, R13
- DECQ AX
- JNZ mulAvxTwo_8x10Xor_loop
- VZEROUPPER
-
-mulAvxTwo_8x10Xor_end:
- RET
-
-// func mulAvxTwo_9x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x1(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 22 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x1_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ (R13), R13
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R13
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X1
- VPBROADCASTB X1, Y1
-
-mulAvxTwo_9x1_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y4
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU (CX), Y2
- VMOVDQU 32(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y0
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y4
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y4
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 128(CX), Y2
- VMOVDQU 160(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y4
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 192(CX), Y2
- VMOVDQU 224(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y4
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 256(CX), Y2
- VMOVDQU 288(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 5 to 1 outputs
- VMOVDQU (R10), Y4
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 320(CX), Y2
- VMOVDQU 352(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 6 to 1 outputs
- VMOVDQU (R11), Y4
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 384(CX), Y2
- VMOVDQU 416(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 7 to 1 outputs
- VMOVDQU (R12), Y4
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 448(CX), Y2
- VMOVDQU 480(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 8 to 1 outputs
- VMOVDQU (DX), Y4
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 512(CX), Y2
- VMOVDQU 544(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Store 1 outputs
- VMOVDQU Y0, (R13)
- ADDQ $0x20, R13
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_9x1_loop
- VZEROUPPER
-
-mulAvxTwo_9x1_end:
- RET
-
-// func mulAvxTwo_9x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x1_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 42 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x1_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ out_base+48(FP), R13
- MOVQ (R13), R13
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R13
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_9x1_64_loop:
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y6
- VMOVDQU 32(BX), Y5
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- VPXOR Y3, Y4, Y0
- VPXOR Y5, Y6, Y1
-
- // Load and process 64 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y6
- VMOVDQU 32(SI), Y5
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y6
- VMOVDQU 32(DI), Y5
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y6
- VMOVDQU 32(R8), Y5
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y6
- VMOVDQU 32(R9), Y5
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 5 to 1 outputs
- VMOVDQU (R10), Y6
- VMOVDQU 32(R10), Y5
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 6 to 1 outputs
- VMOVDQU (R11), Y6
- VMOVDQU 32(R11), Y5
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 7 to 1 outputs
- VMOVDQU (R12), Y6
- VMOVDQU 32(R12), Y5
- ADDQ $0x40, R12
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 8 to 1 outputs
- VMOVDQU (DX), Y6
- VMOVDQU 32(DX), Y5
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Store 1 outputs
- VMOVDQU Y0, (R13)
- VMOVDQU Y1, 32(R13)
- ADDQ $0x40, R13
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_9x1_64_loop
- VZEROUPPER
-
-mulAvxTwo_9x1_64_end:
- RET
-
-// func mulAvxTwo_9x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x1Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 22 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x1Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ (R13), R13
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R13
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X1
- VPBROADCASTB X1, Y1
-
-mulAvxTwo_9x1Xor_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y4
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU (R13), Y0
- VMOVDQU (CX), Y2
- VMOVDQU 32(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y4
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y4
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 128(CX), Y2
- VMOVDQU 160(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y4
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 192(CX), Y2
- VMOVDQU 224(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y4
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 256(CX), Y2
- VMOVDQU 288(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 5 to 1 outputs
- VMOVDQU (R10), Y4
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 320(CX), Y2
- VMOVDQU 352(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 6 to 1 outputs
- VMOVDQU (R11), Y4
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 384(CX), Y2
- VMOVDQU 416(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 7 to 1 outputs
- VMOVDQU (R12), Y4
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 448(CX), Y2
- VMOVDQU 480(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 8 to 1 outputs
- VMOVDQU (DX), Y4
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 512(CX), Y2
- VMOVDQU 544(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Store 1 outputs
- VMOVDQU Y0, (R13)
- ADDQ $0x20, R13
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_9x1Xor_loop
- VZEROUPPER
-
-mulAvxTwo_9x1Xor_end:
- RET
-
-// func mulAvxTwo_9x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x1_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 42 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x1_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ out_base+48(FP), R13
- MOVQ (R13), R13
- MOVQ start+72(FP), R14
-
- // Add start offset to output
- ADDQ R14, R13
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, DX
- MOVQ $0x0000000f, R14
- MOVQ R14, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_9x1_64Xor_loop:
- // Load 1 outputs
- VMOVDQU (R13), Y0
- VMOVDQU 32(R13), Y1
-
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y6
- VMOVDQU 32(BX), Y5
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y6
- VMOVDQU 32(SI), Y5
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y6
- VMOVDQU 32(DI), Y5
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y6
- VMOVDQU 32(R8), Y5
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y6
- VMOVDQU 32(R9), Y5
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 5 to 1 outputs
- VMOVDQU (R10), Y6
- VMOVDQU 32(R10), Y5
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 6 to 1 outputs
- VMOVDQU (R11), Y6
- VMOVDQU 32(R11), Y5
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 7 to 1 outputs
- VMOVDQU (R12), Y6
- VMOVDQU 32(R12), Y5
- ADDQ $0x40, R12
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 8 to 1 outputs
- VMOVDQU (DX), Y6
- VMOVDQU 32(DX), Y5
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Store 1 outputs
- VMOVDQU Y0, (R13)
- VMOVDQU Y1, 32(R13)
- ADDQ $0x40, R13
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_9x1_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_9x1_64Xor_end:
- RET
-
-// func mulAvxTwo_9x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x2(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 43 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x2_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ (R13), R14
- MOVQ 24(R13), R13
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R14
- ADDQ R15, R13
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_9x2_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y5
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y0
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y1
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y5
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y5
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y5
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y5
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 5 to 2 outputs
- VMOVDQU (R10), Y5
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 640(CX), Y3
- VMOVDQU 672(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 704(CX), Y3
- VMOVDQU 736(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 6 to 2 outputs
- VMOVDQU (R11), Y5
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 768(CX), Y3
- VMOVDQU 800(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 832(CX), Y3
- VMOVDQU 864(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 7 to 2 outputs
- VMOVDQU (R12), Y5
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 896(CX), Y3
- VMOVDQU 928(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 960(CX), Y3
- VMOVDQU 992(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 8 to 2 outputs
- VMOVDQU (DX), Y5
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 1024(CX), Y3
- VMOVDQU 1056(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 1088(CX), Y3
- VMOVDQU 1120(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Store 2 outputs
- VMOVDQU Y0, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y1, (R13)
- ADDQ $0x20, R13
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_9x2_loop
- VZEROUPPER
-
-mulAvxTwo_9x2_end:
- RET
-
-// func mulAvxTwo_9x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x2_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 81 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x2_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ out_base+48(FP), R13
- MOVQ (R13), R14
- MOVQ 24(R13), R13
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R14
- ADDQ R15, R13
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_9x2_64_loop:
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9
- VMOVDQU 32(BX), Y11
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y0
- VPXOR Y7, Y8, Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y2
- VPXOR Y7, Y8, Y3
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y9
- VMOVDQU 32(SI), Y11
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y9
- VMOVDQU 32(DI), Y11
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y9
- VMOVDQU 32(R8), Y11
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y9
- VMOVDQU 32(R9), Y11
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 5 to 2 outputs
- VMOVDQU (R10), Y9
- VMOVDQU 32(R10), Y11
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 6 to 2 outputs
- VMOVDQU (R11), Y9
- VMOVDQU 32(R11), Y11
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 7 to 2 outputs
- VMOVDQU (R12), Y9
- VMOVDQU 32(R12), Y11
- ADDQ $0x40, R12
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 8 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (R14)
- VMOVDQU Y1, 32(R14)
- ADDQ $0x40, R14
- VMOVDQU Y2, (R13)
- VMOVDQU Y3, 32(R13)
- ADDQ $0x40, R13
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_9x2_64_loop
- VZEROUPPER
-
-mulAvxTwo_9x2_64_end:
- RET
-
-// func mulAvxTwo_9x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x2Xor(SB), NOSPLIT, $0-88
- // Folds nibble-table lookups of 9 input slices into the current contents of 2 output slices, 32 bytes per loop pass.
- // Loading no tables to registers (each table is read from matrix via CX when needed). Destination kept in GP registers (R14 = out[0], R13 = out[1]).
- // Full registers estimated 43 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX // AX = n / 32 = number of loop passes, a remainder of n below 32 bytes is not processed here
- TESTQ AX, AX
- JZ mulAvxTwo_9x2Xor_end // nothing to do when n is below 32
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX // data pointers of in[0]..in[8], read at a 24-byte stride from in_base
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX // in[8] pointer overwrites in_base, which is not needed afterwards
- MOVQ out_base+48(FP), R13
- MOVQ (R13), R14 // R14 = out[0] data pointer
- MOVQ 24(R13), R13 // R13 = out[1] data pointer, overwrites out_base
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R14
- ADDQ R15, R13
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, DX
- MOVQ $0x0000000f, R15 // R15 is reused, the start offset has been applied everywhere
- MOVQ R15, X2
- VPBROADCASTB X2, Y2 // Y2 = 0x0f in every byte, the nibble mask
-
-mulAvxTwo_9x2Xor_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y5
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y5, Y6 // shift right by 4 so the high nibbles can be masked out below
- VPAND Y2, Y5, Y5 // Y5 = low nibble of every input byte
- VPAND Y2, Y6, Y6 // Y6 = high nibble of every input byte
- VMOVDQU (R14), Y0 // accumulator starts from the current contents of out[0]
- VMOVDQU (CX), Y3 // tables for input i, output j start at matrix offset (i*2+j)*64
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3 // lookup in the table at +0 indexed by the low nibbles
- VPSHUFB Y6, Y4, Y4 // lookup in the table at +32 indexed by the high nibbles, XOR3WAY (macro defined outside this chunk) presumably XORs both results into Y0 - confirm
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU (R13), Y1 // accumulator for out[1] starts from its current contents
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y5
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y5
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y5
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y5
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 5 to 2 outputs
- VMOVDQU (R10), Y5
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 640(CX), Y3
- VMOVDQU 672(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 704(CX), Y3
- VMOVDQU 736(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 6 to 2 outputs
- VMOVDQU (R11), Y5
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 768(CX), Y3
- VMOVDQU 800(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 832(CX), Y3
- VMOVDQU 864(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 7 to 2 outputs
- VMOVDQU (R12), Y5
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 896(CX), Y3
- VMOVDQU 928(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 960(CX), Y3
- VMOVDQU 992(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 8 to 2 outputs
- VMOVDQU (DX), Y5
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 1024(CX), Y3
- VMOVDQU 1056(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 1088(CX), Y3
- VMOVDQU 1120(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Store 2 outputs
- VMOVDQU Y0, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y1, (R13)
- ADDQ $0x20, R13
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_9x2Xor_loop
- VZEROUPPER // only reached after the loop has run, the early exit above jumps past it
-
-mulAvxTwo_9x2Xor_end:
- RET
-
-// func mulAvxTwo_9x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x2_64Xor(SB), $0-88
- // Folds nibble-table lookups of 9 input slices into the current contents of 2 output slices, 64 bytes (two YMM halves) per loop pass.
- // Loading no tables to registers (each table is read from matrix via CX when needed). Destination kept in GP registers (R14 = out[0], R13 = out[1]).
- // Full registers estimated 81 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX // AX = n / 64 = number of loop passes, a remainder of n below 64 bytes is not processed here
- TESTQ AX, AX
- JZ mulAvxTwo_9x2_64Xor_end // nothing to do when n is below 64
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX // data pointers of in[0]..in[8], read at a 24-byte stride from in_base
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX // in[8] pointer overwrites in_base, which is not needed afterwards
- MOVQ out_base+48(FP), R13
- MOVQ out_base+48(FP), R13 // NOTE(review): redundant, repeats the load on the previous line - harmless, fix belongs in the generator
- MOVQ (R13), R14 // R14 = out[0] data pointer
- MOVQ 24(R13), R13 // R13 = out[1] data pointer, overwrites out_base
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R14
- ADDQ R15, R13
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, DX
- MOVQ $0x0000000f, R15 // R15 is reused, the start offset has been applied everywhere
- MOVQ R15, X4
- VPBROADCASTB X4, Y4 // Y4 = 0x0f in every byte, the nibble mask
-
-mulAvxTwo_9x2_64Xor_loop:
- // Load 2 outputs (Y0, Y1 = the two 32-byte halves of out[0], Y2, Y3 = the two halves of out[1])
- VMOVDQU (R14), Y0
- VMOVDQU 32(R14), Y1
- VMOVDQU (R13), Y2
- VMOVDQU 32(R13), Y3
-
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9 // first 32 bytes of the input
- VMOVDQU 32(BX), Y11 // second 32 bytes of the input
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9 // Y9, Y11 = low nibbles of the two halves
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10 // Y10, Y12 = high nibbles of the two halves
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5 // tables for input i, output j start at matrix offset (i*2+j)*64
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7 // second half is looked up first because the next line overwrites the table in Y5
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6 // XOR3WAY (macro defined outside this chunk) presumably XORs its two sources into the destination - confirm
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y9
- VMOVDQU 32(SI), Y11
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y9
- VMOVDQU 32(DI), Y11
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y9
- VMOVDQU 32(R8), Y11
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y9
- VMOVDQU 32(R9), Y11
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 5 to 2 outputs
- VMOVDQU (R10), Y9
- VMOVDQU 32(R10), Y11
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 6 to 2 outputs
- VMOVDQU (R11), Y9
- VMOVDQU 32(R11), Y11
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 7 to 2 outputs
- VMOVDQU (R12), Y9
- VMOVDQU 32(R12), Y11
- ADDQ $0x40, R12
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 8 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (R14)
- VMOVDQU Y1, 32(R14)
- ADDQ $0x40, R14
- VMOVDQU Y2, (R13)
- VMOVDQU Y3, 32(R13)
- ADDQ $0x40, R13
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_9x2_64Xor_loop
- VZEROUPPER // only reached after the loop has run, the early exit above jumps past it
-
-mulAvxTwo_9x2_64Xor_end:
- RET
-
-// func mulAvxTwo_9x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x3(SB), NOSPLIT, $8-88
- // Computes 3 output slices from nibble-table lookups of 9 input slices, 32 bytes per loop pass. Previous output contents are not read.
- // Loading no tables to registers (each table is read from matrix via CX when needed). Destination kept in GP registers (R14 = out[0], R15 = out[1], R13 = out[2]).
- // Full registers estimated 62 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX // AX = n / 32 = number of loop passes, a remainder of n below 32 bytes is not processed here
- TESTQ AX, AX
- JZ mulAvxTwo_9x3_end // nothing to do when n is below 32
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX // data pointers of in[0]..in[8], read at a 24-byte stride from in_base
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX // in[8] pointer overwrites in_base, which is not needed afterwards
- MOVQ out_base+48(FP), R13
- MOVQ (R13), R14 // R14 = out[0] data pointer
- MOVQ 24(R13), R15 // R15 = out[1] data pointer
- MOVQ 48(R13), R13 // R13 = out[2] data pointer, overwrites out_base
- MOVQ start+72(FP), BP // BP is used as scratch - NOTE(review): presumably the 8-byte frame on the TEXT line exists so BP is saved and restored, confirm
-
- // Add start offset to output
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R13
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, DX
- MOVQ $0x0000000f, BP // BP is reused, the start offset has been applied everywhere
- MOVQ BP, X3
- VPBROADCASTB X3, Y3 // Y3 = 0x0f in every byte, the nibble mask
-
-mulAvxTwo_9x3_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7 // shift right by 4 so the high nibbles can be masked out below
- VPAND Y3, Y6, Y6 // Y6 = low nibble of every input byte
- VPAND Y3, Y7, Y7 // Y7 = high nibble of every input byte
- VMOVDQU (CX), Y4 // tables for input i, output j start at matrix offset (i*3+j)*64
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4 // lookup in the table at +0 indexed by the low nibbles
- VPSHUFB Y7, Y5, Y5 // lookup in the table at +32 indexed by the high nibbles
- VPXOR Y4, Y5, Y0 // input 0 initialises the accumulator, so the old output is overwritten rather than combined
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y1
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y2
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y6
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5 // XOR3WAY (macro defined outside this chunk) presumably XORs its two sources into the destination - confirm
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y6
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y6
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (R9), Y6
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 768(CX), Y4
- VMOVDQU 800(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 832(CX), Y4
- VMOVDQU 864(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 896(CX), Y4
- VMOVDQU 928(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 5 to 3 outputs
- VMOVDQU (R10), Y6
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 960(CX), Y4
- VMOVDQU 992(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1024(CX), Y4
- VMOVDQU 1056(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1088(CX), Y4
- VMOVDQU 1120(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 6 to 3 outputs
- VMOVDQU (R11), Y6
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1152(CX), Y4
- VMOVDQU 1184(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1216(CX), Y4
- VMOVDQU 1248(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1280(CX), Y4
- VMOVDQU 1312(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 7 to 3 outputs
- VMOVDQU (R12), Y6
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1344(CX), Y4
- VMOVDQU 1376(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1408(CX), Y4
- VMOVDQU 1440(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1472(CX), Y4
- VMOVDQU 1504(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 8 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1536(CX), Y4
- VMOVDQU 1568(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1600(CX), Y4
- VMOVDQU 1632(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1664(CX), Y4
- VMOVDQU 1696(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y1, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y2, (R13)
- ADDQ $0x20, R13
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_9x3_loop
- VZEROUPPER // only reached after the loop has run, the early exit above jumps past it
-
-mulAvxTwo_9x3_end:
- RET
-
-// func mulAvxTwo_9x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x3_64(SB), $8-88
- // Computes 3 output slices from nibble-table lookups of 9 input slices, 64 bytes (two YMM halves) per loop pass. Previous output contents are not read.
- // Loading no tables to registers (each table is read from matrix via CX when needed). Destination kept in GP registers (R14 = out[0], R15 = out[1], R13 = out[2]).
- // Full registers estimated 118 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX // AX = n / 64 = number of loop passes, a remainder of n below 64 bytes is not processed here
- TESTQ AX, AX
- JZ mulAvxTwo_9x3_64_end // nothing to do when n is below 64
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX // data pointers of in[0]..in[8], read at a 24-byte stride from in_base
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX // in[8] pointer overwrites in_base, which is not needed afterwards
- MOVQ out_base+48(FP), R13
- MOVQ out_base+48(FP), R13 // NOTE(review): redundant, repeats the load on the previous line - harmless, fix belongs in the generator
- MOVQ (R13), R14 // R14 = out[0] data pointer
- MOVQ 24(R13), R15 // R15 = out[1] data pointer
- MOVQ 48(R13), R13 // R13 = out[2] data pointer, overwrites out_base
- MOVQ start+72(FP), BP // BP is used as scratch - NOTE(review): presumably the 8-byte frame on the TEXT line exists so BP is saved and restored, confirm
-
- // Add start offset to output
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R13
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, DX
- MOVQ $0x0000000f, BP // BP is reused, the start offset has been applied everywhere
- MOVQ BP, X6
- VPBROADCASTB X6, Y6 // Y6 = 0x0f in every byte, the nibble mask
-
-mulAvxTwo_9x3_64_loop:
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y11 // first 32 bytes of the input
- VMOVDQU 32(BX), Y13 // second 32 bytes of the input
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11 // Y11, Y13 = low nibbles of the two halves
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12 // Y12, Y14 = high nibbles of the two halves
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7 // tables for input i, output j start at matrix offset (i*3+j)*64
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9 // second half is looked up first because the next line overwrites the table in Y7
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y0 // input 0 initialises the accumulators (Y0, Y1 = halves of out[0]), so the old output is overwritten rather than combined
- VPXOR Y9, Y10, Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y2 // Y2, Y3 = halves of out[1]
- VPXOR Y9, Y10, Y3
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y4 // Y4, Y5 = halves of out[2]
- VPXOR Y9, Y10, Y5
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y11
- VMOVDQU 32(SI), Y13
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8 // XOR3WAY (macro defined outside this chunk) presumably XORs its two sources into the destination - confirm
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y11
- VMOVDQU 32(DI), Y13
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y11
- VMOVDQU 32(R8), Y13
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 4 to 3 outputs
- VMOVDQU (R9), Y11
- VMOVDQU 32(R9), Y13
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 5 to 3 outputs
- VMOVDQU (R10), Y11
- VMOVDQU 32(R10), Y13
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 6 to 3 outputs
- VMOVDQU (R11), Y11
- VMOVDQU 32(R11), Y13
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 7 to 3 outputs
- VMOVDQU (R12), Y11
- VMOVDQU 32(R12), Y13
- ADDQ $0x40, R12
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 8 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (R14)
- VMOVDQU Y1, 32(R14)
- ADDQ $0x40, R14
- VMOVDQU Y2, (R15)
- VMOVDQU Y3, 32(R15)
- ADDQ $0x40, R15
- VMOVDQU Y4, (R13)
- VMOVDQU Y5, 32(R13)
- ADDQ $0x40, R13
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_9x3_64_loop
- VZEROUPPER // only reached after the loop has run, the early exit above jumps past it
-
-mulAvxTwo_9x3_64_end:
- RET
-
-// func mulAvxTwo_9x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x3Xor(SB), NOSPLIT, $8-88
- // Folds nibble-table lookups of 9 input slices into the current contents of 3 output slices, 32 bytes per loop pass.
- // Loading no tables to registers (each table is read from matrix via CX when needed). Destination kept in GP registers (R14 = out[0], R15 = out[1], R13 = out[2]).
- // Full registers estimated 62 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX // AX = n / 32 = number of loop passes, a remainder of n below 32 bytes is not processed here
- TESTQ AX, AX
- JZ mulAvxTwo_9x3Xor_end // nothing to do when n is below 32
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX // data pointers of in[0]..in[8], read at a 24-byte stride from in_base
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX // in[8] pointer overwrites in_base, which is not needed afterwards
- MOVQ out_base+48(FP), R13
- MOVQ (R13), R14 // R14 = out[0] data pointer
- MOVQ 24(R13), R15 // R15 = out[1] data pointer
- MOVQ 48(R13), R13 // R13 = out[2] data pointer, overwrites out_base
- MOVQ start+72(FP), BP // BP is used as scratch - NOTE(review): presumably the 8-byte frame on the TEXT line exists so BP is saved and restored, confirm
-
- // Add start offset to output
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R13
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, DX
- MOVQ $0x0000000f, BP // BP is reused, the start offset has been applied everywhere
- MOVQ BP, X3
- VPBROADCASTB X3, Y3 // Y3 = 0x0f in every byte, the nibble mask
-
-mulAvxTwo_9x3Xor_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7 // shift right by 4 so the high nibbles can be masked out below
- VPAND Y3, Y6, Y6 // Y6 = low nibble of every input byte
- VPAND Y3, Y7, Y7 // Y7 = high nibble of every input byte
- VMOVDQU (R14), Y0 // accumulator starts from the current contents of out[0]
- VMOVDQU (CX), Y4 // tables for input i, output j start at matrix offset (i*3+j)*64
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4 // lookup in the table at +0 indexed by the low nibbles
- VPSHUFB Y7, Y5, Y5 // lookup in the table at +32 indexed by the high nibbles, XOR3WAY (macro defined outside this chunk) presumably XORs both results into Y0 - confirm
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU (R15), Y1 // accumulator for out[1] starts from its current contents
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU (R13), Y2 // accumulator for out[2] starts from its current contents
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y6
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y6
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y6
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (R9), Y6
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 768(CX), Y4
- VMOVDQU 800(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 832(CX), Y4
- VMOVDQU 864(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 896(CX), Y4
- VMOVDQU 928(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 5 to 3 outputs
- VMOVDQU (R10), Y6
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 960(CX), Y4
- VMOVDQU 992(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1024(CX), Y4
- VMOVDQU 1056(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1088(CX), Y4
- VMOVDQU 1120(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 6 to 3 outputs
- VMOVDQU (R11), Y6
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1152(CX), Y4
- VMOVDQU 1184(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1216(CX), Y4
- VMOVDQU 1248(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1280(CX), Y4
- VMOVDQU 1312(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 7 to 3 outputs
- VMOVDQU (R12), Y6
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1344(CX), Y4
- VMOVDQU 1376(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1408(CX), Y4
- VMOVDQU 1440(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1472(CX), Y4
- VMOVDQU 1504(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 8 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1536(CX), Y4
- VMOVDQU 1568(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1600(CX), Y4
- VMOVDQU 1632(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1664(CX), Y4
- VMOVDQU 1696(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y1, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y2, (R13)
- ADDQ $0x20, R13
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_9x3Xor_loop
- VZEROUPPER // only reached after the loop has run, the early exit above jumps past it
-
-mulAvxTwo_9x3Xor_end:
- RET
-
-// func mulAvxTwo_9x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x3_64Xor(SB), $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 118 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x3_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ out_base+48(FP), R13
- MOVQ (R13), R14
- MOVQ 24(R13), R15
- MOVQ 48(R13), R13
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R13
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_9x3_64Xor_loop:
- // Load 3 outputs
- VMOVDQU (R14), Y0
- VMOVDQU 32(R14), Y1
- VMOVDQU (R15), Y2
- VMOVDQU 32(R15), Y3
- VMOVDQU (R13), Y4
- VMOVDQU 32(R13), Y5
-
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (BX), Y11
- VMOVDQU 32(BX), Y13
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (SI), Y11
- VMOVDQU 32(SI), Y13
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 2 to 3 outputs
- VMOVDQU (DI), Y11
- VMOVDQU 32(DI), Y13
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 3 to 3 outputs
- VMOVDQU (R8), Y11
- VMOVDQU 32(R8), Y13
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 4 to 3 outputs
- VMOVDQU (R9), Y11
- VMOVDQU 32(R9), Y13
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 5 to 3 outputs
- VMOVDQU (R10), Y11
- VMOVDQU 32(R10), Y13
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 6 to 3 outputs
- VMOVDQU (R11), Y11
- VMOVDQU 32(R11), Y13
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 7 to 3 outputs
- VMOVDQU (R12), Y11
- VMOVDQU 32(R12), Y13
- ADDQ $0x40, R12
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 8 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (R14)
- VMOVDQU Y1, 32(R14)
- ADDQ $0x40, R14
- VMOVDQU Y2, (R15)
- VMOVDQU Y3, 32(R15)
- ADDQ $0x40, R15
- VMOVDQU Y4, (R13)
- VMOVDQU Y5, 32(R13)
- ADDQ $0x40, R13
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_9x3_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_9x3_64Xor_end:
- RET
-
-// func mulAvxTwo_9x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x4(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 81 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x4_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), SI
- MOVQ 72(AX), DI
- MOVQ 96(AX), R8
- MOVQ 120(AX), R9
- MOVQ 144(AX), R10
- MOVQ 168(AX), R11
- MOVQ 192(AX), AX
- MOVQ out_base+48(FP), R12
- MOVQ (R12), R13
- MOVQ 24(R12), R14
- MOVQ 48(R12), R15
- MOVQ 72(R12), R12
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R12
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X4
- VPBROADCASTB X4, Y4
- MOVQ n+80(FP), BP
- SHRQ $0x05, BP
-
-mulAvxTwo_9x4_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y0
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y1
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y2
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y3
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (SI), Y7
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (DI), Y7
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (R8), Y7
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 5 to 4 outputs
- VMOVDQU (R9), Y7
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1280(CX), Y5
- VMOVDQU 1312(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1344(CX), Y5
- VMOVDQU 1376(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1408(CX), Y5
- VMOVDQU 1440(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1472(CX), Y5
- VMOVDQU 1504(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 6 to 4 outputs
- VMOVDQU (R10), Y7
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1536(CX), Y5
- VMOVDQU 1568(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1600(CX), Y5
- VMOVDQU 1632(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1664(CX), Y5
- VMOVDQU 1696(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1728(CX), Y5
- VMOVDQU 1760(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 7 to 4 outputs
- VMOVDQU (R11), Y7
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1792(CX), Y5
- VMOVDQU 1824(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1856(CX), Y5
- VMOVDQU 1888(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1920(CX), Y5
- VMOVDQU 1952(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1984(CX), Y5
- VMOVDQU 2016(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 8 to 4 outputs
- VMOVDQU (AX), Y7
- ADDQ $0x20, AX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 2048(CX), Y5
- VMOVDQU 2080(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 2112(CX), Y5
- VMOVDQU 2144(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 2176(CX), Y5
- VMOVDQU 2208(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 2240(CX), Y5
- VMOVDQU 2272(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- VMOVDQU Y0, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y1, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y2, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y3, (R12)
- ADDQ $0x20, R12
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_9x4_loop
- VZEROUPPER
-
-mulAvxTwo_9x4_end:
- RET
-
-// func mulAvxTwo_9x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x4Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 81 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x4Xor_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), SI
- MOVQ 72(AX), DI
- MOVQ 96(AX), R8
- MOVQ 120(AX), R9
- MOVQ 144(AX), R10
- MOVQ 168(AX), R11
- MOVQ 192(AX), AX
- MOVQ out_base+48(FP), R12
- MOVQ (R12), R13
- MOVQ 24(R12), R14
- MOVQ 48(R12), R15
- MOVQ 72(R12), R12
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R13
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R12
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X4
- VPBROADCASTB X4, Y4
- MOVQ n+80(FP), BP
- SHRQ $0x05, BP
-
-mulAvxTwo_9x4Xor_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (R13), Y0
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU (R14), Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU (R15), Y2
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU (R12), Y3
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (SI), Y7
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (DI), Y7
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (R8), Y7
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 5 to 4 outputs
- VMOVDQU (R9), Y7
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1280(CX), Y5
- VMOVDQU 1312(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1344(CX), Y5
- VMOVDQU 1376(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1408(CX), Y5
- VMOVDQU 1440(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1472(CX), Y5
- VMOVDQU 1504(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 6 to 4 outputs
- VMOVDQU (R10), Y7
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1536(CX), Y5
- VMOVDQU 1568(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1600(CX), Y5
- VMOVDQU 1632(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1664(CX), Y5
- VMOVDQU 1696(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1728(CX), Y5
- VMOVDQU 1760(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 7 to 4 outputs
- VMOVDQU (R11), Y7
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1792(CX), Y5
- VMOVDQU 1824(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1856(CX), Y5
- VMOVDQU 1888(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1920(CX), Y5
- VMOVDQU 1952(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1984(CX), Y5
- VMOVDQU 2016(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 8 to 4 outputs
- VMOVDQU (AX), Y7
- ADDQ $0x20, AX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 2048(CX), Y5
- VMOVDQU 2080(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 2112(CX), Y5
- VMOVDQU 2144(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 2176(CX), Y5
- VMOVDQU 2208(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 2240(CX), Y5
- VMOVDQU 2272(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- VMOVDQU Y0, (R13)
- ADDQ $0x20, R13
- VMOVDQU Y1, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y2, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y3, (R12)
- ADDQ $0x20, R12
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_9x4Xor_loop
- VZEROUPPER
-
-mulAvxTwo_9x4Xor_end:
- RET
-
-// func mulAvxTwo_9x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x5(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 100 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x5_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ start+72(FP), R14
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_9x5_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y0
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y1
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y2
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y3
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y4
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (SI), Y8
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (DI), Y8
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (R8), Y8
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (R9), Y8
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 5 to 5 outputs
- VMOVDQU (R10), Y8
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1600(CX), Y6
- VMOVDQU 1632(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1664(CX), Y6
- VMOVDQU 1696(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1728(CX), Y6
- VMOVDQU 1760(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1792(CX), Y6
- VMOVDQU 1824(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1856(CX), Y6
- VMOVDQU 1888(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 6 to 5 outputs
- VMOVDQU (R11), Y8
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1920(CX), Y6
- VMOVDQU 1952(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1984(CX), Y6
- VMOVDQU 2016(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2048(CX), Y6
- VMOVDQU 2080(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2112(CX), Y6
- VMOVDQU 2144(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2176(CX), Y6
- VMOVDQU 2208(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 7 to 5 outputs
- VMOVDQU (R12), Y8
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2240(CX), Y6
- VMOVDQU 2272(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 2304(CX), Y6
- VMOVDQU 2336(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2368(CX), Y6
- VMOVDQU 2400(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2432(CX), Y6
- VMOVDQU 2464(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2496(CX), Y6
- VMOVDQU 2528(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 8 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2560(CX), Y6
- VMOVDQU 2592(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 2624(CX), Y6
- VMOVDQU 2656(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2688(CX), Y6
- VMOVDQU 2720(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2752(CX), Y6
- VMOVDQU 2784(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2816(CX), Y6
- VMOVDQU 2848(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- MOVQ (R13), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(R13), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(R13), R15
- VMOVDQU Y2, (R15)(R14*1)
- MOVQ 72(R13), R15
- VMOVDQU Y3, (R15)(R14*1)
- MOVQ 96(R13), R15
- VMOVDQU Y4, (R15)(R14*1)
-
- // Prepare for next loop
- ADDQ $0x20, R14
- DECQ AX
- JNZ mulAvxTwo_9x5_loop
- VZEROUPPER
-
-mulAvxTwo_9x5_end:
- RET
-
-// func mulAvxTwo_9x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x5Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 100 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x5Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ start+72(FP), R14
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_9x5Xor_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- MOVQ (R13), R15
- VMOVDQU (R15)(R14*1), Y0
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- MOVQ 24(R13), R15
- VMOVDQU (R15)(R14*1), Y1
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- MOVQ 48(R13), R15
- VMOVDQU (R15)(R14*1), Y2
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- MOVQ 72(R13), R15
- VMOVDQU (R15)(R14*1), Y3
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- MOVQ 96(R13), R15
- VMOVDQU (R15)(R14*1), Y4
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (SI), Y8
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (DI), Y8
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (R8), Y8
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (R9), Y8
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 5 to 5 outputs
- VMOVDQU (R10), Y8
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1600(CX), Y6
- VMOVDQU 1632(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1664(CX), Y6
- VMOVDQU 1696(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1728(CX), Y6
- VMOVDQU 1760(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1792(CX), Y6
- VMOVDQU 1824(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1856(CX), Y6
- VMOVDQU 1888(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 6 to 5 outputs
- VMOVDQU (R11), Y8
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1920(CX), Y6
- VMOVDQU 1952(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1984(CX), Y6
- VMOVDQU 2016(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2048(CX), Y6
- VMOVDQU 2080(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2112(CX), Y6
- VMOVDQU 2144(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2176(CX), Y6
- VMOVDQU 2208(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 7 to 5 outputs
- VMOVDQU (R12), Y8
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2240(CX), Y6
- VMOVDQU 2272(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 2304(CX), Y6
- VMOVDQU 2336(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2368(CX), Y6
- VMOVDQU 2400(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2432(CX), Y6
- VMOVDQU 2464(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2496(CX), Y6
- VMOVDQU 2528(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 8 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2560(CX), Y6
- VMOVDQU 2592(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 2624(CX), Y6
- VMOVDQU 2656(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2688(CX), Y6
- VMOVDQU 2720(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2752(CX), Y6
- VMOVDQU 2784(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2816(CX), Y6
- VMOVDQU 2848(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- MOVQ (R13), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(R13), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(R13), R15
- VMOVDQU Y2, (R15)(R14*1)
- MOVQ 72(R13), R15
- VMOVDQU Y3, (R15)(R14*1)
- MOVQ 96(R13), R15
- VMOVDQU Y4, (R15)(R14*1)
-
- // Prepare for next loop
- ADDQ $0x20, R14
- DECQ AX
- JNZ mulAvxTwo_9x5Xor_loop
- VZEROUPPER
-
-mulAvxTwo_9x5Xor_end:
- RET
-
-// func mulAvxTwo_9x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x6(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 119 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x6_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ start+72(FP), R14
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_9x6_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y0
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y1
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y2
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y3
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y4
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y5
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI), Y9
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DI), Y9
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (R8), Y9
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (R9), Y9
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 5 to 6 outputs
- VMOVDQU (R10), Y9
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1920(CX), Y7
- VMOVDQU 1952(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1984(CX), Y7
- VMOVDQU 2016(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2048(CX), Y7
- VMOVDQU 2080(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2112(CX), Y7
- VMOVDQU 2144(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2176(CX), Y7
- VMOVDQU 2208(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2240(CX), Y7
- VMOVDQU 2272(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 6 to 6 outputs
- VMOVDQU (R11), Y9
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2304(CX), Y7
- VMOVDQU 2336(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 2368(CX), Y7
- VMOVDQU 2400(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2432(CX), Y7
- VMOVDQU 2464(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2496(CX), Y7
- VMOVDQU 2528(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2560(CX), Y7
- VMOVDQU 2592(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2624(CX), Y7
- VMOVDQU 2656(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 7 to 6 outputs
- VMOVDQU (R12), Y9
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2688(CX), Y7
- VMOVDQU 2720(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 2752(CX), Y7
- VMOVDQU 2784(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2816(CX), Y7
- VMOVDQU 2848(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2880(CX), Y7
- VMOVDQU 2912(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2944(CX), Y7
- VMOVDQU 2976(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 3008(CX), Y7
- VMOVDQU 3040(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 8 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 3072(CX), Y7
- VMOVDQU 3104(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 3136(CX), Y7
- VMOVDQU 3168(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 3200(CX), Y7
- VMOVDQU 3232(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 3264(CX), Y7
- VMOVDQU 3296(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 3328(CX), Y7
- VMOVDQU 3360(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 3392(CX), Y7
- VMOVDQU 3424(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- MOVQ (R13), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(R13), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(R13), R15
- VMOVDQU Y2, (R15)(R14*1)
- MOVQ 72(R13), R15
- VMOVDQU Y3, (R15)(R14*1)
- MOVQ 96(R13), R15
- VMOVDQU Y4, (R15)(R14*1)
- MOVQ 120(R13), R15
- VMOVDQU Y5, (R15)(R14*1)
-
- // Prepare for next loop
- ADDQ $0x20, R14
- DECQ AX
- JNZ mulAvxTwo_9x6_loop
- VZEROUPPER
-
-mulAvxTwo_9x6_end:
- RET
-
-// func mulAvxTwo_9x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x6Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 119 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x6Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ start+72(FP), R14
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_9x6Xor_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- MOVQ (R13), R15
- VMOVDQU (R15)(R14*1), Y0
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- MOVQ 24(R13), R15
- VMOVDQU (R15)(R14*1), Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- MOVQ 48(R13), R15
- VMOVDQU (R15)(R14*1), Y2
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- MOVQ 72(R13), R15
- VMOVDQU (R15)(R14*1), Y3
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- MOVQ 96(R13), R15
- VMOVDQU (R15)(R14*1), Y4
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- MOVQ 120(R13), R15
- VMOVDQU (R15)(R14*1), Y5
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI), Y9
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DI), Y9
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (R8), Y9
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (R9), Y9
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 5 to 6 outputs
- VMOVDQU (R10), Y9
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1920(CX), Y7
- VMOVDQU 1952(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1984(CX), Y7
- VMOVDQU 2016(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2048(CX), Y7
- VMOVDQU 2080(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2112(CX), Y7
- VMOVDQU 2144(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2176(CX), Y7
- VMOVDQU 2208(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2240(CX), Y7
- VMOVDQU 2272(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 6 to 6 outputs
- VMOVDQU (R11), Y9
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2304(CX), Y7
- VMOVDQU 2336(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 2368(CX), Y7
- VMOVDQU 2400(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2432(CX), Y7
- VMOVDQU 2464(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2496(CX), Y7
- VMOVDQU 2528(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2560(CX), Y7
- VMOVDQU 2592(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2624(CX), Y7
- VMOVDQU 2656(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 7 to 6 outputs
- VMOVDQU (R12), Y9
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2688(CX), Y7
- VMOVDQU 2720(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 2752(CX), Y7
- VMOVDQU 2784(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2816(CX), Y7
- VMOVDQU 2848(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2880(CX), Y7
- VMOVDQU 2912(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2944(CX), Y7
- VMOVDQU 2976(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 3008(CX), Y7
- VMOVDQU 3040(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 8 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 3072(CX), Y7
- VMOVDQU 3104(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 3136(CX), Y7
- VMOVDQU 3168(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 3200(CX), Y7
- VMOVDQU 3232(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 3264(CX), Y7
- VMOVDQU 3296(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 3328(CX), Y7
- VMOVDQU 3360(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 3392(CX), Y7
- VMOVDQU 3424(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- MOVQ (R13), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(R13), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(R13), R15
- VMOVDQU Y2, (R15)(R14*1)
- MOVQ 72(R13), R15
- VMOVDQU Y3, (R15)(R14*1)
- MOVQ 96(R13), R15
- VMOVDQU Y4, (R15)(R14*1)
- MOVQ 120(R13), R15
- VMOVDQU Y5, (R15)(R14*1)
-
- // Prepare for next loop
- ADDQ $0x20, R14
- DECQ AX
- JNZ mulAvxTwo_9x6Xor_loop
- VZEROUPPER
-
-mulAvxTwo_9x6Xor_end:
- RET
-
-// func mulAvxTwo_9x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x7(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 138 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x7_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ start+72(FP), R14
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_9x7_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y0
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y1
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y2
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y3
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y4
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y5
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y6
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DI), Y10
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (R8), Y10
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (R9), Y10
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 5 to 7 outputs
- VMOVDQU (R10), Y10
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2240(CX), Y8
- VMOVDQU 2272(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2304(CX), Y8
- VMOVDQU 2336(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2368(CX), Y8
- VMOVDQU 2400(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2432(CX), Y8
- VMOVDQU 2464(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2496(CX), Y8
- VMOVDQU 2528(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2560(CX), Y8
- VMOVDQU 2592(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2624(CX), Y8
- VMOVDQU 2656(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 6 to 7 outputs
- VMOVDQU (R11), Y10
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2688(CX), Y8
- VMOVDQU 2720(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2752(CX), Y8
- VMOVDQU 2784(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2816(CX), Y8
- VMOVDQU 2848(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2880(CX), Y8
- VMOVDQU 2912(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2944(CX), Y8
- VMOVDQU 2976(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3008(CX), Y8
- VMOVDQU 3040(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3072(CX), Y8
- VMOVDQU 3104(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 7 to 7 outputs
- VMOVDQU (R12), Y10
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 3136(CX), Y8
- VMOVDQU 3168(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 3200(CX), Y8
- VMOVDQU 3232(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 3264(CX), Y8
- VMOVDQU 3296(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 3328(CX), Y8
- VMOVDQU 3360(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 3392(CX), Y8
- VMOVDQU 3424(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3456(CX), Y8
- VMOVDQU 3488(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3520(CX), Y8
- VMOVDQU 3552(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 8 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 3584(CX), Y8
- VMOVDQU 3616(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 3648(CX), Y8
- VMOVDQU 3680(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 3712(CX), Y8
- VMOVDQU 3744(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 3776(CX), Y8
- VMOVDQU 3808(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 3840(CX), Y8
- VMOVDQU 3872(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3904(CX), Y8
- VMOVDQU 3936(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3968(CX), Y8
- VMOVDQU 4000(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- MOVQ (R13), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(R13), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(R13), R15
- VMOVDQU Y2, (R15)(R14*1)
- MOVQ 72(R13), R15
- VMOVDQU Y3, (R15)(R14*1)
- MOVQ 96(R13), R15
- VMOVDQU Y4, (R15)(R14*1)
- MOVQ 120(R13), R15
- VMOVDQU Y5, (R15)(R14*1)
- MOVQ 144(R13), R15
- VMOVDQU Y6, (R15)(R14*1)
-
- // Prepare for next loop
- ADDQ $0x20, R14
- DECQ AX
- JNZ mulAvxTwo_9x7_loop
- VZEROUPPER
-
-mulAvxTwo_9x7_end:
- RET
-
-// func mulAvxTwo_9x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x7Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 138 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x7Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ start+72(FP), R14
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_9x7Xor_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- MOVQ (R13), R15
- VMOVDQU (R15)(R14*1), Y0
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- MOVQ 24(R13), R15
- VMOVDQU (R15)(R14*1), Y1
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- MOVQ 48(R13), R15
- VMOVDQU (R15)(R14*1), Y2
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- MOVQ 72(R13), R15
- VMOVDQU (R15)(R14*1), Y3
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- MOVQ 96(R13), R15
- VMOVDQU (R15)(R14*1), Y4
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- MOVQ 120(R13), R15
- VMOVDQU (R15)(R14*1), Y5
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- MOVQ 144(R13), R15
- VMOVDQU (R15)(R14*1), Y6
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DI), Y10
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (R8), Y10
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (R9), Y10
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 5 to 7 outputs
- VMOVDQU (R10), Y10
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2240(CX), Y8
- VMOVDQU 2272(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2304(CX), Y8
- VMOVDQU 2336(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2368(CX), Y8
- VMOVDQU 2400(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2432(CX), Y8
- VMOVDQU 2464(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2496(CX), Y8
- VMOVDQU 2528(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2560(CX), Y8
- VMOVDQU 2592(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2624(CX), Y8
- VMOVDQU 2656(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 6 to 7 outputs
- VMOVDQU (R11), Y10
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2688(CX), Y8
- VMOVDQU 2720(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2752(CX), Y8
- VMOVDQU 2784(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2816(CX), Y8
- VMOVDQU 2848(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2880(CX), Y8
- VMOVDQU 2912(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2944(CX), Y8
- VMOVDQU 2976(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3008(CX), Y8
- VMOVDQU 3040(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3072(CX), Y8
- VMOVDQU 3104(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 7 to 7 outputs
- VMOVDQU (R12), Y10
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 3136(CX), Y8
- VMOVDQU 3168(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 3200(CX), Y8
- VMOVDQU 3232(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 3264(CX), Y8
- VMOVDQU 3296(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 3328(CX), Y8
- VMOVDQU 3360(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 3392(CX), Y8
- VMOVDQU 3424(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3456(CX), Y8
- VMOVDQU 3488(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3520(CX), Y8
- VMOVDQU 3552(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 8 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 3584(CX), Y8
- VMOVDQU 3616(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 3648(CX), Y8
- VMOVDQU 3680(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 3712(CX), Y8
- VMOVDQU 3744(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 3776(CX), Y8
- VMOVDQU 3808(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 3840(CX), Y8
- VMOVDQU 3872(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3904(CX), Y8
- VMOVDQU 3936(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3968(CX), Y8
- VMOVDQU 4000(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- MOVQ (R13), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(R13), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(R13), R15
- VMOVDQU Y2, (R15)(R14*1)
- MOVQ 72(R13), R15
- VMOVDQU Y3, (R15)(R14*1)
- MOVQ 96(R13), R15
- VMOVDQU Y4, (R15)(R14*1)
- MOVQ 120(R13), R15
- VMOVDQU Y5, (R15)(R14*1)
- MOVQ 144(R13), R15
- VMOVDQU Y6, (R15)(R14*1)
-
- // Prepare for next loop
- ADDQ $0x20, R14
- DECQ AX
- JNZ mulAvxTwo_9x7Xor_loop
- VZEROUPPER
-
-mulAvxTwo_9x7Xor_end:
- RET
-
-// func mulAvxTwo_9x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x8(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 157 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x8_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ start+72(FP), R14
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X8
- VPBROADCASTB X8, Y8
-
-mulAvxTwo_9x8_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y0
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y1
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y2
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y3
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y4
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y5
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y6
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y7
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI), Y11
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DI), Y11
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (R8), Y11
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 4 to 8 outputs
- VMOVDQU (R9), Y11
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 5 to 8 outputs
- VMOVDQU (R10), Y11
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2560(CX), Y9
- VMOVDQU 2592(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2624(CX), Y9
- VMOVDQU 2656(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2688(CX), Y9
- VMOVDQU 2720(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2752(CX), Y9
- VMOVDQU 2784(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2816(CX), Y9
- VMOVDQU 2848(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2880(CX), Y9
- VMOVDQU 2912(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2944(CX), Y9
- VMOVDQU 2976(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3008(CX), Y9
- VMOVDQU 3040(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 6 to 8 outputs
- VMOVDQU (R11), Y11
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3072(CX), Y9
- VMOVDQU 3104(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 3136(CX), Y9
- VMOVDQU 3168(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 3200(CX), Y9
- VMOVDQU 3232(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 3264(CX), Y9
- VMOVDQU 3296(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 3328(CX), Y9
- VMOVDQU 3360(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 3392(CX), Y9
- VMOVDQU 3424(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 3456(CX), Y9
- VMOVDQU 3488(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3520(CX), Y9
- VMOVDQU 3552(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 7 to 8 outputs
- VMOVDQU (R12), Y11
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3584(CX), Y9
- VMOVDQU 3616(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 3648(CX), Y9
- VMOVDQU 3680(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 3712(CX), Y9
- VMOVDQU 3744(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 3776(CX), Y9
- VMOVDQU 3808(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 3840(CX), Y9
- VMOVDQU 3872(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 3904(CX), Y9
- VMOVDQU 3936(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 3968(CX), Y9
- VMOVDQU 4000(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 4032(CX), Y9
- VMOVDQU 4064(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 8 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 4096(CX), Y9
- VMOVDQU 4128(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 4160(CX), Y9
- VMOVDQU 4192(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 4224(CX), Y9
- VMOVDQU 4256(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 4288(CX), Y9
- VMOVDQU 4320(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 4352(CX), Y9
- VMOVDQU 4384(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 4416(CX), Y9
- VMOVDQU 4448(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 4480(CX), Y9
- VMOVDQU 4512(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 4544(CX), Y9
- VMOVDQU 4576(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- MOVQ (R13), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(R13), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(R13), R15
- VMOVDQU Y2, (R15)(R14*1)
- MOVQ 72(R13), R15
- VMOVDQU Y3, (R15)(R14*1)
- MOVQ 96(R13), R15
- VMOVDQU Y4, (R15)(R14*1)
- MOVQ 120(R13), R15
- VMOVDQU Y5, (R15)(R14*1)
- MOVQ 144(R13), R15
- VMOVDQU Y6, (R15)(R14*1)
- MOVQ 168(R13), R15
- VMOVDQU Y7, (R15)(R14*1)
-
- // Prepare for next loop
- ADDQ $0x20, R14
- DECQ AX
- JNZ mulAvxTwo_9x8_loop
- VZEROUPPER
-
-mulAvxTwo_9x8_end:
- RET
-
-// func mulAvxTwo_9x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x8Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 157 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x8Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ start+72(FP), R14
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X8
- VPBROADCASTB X8, Y8
-
-mulAvxTwo_9x8Xor_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- MOVQ (R13), R15
- VMOVDQU (R15)(R14*1), Y0
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- MOVQ 24(R13), R15
- VMOVDQU (R15)(R14*1), Y1
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- MOVQ 48(R13), R15
- VMOVDQU (R15)(R14*1), Y2
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- MOVQ 72(R13), R15
- VMOVDQU (R15)(R14*1), Y3
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- MOVQ 96(R13), R15
- VMOVDQU (R15)(R14*1), Y4
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- MOVQ 120(R13), R15
- VMOVDQU (R15)(R14*1), Y5
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- MOVQ 144(R13), R15
- VMOVDQU (R15)(R14*1), Y6
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- MOVQ 168(R13), R15
- VMOVDQU (R15)(R14*1), Y7
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI), Y11
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DI), Y11
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (R8), Y11
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 4 to 8 outputs
- VMOVDQU (R9), Y11
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 5 to 8 outputs
- VMOVDQU (R10), Y11
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2560(CX), Y9
- VMOVDQU 2592(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2624(CX), Y9
- VMOVDQU 2656(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2688(CX), Y9
- VMOVDQU 2720(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2752(CX), Y9
- VMOVDQU 2784(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2816(CX), Y9
- VMOVDQU 2848(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2880(CX), Y9
- VMOVDQU 2912(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2944(CX), Y9
- VMOVDQU 2976(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3008(CX), Y9
- VMOVDQU 3040(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 6 to 8 outputs
- VMOVDQU (R11), Y11
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3072(CX), Y9
- VMOVDQU 3104(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 3136(CX), Y9
- VMOVDQU 3168(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 3200(CX), Y9
- VMOVDQU 3232(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 3264(CX), Y9
- VMOVDQU 3296(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 3328(CX), Y9
- VMOVDQU 3360(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 3392(CX), Y9
- VMOVDQU 3424(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 3456(CX), Y9
- VMOVDQU 3488(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3520(CX), Y9
- VMOVDQU 3552(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 7 to 8 outputs
- VMOVDQU (R12), Y11
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3584(CX), Y9
- VMOVDQU 3616(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 3648(CX), Y9
- VMOVDQU 3680(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 3712(CX), Y9
- VMOVDQU 3744(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 3776(CX), Y9
- VMOVDQU 3808(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 3840(CX), Y9
- VMOVDQU 3872(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 3904(CX), Y9
- VMOVDQU 3936(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 3968(CX), Y9
- VMOVDQU 4000(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 4032(CX), Y9
- VMOVDQU 4064(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 8 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 4096(CX), Y9
- VMOVDQU 4128(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 4160(CX), Y9
- VMOVDQU 4192(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 4224(CX), Y9
- VMOVDQU 4256(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 4288(CX), Y9
- VMOVDQU 4320(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 4352(CX), Y9
- VMOVDQU 4384(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 4416(CX), Y9
- VMOVDQU 4448(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 4480(CX), Y9
- VMOVDQU 4512(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 4544(CX), Y9
- VMOVDQU 4576(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- MOVQ (R13), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(R13), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(R13), R15
- VMOVDQU Y2, (R15)(R14*1)
- MOVQ 72(R13), R15
- VMOVDQU Y3, (R15)(R14*1)
- MOVQ 96(R13), R15
- VMOVDQU Y4, (R15)(R14*1)
- MOVQ 120(R13), R15
- VMOVDQU Y5, (R15)(R14*1)
- MOVQ 144(R13), R15
- VMOVDQU Y6, (R15)(R14*1)
- MOVQ 168(R13), R15
- VMOVDQU Y7, (R15)(R14*1)
-
- // Prepare for next loop
- ADDQ $0x20, R14
- DECQ AX
- JNZ mulAvxTwo_9x8Xor_loop
- VZEROUPPER
-
-mulAvxTwo_9x8Xor_end:
- RET
-
-// func mulAvxTwo_9x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x9(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 176 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x9_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ start+72(FP), R14
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_9x9_loop:
- // Load and process 32 bytes from input 0 to 9 outputs
- VMOVDQU (BX), Y12
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU (CX), Y10
- VMOVDQU 32(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y0
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y1
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y2
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y3
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y4
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y5
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y6
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y7
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y8
-
- // Load and process 32 bytes from input 1 to 9 outputs
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 9 outputs
- VMOVDQU (DI), Y12
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1152(CX), Y10
- VMOVDQU 1184(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1216(CX), Y10
- VMOVDQU 1248(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1280(CX), Y10
- VMOVDQU 1312(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1344(CX), Y10
- VMOVDQU 1376(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1408(CX), Y10
- VMOVDQU 1440(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 1472(CX), Y10
- VMOVDQU 1504(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 1536(CX), Y10
- VMOVDQU 1568(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1600(CX), Y10
- VMOVDQU 1632(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1664(CX), Y10
- VMOVDQU 1696(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 3 to 9 outputs
- VMOVDQU (R8), Y12
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1728(CX), Y10
- VMOVDQU 1760(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1792(CX), Y10
- VMOVDQU 1824(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1856(CX), Y10
- VMOVDQU 1888(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1920(CX), Y10
- VMOVDQU 1952(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1984(CX), Y10
- VMOVDQU 2016(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2048(CX), Y10
- VMOVDQU 2080(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2112(CX), Y10
- VMOVDQU 2144(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2176(CX), Y10
- VMOVDQU 2208(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2240(CX), Y10
- VMOVDQU 2272(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 4 to 9 outputs
- VMOVDQU (R9), Y12
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2304(CX), Y10
- VMOVDQU 2336(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2368(CX), Y10
- VMOVDQU 2400(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 2432(CX), Y10
- VMOVDQU 2464(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 2496(CX), Y10
- VMOVDQU 2528(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 2560(CX), Y10
- VMOVDQU 2592(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2624(CX), Y10
- VMOVDQU 2656(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2688(CX), Y10
- VMOVDQU 2720(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2752(CX), Y10
- VMOVDQU 2784(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2816(CX), Y10
- VMOVDQU 2848(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 5 to 9 outputs
- VMOVDQU (R10), Y12
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2880(CX), Y10
- VMOVDQU 2912(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2944(CX), Y10
- VMOVDQU 2976(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3008(CX), Y10
- VMOVDQU 3040(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3072(CX), Y10
- VMOVDQU 3104(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3136(CX), Y10
- VMOVDQU 3168(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3200(CX), Y10
- VMOVDQU 3232(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3264(CX), Y10
- VMOVDQU 3296(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3328(CX), Y10
- VMOVDQU 3360(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3392(CX), Y10
- VMOVDQU 3424(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 6 to 9 outputs
- VMOVDQU (R11), Y12
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 3456(CX), Y10
- VMOVDQU 3488(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 3520(CX), Y10
- VMOVDQU 3552(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3584(CX), Y10
- VMOVDQU 3616(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3648(CX), Y10
- VMOVDQU 3680(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3712(CX), Y10
- VMOVDQU 3744(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3776(CX), Y10
- VMOVDQU 3808(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3840(CX), Y10
- VMOVDQU 3872(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3904(CX), Y10
- VMOVDQU 3936(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3968(CX), Y10
- VMOVDQU 4000(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 7 to 9 outputs
- VMOVDQU (R12), Y12
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 4032(CX), Y10
- VMOVDQU 4064(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 4096(CX), Y10
- VMOVDQU 4128(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 4160(CX), Y10
- VMOVDQU 4192(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 4224(CX), Y10
- VMOVDQU 4256(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 4288(CX), Y10
- VMOVDQU 4320(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 4352(CX), Y10
- VMOVDQU 4384(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 4416(CX), Y10
- VMOVDQU 4448(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 4480(CX), Y10
- VMOVDQU 4512(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 4544(CX), Y10
- VMOVDQU 4576(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 8 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 4608(CX), Y10
- VMOVDQU 4640(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 4672(CX), Y10
- VMOVDQU 4704(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 4736(CX), Y10
- VMOVDQU 4768(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 4800(CX), Y10
- VMOVDQU 4832(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 4864(CX), Y10
- VMOVDQU 4896(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 4928(CX), Y10
- VMOVDQU 4960(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 4992(CX), Y10
- VMOVDQU 5024(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 5056(CX), Y10
- VMOVDQU 5088(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 5120(CX), Y10
- VMOVDQU 5152(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs
- MOVQ (R13), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(R13), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(R13), R15
- VMOVDQU Y2, (R15)(R14*1)
- MOVQ 72(R13), R15
- VMOVDQU Y3, (R15)(R14*1)
- MOVQ 96(R13), R15
- VMOVDQU Y4, (R15)(R14*1)
- MOVQ 120(R13), R15
- VMOVDQU Y5, (R15)(R14*1)
- MOVQ 144(R13), R15
- VMOVDQU Y6, (R15)(R14*1)
- MOVQ 168(R13), R15
- VMOVDQU Y7, (R15)(R14*1)
- MOVQ 192(R13), R15
- VMOVDQU Y8, (R15)(R14*1)
-
- // Prepare for next loop
- ADDQ $0x20, R14
- DECQ AX
- JNZ mulAvxTwo_9x9_loop
- VZEROUPPER
-
-mulAvxTwo_9x9_end:
- RET
-
-// func mulAvxTwo_9x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x9Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 176 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x9Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ start+72(FP), R14
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_9x9Xor_loop:
- // Load and process 32 bytes from input 0 to 9 outputs
- VMOVDQU (BX), Y12
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- MOVQ (R13), R15
- VMOVDQU (R15)(R14*1), Y0
- VMOVDQU (CX), Y10
- VMOVDQU 32(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- MOVQ 24(R13), R15
- VMOVDQU (R15)(R14*1), Y1
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- MOVQ 48(R13), R15
- VMOVDQU (R15)(R14*1), Y2
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- MOVQ 72(R13), R15
- VMOVDQU (R15)(R14*1), Y3
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- MOVQ 96(R13), R15
- VMOVDQU (R15)(R14*1), Y4
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- MOVQ 120(R13), R15
- VMOVDQU (R15)(R14*1), Y5
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- MOVQ 144(R13), R15
- VMOVDQU (R15)(R14*1), Y6
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- MOVQ 168(R13), R15
- VMOVDQU (R15)(R14*1), Y7
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- MOVQ 192(R13), R15
- VMOVDQU (R15)(R14*1), Y8
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 1 to 9 outputs
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 9 outputs
- VMOVDQU (DI), Y12
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1152(CX), Y10
- VMOVDQU 1184(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1216(CX), Y10
- VMOVDQU 1248(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1280(CX), Y10
- VMOVDQU 1312(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1344(CX), Y10
- VMOVDQU 1376(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1408(CX), Y10
- VMOVDQU 1440(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 1472(CX), Y10
- VMOVDQU 1504(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 1536(CX), Y10
- VMOVDQU 1568(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1600(CX), Y10
- VMOVDQU 1632(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1664(CX), Y10
- VMOVDQU 1696(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 3 to 9 outputs
- VMOVDQU (R8), Y12
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1728(CX), Y10
- VMOVDQU 1760(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1792(CX), Y10
- VMOVDQU 1824(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1856(CX), Y10
- VMOVDQU 1888(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1920(CX), Y10
- VMOVDQU 1952(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1984(CX), Y10
- VMOVDQU 2016(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2048(CX), Y10
- VMOVDQU 2080(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2112(CX), Y10
- VMOVDQU 2144(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2176(CX), Y10
- VMOVDQU 2208(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2240(CX), Y10
- VMOVDQU 2272(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 4 to 9 outputs
- VMOVDQU (R9), Y12
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2304(CX), Y10
- VMOVDQU 2336(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2368(CX), Y10
- VMOVDQU 2400(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 2432(CX), Y10
- VMOVDQU 2464(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 2496(CX), Y10
- VMOVDQU 2528(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 2560(CX), Y10
- VMOVDQU 2592(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2624(CX), Y10
- VMOVDQU 2656(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2688(CX), Y10
- VMOVDQU 2720(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2752(CX), Y10
- VMOVDQU 2784(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2816(CX), Y10
- VMOVDQU 2848(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 5 to 9 outputs
- VMOVDQU (R10), Y12
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2880(CX), Y10
- VMOVDQU 2912(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2944(CX), Y10
- VMOVDQU 2976(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3008(CX), Y10
- VMOVDQU 3040(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3072(CX), Y10
- VMOVDQU 3104(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3136(CX), Y10
- VMOVDQU 3168(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3200(CX), Y10
- VMOVDQU 3232(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3264(CX), Y10
- VMOVDQU 3296(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3328(CX), Y10
- VMOVDQU 3360(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3392(CX), Y10
- VMOVDQU 3424(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 6 to 9 outputs
- VMOVDQU (R11), Y12
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 3456(CX), Y10
- VMOVDQU 3488(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 3520(CX), Y10
- VMOVDQU 3552(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3584(CX), Y10
- VMOVDQU 3616(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3648(CX), Y10
- VMOVDQU 3680(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3712(CX), Y10
- VMOVDQU 3744(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3776(CX), Y10
- VMOVDQU 3808(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3840(CX), Y10
- VMOVDQU 3872(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3904(CX), Y10
- VMOVDQU 3936(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3968(CX), Y10
- VMOVDQU 4000(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 7 to 9 outputs
- VMOVDQU (R12), Y12
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 4032(CX), Y10
- VMOVDQU 4064(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 4096(CX), Y10
- VMOVDQU 4128(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 4160(CX), Y10
- VMOVDQU 4192(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 4224(CX), Y10
- VMOVDQU 4256(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 4288(CX), Y10
- VMOVDQU 4320(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 4352(CX), Y10
- VMOVDQU 4384(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 4416(CX), Y10
- VMOVDQU 4448(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 4480(CX), Y10
- VMOVDQU 4512(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 4544(CX), Y10
- VMOVDQU 4576(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 8 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 4608(CX), Y10
- VMOVDQU 4640(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 4672(CX), Y10
- VMOVDQU 4704(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 4736(CX), Y10
- VMOVDQU 4768(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 4800(CX), Y10
- VMOVDQU 4832(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 4864(CX), Y10
- VMOVDQU 4896(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 4928(CX), Y10
- VMOVDQU 4960(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 4992(CX), Y10
- VMOVDQU 5024(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 5056(CX), Y10
- VMOVDQU 5088(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 5120(CX), Y10
- VMOVDQU 5152(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs
- MOVQ (R13), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(R13), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(R13), R15
- VMOVDQU Y2, (R15)(R14*1)
- MOVQ 72(R13), R15
- VMOVDQU Y3, (R15)(R14*1)
- MOVQ 96(R13), R15
- VMOVDQU Y4, (R15)(R14*1)
- MOVQ 120(R13), R15
- VMOVDQU Y5, (R15)(R14*1)
- MOVQ 144(R13), R15
- VMOVDQU Y6, (R15)(R14*1)
- MOVQ 168(R13), R15
- VMOVDQU Y7, (R15)(R14*1)
- MOVQ 192(R13), R15
- VMOVDQU Y8, (R15)(R14*1)
-
- // Prepare for next loop
- ADDQ $0x20, R14
- DECQ AX
- JNZ mulAvxTwo_9x9Xor_loop
- VZEROUPPER
-
-mulAvxTwo_9x9Xor_end:
- RET
-
-// func mulAvxTwo_9x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x10(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 195 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x10_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ start+72(FP), R14
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X10
- VPBROADCASTB X10, Y10
-
-mulAvxTwo_9x10_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (BX), Y13
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU (CX), Y11
- VMOVDQU 32(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y0
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y1
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y2
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y3
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y4
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y5
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y6
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y7
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y8
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y9
-
- // Load and process 32 bytes from input 1 to 10 outputs
- VMOVDQU (SI), Y13
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 2 to 10 outputs
- VMOVDQU (DI), Y13
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1280(CX), Y11
- VMOVDQU 1312(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1344(CX), Y11
- VMOVDQU 1376(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 1408(CX), Y11
- VMOVDQU 1440(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 1472(CX), Y11
- VMOVDQU 1504(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 1536(CX), Y11
- VMOVDQU 1568(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 1600(CX), Y11
- VMOVDQU 1632(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1664(CX), Y11
- VMOVDQU 1696(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1728(CX), Y11
- VMOVDQU 1760(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1792(CX), Y11
- VMOVDQU 1824(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1856(CX), Y11
- VMOVDQU 1888(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 3 to 10 outputs
- VMOVDQU (R8), Y13
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1920(CX), Y11
- VMOVDQU 1952(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1984(CX), Y11
- VMOVDQU 2016(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2048(CX), Y11
- VMOVDQU 2080(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2112(CX), Y11
- VMOVDQU 2144(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2176(CX), Y11
- VMOVDQU 2208(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2240(CX), Y11
- VMOVDQU 2272(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2304(CX), Y11
- VMOVDQU 2336(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 2368(CX), Y11
- VMOVDQU 2400(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 2432(CX), Y11
- VMOVDQU 2464(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 2496(CX), Y11
- VMOVDQU 2528(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 4 to 10 outputs
- VMOVDQU (R9), Y13
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 2560(CX), Y11
- VMOVDQU 2592(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 2624(CX), Y11
- VMOVDQU 2656(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2688(CX), Y11
- VMOVDQU 2720(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2752(CX), Y11
- VMOVDQU 2784(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2816(CX), Y11
- VMOVDQU 2848(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2880(CX), Y11
- VMOVDQU 2912(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2944(CX), Y11
- VMOVDQU 2976(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3008(CX), Y11
- VMOVDQU 3040(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3072(CX), Y11
- VMOVDQU 3104(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3136(CX), Y11
- VMOVDQU 3168(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 5 to 10 outputs
- VMOVDQU (R10), Y13
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3200(CX), Y11
- VMOVDQU 3232(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3264(CX), Y11
- VMOVDQU 3296(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3328(CX), Y11
- VMOVDQU 3360(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 3392(CX), Y11
- VMOVDQU 3424(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 3456(CX), Y11
- VMOVDQU 3488(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 3520(CX), Y11
- VMOVDQU 3552(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 3584(CX), Y11
- VMOVDQU 3616(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3648(CX), Y11
- VMOVDQU 3680(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3712(CX), Y11
- VMOVDQU 3744(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3776(CX), Y11
- VMOVDQU 3808(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 6 to 10 outputs
- VMOVDQU (R11), Y13
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3840(CX), Y11
- VMOVDQU 3872(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3904(CX), Y11
- VMOVDQU 3936(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3968(CX), Y11
- VMOVDQU 4000(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 4032(CX), Y11
- VMOVDQU 4064(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 4096(CX), Y11
- VMOVDQU 4128(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 4160(CX), Y11
- VMOVDQU 4192(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 4224(CX), Y11
- VMOVDQU 4256(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 4288(CX), Y11
- VMOVDQU 4320(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 4352(CX), Y11
- VMOVDQU 4384(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 4416(CX), Y11
- VMOVDQU 4448(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 7 to 10 outputs
- VMOVDQU (R12), Y13
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 4480(CX), Y11
- VMOVDQU 4512(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 4544(CX), Y11
- VMOVDQU 4576(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 4608(CX), Y11
- VMOVDQU 4640(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 4672(CX), Y11
- VMOVDQU 4704(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 4736(CX), Y11
- VMOVDQU 4768(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 4800(CX), Y11
- VMOVDQU 4832(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 4864(CX), Y11
- VMOVDQU 4896(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 4928(CX), Y11
- VMOVDQU 4960(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 4992(CX), Y11
- VMOVDQU 5024(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 5056(CX), Y11
- VMOVDQU 5088(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 8 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 5120(CX), Y11
- VMOVDQU 5152(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 5184(CX), Y11
- VMOVDQU 5216(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 5248(CX), Y11
- VMOVDQU 5280(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 5312(CX), Y11
- VMOVDQU 5344(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 5376(CX), Y11
- VMOVDQU 5408(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 5440(CX), Y11
- VMOVDQU 5472(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 5504(CX), Y11
- VMOVDQU 5536(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 5568(CX), Y11
- VMOVDQU 5600(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 5632(CX), Y11
- VMOVDQU 5664(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 5696(CX), Y11
- VMOVDQU 5728(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs
- MOVQ (R13), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(R13), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(R13), R15
- VMOVDQU Y2, (R15)(R14*1)
- MOVQ 72(R13), R15
- VMOVDQU Y3, (R15)(R14*1)
- MOVQ 96(R13), R15
- VMOVDQU Y4, (R15)(R14*1)
- MOVQ 120(R13), R15
- VMOVDQU Y5, (R15)(R14*1)
- MOVQ 144(R13), R15
- VMOVDQU Y6, (R15)(R14*1)
- MOVQ 168(R13), R15
- VMOVDQU Y7, (R15)(R14*1)
- MOVQ 192(R13), R15
- VMOVDQU Y8, (R15)(R14*1)
- MOVQ 216(R13), R15
- VMOVDQU Y9, (R15)(R14*1)
-
- // Prepare for next loop
- ADDQ $0x20, R14
- DECQ AX
- JNZ mulAvxTwo_9x10_loop
- VZEROUPPER
-
-mulAvxTwo_9x10_end:
- RET
-
-// func mulAvxTwo_9x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_9x10Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 195 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x10Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), DX
- MOVQ out_base+48(FP), R13
- MOVQ start+72(FP), R14
-
- // Add start offset to input
- ADDQ R14, BX
- ADDQ R14, SI
- ADDQ R14, DI
- ADDQ R14, R8
- ADDQ R14, R9
- ADDQ R14, R10
- ADDQ R14, R11
- ADDQ R14, R12
- ADDQ R14, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X10
- VPBROADCASTB X10, Y10
-
-mulAvxTwo_9x10Xor_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (BX), Y13
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- MOVQ (R13), R15
- VMOVDQU (R15)(R14*1), Y0
- VMOVDQU (CX), Y11
- VMOVDQU 32(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- MOVQ 24(R13), R15
- VMOVDQU (R15)(R14*1), Y1
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- MOVQ 48(R13), R15
- VMOVDQU (R15)(R14*1), Y2
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- MOVQ 72(R13), R15
- VMOVDQU (R15)(R14*1), Y3
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- MOVQ 96(R13), R15
- VMOVDQU (R15)(R14*1), Y4
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- MOVQ 120(R13), R15
- VMOVDQU (R15)(R14*1), Y5
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- MOVQ 144(R13), R15
- VMOVDQU (R15)(R14*1), Y6
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- MOVQ 168(R13), R15
- VMOVDQU (R15)(R14*1), Y7
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- MOVQ 192(R13), R15
- VMOVDQU (R15)(R14*1), Y8
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- MOVQ 216(R13), R15
- VMOVDQU (R15)(R14*1), Y9
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 1 to 10 outputs
- VMOVDQU (SI), Y13
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 2 to 10 outputs
- VMOVDQU (DI), Y13
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1280(CX), Y11
- VMOVDQU 1312(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1344(CX), Y11
- VMOVDQU 1376(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 1408(CX), Y11
- VMOVDQU 1440(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 1472(CX), Y11
- VMOVDQU 1504(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 1536(CX), Y11
- VMOVDQU 1568(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 1600(CX), Y11
- VMOVDQU 1632(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1664(CX), Y11
- VMOVDQU 1696(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1728(CX), Y11
- VMOVDQU 1760(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1792(CX), Y11
- VMOVDQU 1824(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1856(CX), Y11
- VMOVDQU 1888(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 3 to 10 outputs
- VMOVDQU (R8), Y13
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1920(CX), Y11
- VMOVDQU 1952(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1984(CX), Y11
- VMOVDQU 2016(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2048(CX), Y11
- VMOVDQU 2080(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2112(CX), Y11
- VMOVDQU 2144(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2176(CX), Y11
- VMOVDQU 2208(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2240(CX), Y11
- VMOVDQU 2272(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2304(CX), Y11
- VMOVDQU 2336(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 2368(CX), Y11
- VMOVDQU 2400(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 2432(CX), Y11
- VMOVDQU 2464(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 2496(CX), Y11
- VMOVDQU 2528(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 4 to 10 outputs
- VMOVDQU (R9), Y13
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 2560(CX), Y11
- VMOVDQU 2592(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 2624(CX), Y11
- VMOVDQU 2656(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2688(CX), Y11
- VMOVDQU 2720(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2752(CX), Y11
- VMOVDQU 2784(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2816(CX), Y11
- VMOVDQU 2848(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2880(CX), Y11
- VMOVDQU 2912(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2944(CX), Y11
- VMOVDQU 2976(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3008(CX), Y11
- VMOVDQU 3040(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3072(CX), Y11
- VMOVDQU 3104(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3136(CX), Y11
- VMOVDQU 3168(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 5 to 10 outputs
- VMOVDQU (R10), Y13
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3200(CX), Y11
- VMOVDQU 3232(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3264(CX), Y11
- VMOVDQU 3296(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3328(CX), Y11
- VMOVDQU 3360(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 3392(CX), Y11
- VMOVDQU 3424(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 3456(CX), Y11
- VMOVDQU 3488(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 3520(CX), Y11
- VMOVDQU 3552(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 3584(CX), Y11
- VMOVDQU 3616(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3648(CX), Y11
- VMOVDQU 3680(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3712(CX), Y11
- VMOVDQU 3744(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3776(CX), Y11
- VMOVDQU 3808(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 6 to 10 outputs
- VMOVDQU (R11), Y13
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3840(CX), Y11
- VMOVDQU 3872(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3904(CX), Y11
- VMOVDQU 3936(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3968(CX), Y11
- VMOVDQU 4000(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 4032(CX), Y11
- VMOVDQU 4064(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 4096(CX), Y11
- VMOVDQU 4128(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 4160(CX), Y11
- VMOVDQU 4192(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 4224(CX), Y11
- VMOVDQU 4256(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 4288(CX), Y11
- VMOVDQU 4320(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 4352(CX), Y11
- VMOVDQU 4384(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 4416(CX), Y11
- VMOVDQU 4448(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 7 to 10 outputs
- VMOVDQU (R12), Y13
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 4480(CX), Y11
- VMOVDQU 4512(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 4544(CX), Y11
- VMOVDQU 4576(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 4608(CX), Y11
- VMOVDQU 4640(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 4672(CX), Y11
- VMOVDQU 4704(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 4736(CX), Y11
- VMOVDQU 4768(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 4800(CX), Y11
- VMOVDQU 4832(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 4864(CX), Y11
- VMOVDQU 4896(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 4928(CX), Y11
- VMOVDQU 4960(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 4992(CX), Y11
- VMOVDQU 5024(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 5056(CX), Y11
- VMOVDQU 5088(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 8 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 5120(CX), Y11
- VMOVDQU 5152(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 5184(CX), Y11
- VMOVDQU 5216(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 5248(CX), Y11
- VMOVDQU 5280(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 5312(CX), Y11
- VMOVDQU 5344(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 5376(CX), Y11
- VMOVDQU 5408(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 5440(CX), Y11
- VMOVDQU 5472(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 5504(CX), Y11
- VMOVDQU 5536(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 5568(CX), Y11
- VMOVDQU 5600(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 5632(CX), Y11
- VMOVDQU 5664(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 5696(CX), Y11
- VMOVDQU 5728(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs
- MOVQ (R13), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(R13), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(R13), R15
- VMOVDQU Y2, (R15)(R14*1)
- MOVQ 72(R13), R15
- VMOVDQU Y3, (R15)(R14*1)
- MOVQ 96(R13), R15
- VMOVDQU Y4, (R15)(R14*1)
- MOVQ 120(R13), R15
- VMOVDQU Y5, (R15)(R14*1)
- MOVQ 144(R13), R15
- VMOVDQU Y6, (R15)(R14*1)
- MOVQ 168(R13), R15
- VMOVDQU Y7, (R15)(R14*1)
- MOVQ 192(R13), R15
- VMOVDQU Y8, (R15)(R14*1)
- MOVQ 216(R13), R15
- VMOVDQU Y9, (R15)(R14*1)
-
- // Prepare for next loop
- ADDQ $0x20, R14
- DECQ AX
- JNZ mulAvxTwo_9x10Xor_loop
- VZEROUPPER
-
-mulAvxTwo_9x10Xor_end:
- RET
-
-// func mulAvxTwo_10x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x1(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 24 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x1_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ (R14), R14
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R14
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X1
- VPBROADCASTB X1, Y1
-
-mulAvxTwo_10x1_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y4
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU (CX), Y2
- VMOVDQU 32(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y0
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y4
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y4
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 128(CX), Y2
- VMOVDQU 160(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y4
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 192(CX), Y2
- VMOVDQU 224(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y4
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 256(CX), Y2
- VMOVDQU 288(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 5 to 1 outputs
- VMOVDQU (R10), Y4
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 320(CX), Y2
- VMOVDQU 352(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 6 to 1 outputs
- VMOVDQU (R11), Y4
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 384(CX), Y2
- VMOVDQU 416(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 7 to 1 outputs
- VMOVDQU (R12), Y4
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 448(CX), Y2
- VMOVDQU 480(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 8 to 1 outputs
- VMOVDQU (R13), Y4
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 512(CX), Y2
- VMOVDQU 544(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 9 to 1 outputs
- VMOVDQU (DX), Y4
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 576(CX), Y2
- VMOVDQU 608(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Store 1 outputs
- VMOVDQU Y0, (R14)
- ADDQ $0x20, R14
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_10x1_loop
- VZEROUPPER
-
-mulAvxTwo_10x1_end:
- RET
-
-// func mulAvxTwo_10x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x1_64(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 46 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x1_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ out_base+48(FP), R14
- MOVQ (R14), R14
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R14
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_10x1_64_loop:
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y6
- VMOVDQU 32(BX), Y5
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- VPXOR Y3, Y4, Y0
- VPXOR Y5, Y6, Y1
-
- // Load and process 64 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y6
- VMOVDQU 32(SI), Y5
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y6
- VMOVDQU 32(DI), Y5
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y6
- VMOVDQU 32(R8), Y5
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y6
- VMOVDQU 32(R9), Y5
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 5 to 1 outputs
- VMOVDQU (R10), Y6
- VMOVDQU 32(R10), Y5
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 6 to 1 outputs
- VMOVDQU (R11), Y6
- VMOVDQU 32(R11), Y5
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 7 to 1 outputs
- VMOVDQU (R12), Y6
- VMOVDQU 32(R12), Y5
- ADDQ $0x40, R12
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 8 to 1 outputs
- VMOVDQU (R13), Y6
- VMOVDQU 32(R13), Y5
- ADDQ $0x40, R13
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 9 to 1 outputs
- VMOVDQU (DX), Y6
- VMOVDQU 32(DX), Y5
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Store 1 outputs
- VMOVDQU Y0, (R14)
- VMOVDQU Y1, 32(R14)
- ADDQ $0x40, R14
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_10x1_64_loop
- VZEROUPPER
-
-mulAvxTwo_10x1_64_end:
- RET
-
-// func mulAvxTwo_10x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x1Xor(SB), NOSPLIT, $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 24 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x1Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ (R14), R14
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R14
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X1
- VPBROADCASTB X1, Y1
-
-mulAvxTwo_10x1Xor_loop:
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y4
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU (R14), Y0
- VMOVDQU (CX), Y2
- VMOVDQU 32(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y4
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y4
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 128(CX), Y2
- VMOVDQU 160(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y4
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 192(CX), Y2
- VMOVDQU 224(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y4
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 256(CX), Y2
- VMOVDQU 288(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 5 to 1 outputs
- VMOVDQU (R10), Y4
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 320(CX), Y2
- VMOVDQU 352(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 6 to 1 outputs
- VMOVDQU (R11), Y4
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 384(CX), Y2
- VMOVDQU 416(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 7 to 1 outputs
- VMOVDQU (R12), Y4
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 448(CX), Y2
- VMOVDQU 480(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 8 to 1 outputs
- VMOVDQU (R13), Y4
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 512(CX), Y2
- VMOVDQU 544(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Load and process 32 bytes from input 9 to 1 outputs
- VMOVDQU (DX), Y4
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 576(CX), Y2
- VMOVDQU 608(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- XOR3WAY( $0x00, Y2, Y3, Y0)
-
- // Store 1 outputs
- VMOVDQU Y0, (R14)
- ADDQ $0x20, R14
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_10x1Xor_loop
- VZEROUPPER
-
-mulAvxTwo_10x1Xor_end:
- RET
-
-// func mulAvxTwo_10x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x1_64Xor(SB), $0-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 46 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x1_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ out_base+48(FP), R14
- MOVQ (R14), R14
- MOVQ start+72(FP), R15
-
- // Add start offset to output
- ADDQ R15, R14
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, R15
- MOVQ R15, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_10x1_64Xor_loop:
- // Load 1 outputs
- VMOVDQU (R14), Y0
- VMOVDQU 32(R14), Y1
-
- // Load and process 64 bytes from input 0 to 1 outputs
- VMOVDQU (BX), Y6
- VMOVDQU 32(BX), Y5
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 1 to 1 outputs
- VMOVDQU (SI), Y6
- VMOVDQU 32(SI), Y5
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 2 to 1 outputs
- VMOVDQU (DI), Y6
- VMOVDQU 32(DI), Y5
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 3 to 1 outputs
- VMOVDQU (R8), Y6
- VMOVDQU 32(R8), Y5
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 4 to 1 outputs
- VMOVDQU (R9), Y6
- VMOVDQU 32(R9), Y5
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 5 to 1 outputs
- VMOVDQU (R10), Y6
- VMOVDQU 32(R10), Y5
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 6 to 1 outputs
- VMOVDQU (R11), Y6
- VMOVDQU 32(R11), Y5
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 7 to 1 outputs
- VMOVDQU (R12), Y6
- VMOVDQU 32(R12), Y5
- ADDQ $0x40, R12
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 8 to 1 outputs
- VMOVDQU (R13), Y6
- VMOVDQU 32(R13), Y5
- ADDQ $0x40, R13
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Load and process 64 bytes from input 9 to 1 outputs
- VMOVDQU (DX), Y6
- VMOVDQU 32(DX), Y5
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y6, Y7
- VPSRLQ $0x04, Y5, Y8
- VPAND Y2, Y6, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y7, Y7
- VPAND Y2, Y8, Y8
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y5
- VPSHUFB Y6, Y3, Y3
- VPSHUFB Y8, Y4, Y6
- VPSHUFB Y7, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- XOR3WAY( $0x00, Y5, Y6, Y1)
-
- // Store 1 outputs
- VMOVDQU Y0, (R14)
- VMOVDQU Y1, 32(R14)
- ADDQ $0x40, R14
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_10x1_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_10x1_64Xor_end:
- RET
-
-// func mulAvxTwo_10x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x2(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 47 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x2_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ (R14), R15
- MOVQ 24(R14), R14
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R15
- ADDQ BP, R14
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_10x2_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y5
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y0
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y1
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y5
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y5
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y5
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y5
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 5 to 2 outputs
- VMOVDQU (R10), Y5
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 640(CX), Y3
- VMOVDQU 672(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 704(CX), Y3
- VMOVDQU 736(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 6 to 2 outputs
- VMOVDQU (R11), Y5
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 768(CX), Y3
- VMOVDQU 800(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 832(CX), Y3
- VMOVDQU 864(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 7 to 2 outputs
- VMOVDQU (R12), Y5
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 896(CX), Y3
- VMOVDQU 928(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 960(CX), Y3
- VMOVDQU 992(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 8 to 2 outputs
- VMOVDQU (R13), Y5
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 1024(CX), Y3
- VMOVDQU 1056(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 1088(CX), Y3
- VMOVDQU 1120(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 9 to 2 outputs
- VMOVDQU (DX), Y5
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 1152(CX), Y3
- VMOVDQU 1184(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 1216(CX), Y3
- VMOVDQU 1248(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Store 2 outputs
- VMOVDQU Y0, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y1, (R14)
- ADDQ $0x20, R14
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_10x2_loop
- VZEROUPPER
-
-mulAvxTwo_10x2_end:
- RET
-
-// func mulAvxTwo_10x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x2_64(SB), $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 89 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x2_64_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ out_base+48(FP), R14
- MOVQ (R14), R15
- MOVQ 24(R14), R14
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R15
- ADDQ BP, R14
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_10x2_64_loop:
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9
- VMOVDQU 32(BX), Y11
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y0
- VPXOR Y7, Y8, Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- VPXOR Y5, Y6, Y2
- VPXOR Y7, Y8, Y3
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y9
- VMOVDQU 32(SI), Y11
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y9
- VMOVDQU 32(DI), Y11
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y9
- VMOVDQU 32(R8), Y11
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y9
- VMOVDQU 32(R9), Y11
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 5 to 2 outputs
- VMOVDQU (R10), Y9
- VMOVDQU 32(R10), Y11
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 6 to 2 outputs
- VMOVDQU (R11), Y9
- VMOVDQU 32(R11), Y11
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 7 to 2 outputs
- VMOVDQU (R12), Y9
- VMOVDQU 32(R12), Y11
- ADDQ $0x40, R12
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 8 to 2 outputs
- VMOVDQU (R13), Y9
- VMOVDQU 32(R13), Y11
- ADDQ $0x40, R13
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 9 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (R15)
- VMOVDQU Y1, 32(R15)
- ADDQ $0x40, R15
- VMOVDQU Y2, (R14)
- VMOVDQU Y3, 32(R14)
- ADDQ $0x40, R14
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_10x2_64_loop
- VZEROUPPER
-
-mulAvxTwo_10x2_64_end:
- RET
-
-// func mulAvxTwo_10x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x2Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 47 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x2Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ (R14), R15
- MOVQ 24(R14), R14
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R15
- ADDQ BP, R14
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X2
- VPBROADCASTB X2, Y2
-
-mulAvxTwo_10x2Xor_loop:
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y5
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (R15), Y0
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU (R14), Y1
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y5
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y5
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y5
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y5
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 5 to 2 outputs
- VMOVDQU (R10), Y5
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 640(CX), Y3
- VMOVDQU 672(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 704(CX), Y3
- VMOVDQU 736(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 6 to 2 outputs
- VMOVDQU (R11), Y5
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 768(CX), Y3
- VMOVDQU 800(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 832(CX), Y3
- VMOVDQU 864(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 7 to 2 outputs
- VMOVDQU (R12), Y5
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 896(CX), Y3
- VMOVDQU 928(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 960(CX), Y3
- VMOVDQU 992(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 8 to 2 outputs
- VMOVDQU (R13), Y5
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 1024(CX), Y3
- VMOVDQU 1056(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 1088(CX), Y3
- VMOVDQU 1120(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Load and process 32 bytes from input 9 to 2 outputs
- VMOVDQU (DX), Y5
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 1152(CX), Y3
- VMOVDQU 1184(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y0)
- VMOVDQU 1216(CX), Y3
- VMOVDQU 1248(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- XOR3WAY( $0x00, Y3, Y4, Y1)
-
- // Store 2 outputs
- VMOVDQU Y0, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y1, (R14)
- ADDQ $0x20, R14
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_10x2Xor_loop
- VZEROUPPER
-
-mulAvxTwo_10x2Xor_end:
- RET
-
-// func mulAvxTwo_10x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x2_64Xor(SB), $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 89 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x2_64Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ out_base+48(FP), R14
- MOVQ (R14), R15
- MOVQ 24(R14), R14
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R15
- ADDQ BP, R14
-
- // Add start offset to input
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, R13
- ADDQ BP, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_10x2_64Xor_loop:
- // Load 2 outputs
- VMOVDQU (R15), Y0
- VMOVDQU 32(R15), Y1
- VMOVDQU (R14), Y2
- VMOVDQU 32(R14), Y3
-
- // Load and process 64 bytes from input 0 to 2 outputs
- VMOVDQU (BX), Y9
- VMOVDQU 32(BX), Y11
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 1 to 2 outputs
- VMOVDQU (SI), Y9
- VMOVDQU 32(SI), Y11
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 2 to 2 outputs
- VMOVDQU (DI), Y9
- VMOVDQU 32(DI), Y11
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 3 to 2 outputs
- VMOVDQU (R8), Y9
- VMOVDQU 32(R8), Y11
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 4 to 2 outputs
- VMOVDQU (R9), Y9
- VMOVDQU 32(R9), Y11
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 5 to 2 outputs
- VMOVDQU (R10), Y9
- VMOVDQU 32(R10), Y11
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 6 to 2 outputs
- VMOVDQU (R11), Y9
- VMOVDQU 32(R11), Y11
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 7 to 2 outputs
- VMOVDQU (R12), Y9
- VMOVDQU 32(R12), Y11
- ADDQ $0x40, R12
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 8 to 2 outputs
- VMOVDQU (R13), Y9
- VMOVDQU 32(R13), Y11
- ADDQ $0x40, R13
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Load and process 64 bytes from input 9 to 2 outputs
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y11
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y9, Y10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y4, Y9, Y9
- VPAND Y4, Y11, Y11
- VPAND Y4, Y10, Y10
- VPAND Y4, Y12, Y12
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
- VPSHUFB Y11, Y5, Y7
- VPSHUFB Y9, Y5, Y5
- VPSHUFB Y12, Y6, Y8
- VPSHUFB Y10, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- XOR3WAY( $0x00, Y7, Y8, Y3)
-
- // Store 2 outputs
- VMOVDQU Y0, (R15)
- VMOVDQU Y1, 32(R15)
- ADDQ $0x40, R15
- VMOVDQU Y2, (R14)
- VMOVDQU Y3, 32(R14)
- ADDQ $0x40, R14
-
- // Prepare for next loop
- DECQ AX
- JNZ mulAvxTwo_10x2_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_10x2_64Xor_end:
- RET
-
-// func mulAvxTwo_10x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x3(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 68 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x3_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), SI
- MOVQ 72(AX), DI
- MOVQ 96(AX), R8
- MOVQ 120(AX), R9
- MOVQ 144(AX), R10
- MOVQ 168(AX), R11
- MOVQ 192(AX), R12
- MOVQ 216(AX), AX
- MOVQ out_base+48(FP), R13
- MOVQ (R13), R14
- MOVQ 24(R13), R15
- MOVQ 48(R13), R13
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R13
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X3
- VPBROADCASTB X3, Y3
- MOVQ n+80(FP), BP
- SHRQ $0x05, BP
-
-mulAvxTwo_10x3_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y0
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y1
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y2
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (SI), Y6
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (DI), Y6
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (R8), Y6
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 768(CX), Y4
- VMOVDQU 800(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 832(CX), Y4
- VMOVDQU 864(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 896(CX), Y4
- VMOVDQU 928(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 5 to 3 outputs
- VMOVDQU (R9), Y6
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 960(CX), Y4
- VMOVDQU 992(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1024(CX), Y4
- VMOVDQU 1056(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1088(CX), Y4
- VMOVDQU 1120(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 6 to 3 outputs
- VMOVDQU (R10), Y6
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1152(CX), Y4
- VMOVDQU 1184(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1216(CX), Y4
- VMOVDQU 1248(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1280(CX), Y4
- VMOVDQU 1312(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 7 to 3 outputs
- VMOVDQU (R11), Y6
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1344(CX), Y4
- VMOVDQU 1376(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1408(CX), Y4
- VMOVDQU 1440(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1472(CX), Y4
- VMOVDQU 1504(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 8 to 3 outputs
- VMOVDQU (R12), Y6
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1536(CX), Y4
- VMOVDQU 1568(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1600(CX), Y4
- VMOVDQU 1632(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1664(CX), Y4
- VMOVDQU 1696(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 9 to 3 outputs
- VMOVDQU (AX), Y6
- ADDQ $0x20, AX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1728(CX), Y4
- VMOVDQU 1760(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1792(CX), Y4
- VMOVDQU 1824(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1856(CX), Y4
- VMOVDQU 1888(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y1, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y2, (R13)
- ADDQ $0x20, R13
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_10x3_loop
- VZEROUPPER
-
-mulAvxTwo_10x3_end:
- RET
-
-// func mulAvxTwo_10x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x3_64(SB), $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 130 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x3_64_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), SI
- MOVQ 72(AX), DI
- MOVQ 96(AX), R8
- MOVQ 120(AX), R9
- MOVQ 144(AX), R10
- MOVQ 168(AX), R11
- MOVQ 192(AX), R12
- MOVQ 216(AX), AX
- MOVQ out_base+48(FP), R13
- MOVQ out_base+48(FP), R13
- MOVQ (R13), R14
- MOVQ 24(R13), R15
- MOVQ 48(R13), R13
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R13
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X6
- VPBROADCASTB X6, Y6
-
- // Reload length to save a register
- MOVQ n+80(FP), BP
- SHRQ $0x06, BP
-
-mulAvxTwo_10x3_64_loop:
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y0
- VPXOR Y9, Y10, Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y2
- VPXOR Y9, Y10, Y3
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- VPXOR Y7, Y8, Y4
- VPXOR Y9, Y10, Y5
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (BX), Y11
- VMOVDQU 32(BX), Y13
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 2 to 3 outputs
- VMOVDQU (SI), Y11
- VMOVDQU 32(SI), Y13
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 3 to 3 outputs
- VMOVDQU (DI), Y11
- VMOVDQU 32(DI), Y13
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 4 to 3 outputs
- VMOVDQU (R8), Y11
- VMOVDQU 32(R8), Y13
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 5 to 3 outputs
- VMOVDQU (R9), Y11
- VMOVDQU 32(R9), Y13
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 6 to 3 outputs
- VMOVDQU (R10), Y11
- VMOVDQU 32(R10), Y13
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 7 to 3 outputs
- VMOVDQU (R11), Y11
- VMOVDQU 32(R11), Y13
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 8 to 3 outputs
- VMOVDQU (R12), Y11
- VMOVDQU 32(R12), Y13
- ADDQ $0x40, R12
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 9 to 3 outputs
- VMOVDQU (AX), Y11
- VMOVDQU 32(AX), Y13
- ADDQ $0x40, AX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (R14)
- VMOVDQU Y1, 32(R14)
- ADDQ $0x40, R14
- VMOVDQU Y2, (R15)
- VMOVDQU Y3, 32(R15)
- ADDQ $0x40, R15
- VMOVDQU Y4, (R13)
- VMOVDQU Y5, 32(R13)
- ADDQ $0x40, R13
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_10x3_64_loop
- VZEROUPPER
-
-mulAvxTwo_10x3_64_end:
- RET
-
-// func mulAvxTwo_10x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x3Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 68 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x3Xor_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), SI
- MOVQ 72(AX), DI
- MOVQ 96(AX), R8
- MOVQ 120(AX), R9
- MOVQ 144(AX), R10
- MOVQ 168(AX), R11
- MOVQ 192(AX), R12
- MOVQ 216(AX), AX
- MOVQ out_base+48(FP), R13
- MOVQ (R13), R14
- MOVQ 24(R13), R15
- MOVQ 48(R13), R13
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R13
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X3
- VPBROADCASTB X3, Y3
- MOVQ n+80(FP), BP
- SHRQ $0x05, BP
-
-mulAvxTwo_10x3Xor_loop:
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (DX), Y6
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (R14), Y0
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU (R15), Y1
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU (R13), Y2
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (BX), Y6
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (SI), Y6
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (DI), Y6
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (R8), Y6
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 768(CX), Y4
- VMOVDQU 800(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 832(CX), Y4
- VMOVDQU 864(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 896(CX), Y4
- VMOVDQU 928(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 5 to 3 outputs
- VMOVDQU (R9), Y6
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 960(CX), Y4
- VMOVDQU 992(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1024(CX), Y4
- VMOVDQU 1056(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1088(CX), Y4
- VMOVDQU 1120(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 6 to 3 outputs
- VMOVDQU (R10), Y6
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1152(CX), Y4
- VMOVDQU 1184(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1216(CX), Y4
- VMOVDQU 1248(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1280(CX), Y4
- VMOVDQU 1312(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 7 to 3 outputs
- VMOVDQU (R11), Y6
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1344(CX), Y4
- VMOVDQU 1376(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1408(CX), Y4
- VMOVDQU 1440(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1472(CX), Y4
- VMOVDQU 1504(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 8 to 3 outputs
- VMOVDQU (R12), Y6
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1536(CX), Y4
- VMOVDQU 1568(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1600(CX), Y4
- VMOVDQU 1632(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1664(CX), Y4
- VMOVDQU 1696(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Load and process 32 bytes from input 9 to 3 outputs
- VMOVDQU (AX), Y6
- ADDQ $0x20, AX
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1728(CX), Y4
- VMOVDQU 1760(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y0)
- VMOVDQU 1792(CX), Y4
- VMOVDQU 1824(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y1)
- VMOVDQU 1856(CX), Y4
- VMOVDQU 1888(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- XOR3WAY( $0x00, Y4, Y5, Y2)
-
- // Store 3 outputs
- VMOVDQU Y0, (R14)
- ADDQ $0x20, R14
- VMOVDQU Y1, (R15)
- ADDQ $0x20, R15
- VMOVDQU Y2, (R13)
- ADDQ $0x20, R13
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_10x3Xor_loop
- VZEROUPPER
-
-mulAvxTwo_10x3Xor_end:
- RET
-
-// func mulAvxTwo_10x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x3_64Xor(SB), $8-88
- // Loading no tables to registers
- // Destination kept in GP registers
- // Full registers estimated 130 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x06, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x3_64Xor_end
- MOVQ in_base+24(FP), AX
- MOVQ (AX), DX
- MOVQ 24(AX), BX
- MOVQ 48(AX), SI
- MOVQ 72(AX), DI
- MOVQ 96(AX), R8
- MOVQ 120(AX), R9
- MOVQ 144(AX), R10
- MOVQ 168(AX), R11
- MOVQ 192(AX), R12
- MOVQ 216(AX), AX
- MOVQ out_base+48(FP), R13
- MOVQ out_base+48(FP), R13
- MOVQ (R13), R14
- MOVQ 24(R13), R15
- MOVQ 48(R13), R13
- MOVQ start+72(FP), BP
-
- // Add start offset to output
- ADDQ BP, R14
- ADDQ BP, R15
- ADDQ BP, R13
-
- // Add start offset to input
- ADDQ BP, DX
- ADDQ BP, BX
- ADDQ BP, SI
- ADDQ BP, DI
- ADDQ BP, R8
- ADDQ BP, R9
- ADDQ BP, R10
- ADDQ BP, R11
- ADDQ BP, R12
- ADDQ BP, AX
- MOVQ $0x0000000f, BP
- MOVQ BP, X6
- VPBROADCASTB X6, Y6
-
- // Reload length to save a register
- MOVQ n+80(FP), BP
- SHRQ $0x06, BP
-
-mulAvxTwo_10x3_64Xor_loop:
- // Load 3 outputs
- VMOVDQU (R14), Y0
- VMOVDQU 32(R14), Y1
- VMOVDQU (R15), Y2
- VMOVDQU 32(R15), Y3
- VMOVDQU (R13), Y4
- VMOVDQU 32(R13), Y5
-
- // Load and process 64 bytes from input 0 to 3 outputs
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y13
- ADDQ $0x40, DX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 1 to 3 outputs
- VMOVDQU (BX), Y11
- VMOVDQU 32(BX), Y13
- ADDQ $0x40, BX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 2 to 3 outputs
- VMOVDQU (SI), Y11
- VMOVDQU 32(SI), Y13
- ADDQ $0x40, SI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 3 to 3 outputs
- VMOVDQU (DI), Y11
- VMOVDQU 32(DI), Y13
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 4 to 3 outputs
- VMOVDQU (R8), Y11
- VMOVDQU 32(R8), Y13
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 5 to 3 outputs
- VMOVDQU (R9), Y11
- VMOVDQU 32(R9), Y13
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 6 to 3 outputs
- VMOVDQU (R10), Y11
- VMOVDQU 32(R10), Y13
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 7 to 3 outputs
- VMOVDQU (R11), Y11
- VMOVDQU 32(R11), Y13
- ADDQ $0x40, R11
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 8 to 3 outputs
- VMOVDQU (R12), Y11
- VMOVDQU 32(R12), Y13
- ADDQ $0x40, R12
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Load and process 64 bytes from input 9 to 3 outputs
- VMOVDQU (AX), Y11
- VMOVDQU 32(AX), Y13
- ADDQ $0x40, AX
- VPSRLQ $0x04, Y11, Y12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y6, Y11, Y11
- VPAND Y6, Y13, Y13
- VPAND Y6, Y12, Y12
- VPAND Y6, Y14, Y14
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y13, Y7, Y9
- VPSHUFB Y11, Y7, Y7
- VPSHUFB Y14, Y8, Y10
- VPSHUFB Y12, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- XOR3WAY( $0x00, Y9, Y10, Y5)
-
- // Store 3 outputs
- VMOVDQU Y0, (R14)
- VMOVDQU Y1, 32(R14)
- ADDQ $0x40, R14
- VMOVDQU Y2, (R15)
- VMOVDQU Y3, 32(R15)
- ADDQ $0x40, R15
- VMOVDQU Y4, (R13)
- VMOVDQU Y5, 32(R13)
- ADDQ $0x40, R13
-
- // Prepare for next loop
- DECQ BP
- JNZ mulAvxTwo_10x3_64Xor_loop
- VZEROUPPER
-
-mulAvxTwo_10x3_64Xor_end:
- RET
-
-// func mulAvxTwo_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x4(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 89 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x4_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ start+72(FP), R15
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_10x4_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y0
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y1
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y2
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y3
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (SI), Y7
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (DI), Y7
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (R8), Y7
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (R9), Y7
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 5 to 4 outputs
- VMOVDQU (R10), Y7
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1280(CX), Y5
- VMOVDQU 1312(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1344(CX), Y5
- VMOVDQU 1376(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1408(CX), Y5
- VMOVDQU 1440(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1472(CX), Y5
- VMOVDQU 1504(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 6 to 4 outputs
- VMOVDQU (R11), Y7
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1536(CX), Y5
- VMOVDQU 1568(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1600(CX), Y5
- VMOVDQU 1632(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1664(CX), Y5
- VMOVDQU 1696(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1728(CX), Y5
- VMOVDQU 1760(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 7 to 4 outputs
- VMOVDQU (R12), Y7
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1792(CX), Y5
- VMOVDQU 1824(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1856(CX), Y5
- VMOVDQU 1888(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1920(CX), Y5
- VMOVDQU 1952(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1984(CX), Y5
- VMOVDQU 2016(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 8 to 4 outputs
- VMOVDQU (R13), Y7
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 2048(CX), Y5
- VMOVDQU 2080(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 2112(CX), Y5
- VMOVDQU 2144(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 2176(CX), Y5
- VMOVDQU 2208(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 2240(CX), Y5
- VMOVDQU 2272(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 9 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 2304(CX), Y5
- VMOVDQU 2336(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 2368(CX), Y5
- VMOVDQU 2400(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 2432(CX), Y5
- VMOVDQU 2464(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 2496(CX), Y5
- VMOVDQU 2528(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- MOVQ (R14), BP
- VMOVDQU Y0, (BP)(R15*1)
- MOVQ 24(R14), BP
- VMOVDQU Y1, (BP)(R15*1)
- MOVQ 48(R14), BP
- VMOVDQU Y2, (BP)(R15*1)
- MOVQ 72(R14), BP
- VMOVDQU Y3, (BP)(R15*1)
-
- // Prepare for next loop
- ADDQ $0x20, R15
- DECQ AX
- JNZ mulAvxTwo_10x4_loop
- VZEROUPPER
-
-mulAvxTwo_10x4_end:
- RET
-
-// func mulAvxTwo_10x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x4Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 89 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x4Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ start+72(FP), R15
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X4
- VPBROADCASTB X4, Y4
-
-mulAvxTwo_10x4Xor_loop:
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BX), Y7
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- MOVQ (R14), BP
- VMOVDQU (BP)(R15*1), Y0
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- MOVQ 24(R14), BP
- VMOVDQU (BP)(R15*1), Y1
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- MOVQ 48(R14), BP
- VMOVDQU (BP)(R15*1), Y2
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- MOVQ 72(R14), BP
- VMOVDQU (BP)(R15*1), Y3
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (SI), Y7
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (DI), Y7
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (R8), Y7
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (R9), Y7
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 5 to 4 outputs
- VMOVDQU (R10), Y7
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1280(CX), Y5
- VMOVDQU 1312(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1344(CX), Y5
- VMOVDQU 1376(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1408(CX), Y5
- VMOVDQU 1440(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1472(CX), Y5
- VMOVDQU 1504(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 6 to 4 outputs
- VMOVDQU (R11), Y7
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1536(CX), Y5
- VMOVDQU 1568(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1600(CX), Y5
- VMOVDQU 1632(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1664(CX), Y5
- VMOVDQU 1696(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1728(CX), Y5
- VMOVDQU 1760(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 7 to 4 outputs
- VMOVDQU (R12), Y7
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1792(CX), Y5
- VMOVDQU 1824(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 1856(CX), Y5
- VMOVDQU 1888(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 1920(CX), Y5
- VMOVDQU 1952(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 1984(CX), Y5
- VMOVDQU 2016(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 8 to 4 outputs
- VMOVDQU (R13), Y7
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 2048(CX), Y5
- VMOVDQU 2080(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 2112(CX), Y5
- VMOVDQU 2144(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 2176(CX), Y5
- VMOVDQU 2208(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 2240(CX), Y5
- VMOVDQU 2272(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Load and process 32 bytes from input 9 to 4 outputs
- VMOVDQU (DX), Y7
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 2304(CX), Y5
- VMOVDQU 2336(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y0)
- VMOVDQU 2368(CX), Y5
- VMOVDQU 2400(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y1)
- VMOVDQU 2432(CX), Y5
- VMOVDQU 2464(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU 2496(CX), Y5
- VMOVDQU 2528(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y5, Y6, Y3)
-
- // Store 4 outputs
- MOVQ (R14), BP
- VMOVDQU Y0, (BP)(R15*1)
- MOVQ 24(R14), BP
- VMOVDQU Y1, (BP)(R15*1)
- MOVQ 48(R14), BP
- VMOVDQU Y2, (BP)(R15*1)
- MOVQ 72(R14), BP
- VMOVDQU Y3, (BP)(R15*1)
-
- // Prepare for next loop
- ADDQ $0x20, R15
- DECQ AX
- JNZ mulAvxTwo_10x4Xor_loop
- VZEROUPPER
-
-mulAvxTwo_10x4Xor_end:
- RET
-
-// func mulAvxTwo_10x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x5(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 110 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x5_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ start+72(FP), R15
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_10x5_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y0
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y1
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y2
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y3
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y4
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (SI), Y8
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (DI), Y8
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (R8), Y8
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (R9), Y8
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 5 to 5 outputs
- VMOVDQU (R10), Y8
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1600(CX), Y6
- VMOVDQU 1632(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1664(CX), Y6
- VMOVDQU 1696(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1728(CX), Y6
- VMOVDQU 1760(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1792(CX), Y6
- VMOVDQU 1824(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1856(CX), Y6
- VMOVDQU 1888(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 6 to 5 outputs
- VMOVDQU (R11), Y8
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1920(CX), Y6
- VMOVDQU 1952(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1984(CX), Y6
- VMOVDQU 2016(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2048(CX), Y6
- VMOVDQU 2080(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2112(CX), Y6
- VMOVDQU 2144(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2176(CX), Y6
- VMOVDQU 2208(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 7 to 5 outputs
- VMOVDQU (R12), Y8
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2240(CX), Y6
- VMOVDQU 2272(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 2304(CX), Y6
- VMOVDQU 2336(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2368(CX), Y6
- VMOVDQU 2400(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2432(CX), Y6
- VMOVDQU 2464(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2496(CX), Y6
- VMOVDQU 2528(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 8 to 5 outputs
- VMOVDQU (R13), Y8
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2560(CX), Y6
- VMOVDQU 2592(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 2624(CX), Y6
- VMOVDQU 2656(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2688(CX), Y6
- VMOVDQU 2720(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2752(CX), Y6
- VMOVDQU 2784(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2816(CX), Y6
- VMOVDQU 2848(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 9 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2880(CX), Y6
- VMOVDQU 2912(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 2944(CX), Y6
- VMOVDQU 2976(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 3008(CX), Y6
- VMOVDQU 3040(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 3072(CX), Y6
- VMOVDQU 3104(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 3136(CX), Y6
- VMOVDQU 3168(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- MOVQ (R14), BP
- VMOVDQU Y0, (BP)(R15*1)
- MOVQ 24(R14), BP
- VMOVDQU Y1, (BP)(R15*1)
- MOVQ 48(R14), BP
- VMOVDQU Y2, (BP)(R15*1)
- MOVQ 72(R14), BP
- VMOVDQU Y3, (BP)(R15*1)
- MOVQ 96(R14), BP
- VMOVDQU Y4, (BP)(R15*1)
-
- // Prepare for next loop
- ADDQ $0x20, R15
- DECQ AX
- JNZ mulAvxTwo_10x5_loop
- VZEROUPPER
-
-mulAvxTwo_10x5_end:
- RET
-
-// func mulAvxTwo_10x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x5Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 110 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x5Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ start+72(FP), R15
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X5
- VPBROADCASTB X5, Y5
-
-mulAvxTwo_10x5Xor_loop:
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BX), Y8
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- MOVQ (R14), BP
- VMOVDQU (BP)(R15*1), Y0
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- MOVQ 24(R14), BP
- VMOVDQU (BP)(R15*1), Y1
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- MOVQ 48(R14), BP
- VMOVDQU (BP)(R15*1), Y2
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- MOVQ 72(R14), BP
- VMOVDQU (BP)(R15*1), Y3
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- MOVQ 96(R14), BP
- VMOVDQU (BP)(R15*1), Y4
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (SI), Y8
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (DI), Y8
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (R8), Y8
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (R9), Y8
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 5 to 5 outputs
- VMOVDQU (R10), Y8
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1600(CX), Y6
- VMOVDQU 1632(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1664(CX), Y6
- VMOVDQU 1696(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 1728(CX), Y6
- VMOVDQU 1760(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 1792(CX), Y6
- VMOVDQU 1824(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 1856(CX), Y6
- VMOVDQU 1888(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 6 to 5 outputs
- VMOVDQU (R11), Y8
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1920(CX), Y6
- VMOVDQU 1952(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 1984(CX), Y6
- VMOVDQU 2016(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2048(CX), Y6
- VMOVDQU 2080(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2112(CX), Y6
- VMOVDQU 2144(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2176(CX), Y6
- VMOVDQU 2208(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 7 to 5 outputs
- VMOVDQU (R12), Y8
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2240(CX), Y6
- VMOVDQU 2272(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 2304(CX), Y6
- VMOVDQU 2336(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2368(CX), Y6
- VMOVDQU 2400(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2432(CX), Y6
- VMOVDQU 2464(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2496(CX), Y6
- VMOVDQU 2528(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 8 to 5 outputs
- VMOVDQU (R13), Y8
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2560(CX), Y6
- VMOVDQU 2592(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 2624(CX), Y6
- VMOVDQU 2656(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 2688(CX), Y6
- VMOVDQU 2720(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 2752(CX), Y6
- VMOVDQU 2784(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 2816(CX), Y6
- VMOVDQU 2848(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Load and process 32 bytes from input 9 to 5 outputs
- VMOVDQU (DX), Y8
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2880(CX), Y6
- VMOVDQU 2912(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y0)
- VMOVDQU 2944(CX), Y6
- VMOVDQU 2976(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y1)
- VMOVDQU 3008(CX), Y6
- VMOVDQU 3040(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y2)
- VMOVDQU 3072(CX), Y6
- VMOVDQU 3104(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y3)
- VMOVDQU 3136(CX), Y6
- VMOVDQU 3168(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- XOR3WAY( $0x00, Y6, Y7, Y4)
-
- // Store 5 outputs
- MOVQ (R14), BP
- VMOVDQU Y0, (BP)(R15*1)
- MOVQ 24(R14), BP
- VMOVDQU Y1, (BP)(R15*1)
- MOVQ 48(R14), BP
- VMOVDQU Y2, (BP)(R15*1)
- MOVQ 72(R14), BP
- VMOVDQU Y3, (BP)(R15*1)
- MOVQ 96(R14), BP
- VMOVDQU Y4, (BP)(R15*1)
-
- // Prepare for next loop
- ADDQ $0x20, R15
- DECQ AX
- JNZ mulAvxTwo_10x5Xor_loop
- VZEROUPPER
-
-mulAvxTwo_10x5Xor_end:
- RET
-
-// func mulAvxTwo_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x6(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 131 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x6_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ start+72(FP), R15
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_10x6_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y0
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y1
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y2
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y3
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y4
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y5
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI), Y9
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DI), Y9
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (R8), Y9
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (R9), Y9
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 5 to 6 outputs
- VMOVDQU (R10), Y9
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1920(CX), Y7
- VMOVDQU 1952(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1984(CX), Y7
- VMOVDQU 2016(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2048(CX), Y7
- VMOVDQU 2080(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2112(CX), Y7
- VMOVDQU 2144(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2176(CX), Y7
- VMOVDQU 2208(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2240(CX), Y7
- VMOVDQU 2272(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 6 to 6 outputs
- VMOVDQU (R11), Y9
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2304(CX), Y7
- VMOVDQU 2336(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 2368(CX), Y7
- VMOVDQU 2400(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2432(CX), Y7
- VMOVDQU 2464(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2496(CX), Y7
- VMOVDQU 2528(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2560(CX), Y7
- VMOVDQU 2592(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2624(CX), Y7
- VMOVDQU 2656(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 7 to 6 outputs
- VMOVDQU (R12), Y9
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2688(CX), Y7
- VMOVDQU 2720(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 2752(CX), Y7
- VMOVDQU 2784(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2816(CX), Y7
- VMOVDQU 2848(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2880(CX), Y7
- VMOVDQU 2912(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2944(CX), Y7
- VMOVDQU 2976(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 3008(CX), Y7
- VMOVDQU 3040(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 8 to 6 outputs
- VMOVDQU (R13), Y9
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 3072(CX), Y7
- VMOVDQU 3104(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 3136(CX), Y7
- VMOVDQU 3168(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 3200(CX), Y7
- VMOVDQU 3232(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 3264(CX), Y7
- VMOVDQU 3296(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 3328(CX), Y7
- VMOVDQU 3360(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 3392(CX), Y7
- VMOVDQU 3424(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 9 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 3456(CX), Y7
- VMOVDQU 3488(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 3520(CX), Y7
- VMOVDQU 3552(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 3584(CX), Y7
- VMOVDQU 3616(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 3648(CX), Y7
- VMOVDQU 3680(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 3712(CX), Y7
- VMOVDQU 3744(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 3776(CX), Y7
- VMOVDQU 3808(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- MOVQ (R14), BP
- VMOVDQU Y0, (BP)(R15*1)
- MOVQ 24(R14), BP
- VMOVDQU Y1, (BP)(R15*1)
- MOVQ 48(R14), BP
- VMOVDQU Y2, (BP)(R15*1)
- MOVQ 72(R14), BP
- VMOVDQU Y3, (BP)(R15*1)
- MOVQ 96(R14), BP
- VMOVDQU Y4, (BP)(R15*1)
- MOVQ 120(R14), BP
- VMOVDQU Y5, (BP)(R15*1)
-
- // Prepare for next loop
- ADDQ $0x20, R15
- DECQ AX
- JNZ mulAvxTwo_10x6_loop
- VZEROUPPER
-
-mulAvxTwo_10x6_end:
- RET
-
-// func mulAvxTwo_10x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x6Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 131 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x6Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ start+72(FP), R15
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X6
- VPBROADCASTB X6, Y6
-
-mulAvxTwo_10x6Xor_loop:
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BX), Y9
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- MOVQ (R14), BP
- VMOVDQU (BP)(R15*1), Y0
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- MOVQ 24(R14), BP
- VMOVDQU (BP)(R15*1), Y1
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- MOVQ 48(R14), BP
- VMOVDQU (BP)(R15*1), Y2
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- MOVQ 72(R14), BP
- VMOVDQU (BP)(R15*1), Y3
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- MOVQ 96(R14), BP
- VMOVDQU (BP)(R15*1), Y4
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- MOVQ 120(R14), BP
- VMOVDQU (BP)(R15*1), Y5
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI), Y9
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DI), Y9
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (R8), Y9
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (R9), Y9
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 5 to 6 outputs
- VMOVDQU (R10), Y9
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1920(CX), Y7
- VMOVDQU 1952(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 1984(CX), Y7
- VMOVDQU 2016(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2048(CX), Y7
- VMOVDQU 2080(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2112(CX), Y7
- VMOVDQU 2144(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2176(CX), Y7
- VMOVDQU 2208(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2240(CX), Y7
- VMOVDQU 2272(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 6 to 6 outputs
- VMOVDQU (R11), Y9
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2304(CX), Y7
- VMOVDQU 2336(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 2368(CX), Y7
- VMOVDQU 2400(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2432(CX), Y7
- VMOVDQU 2464(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2496(CX), Y7
- VMOVDQU 2528(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2560(CX), Y7
- VMOVDQU 2592(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 2624(CX), Y7
- VMOVDQU 2656(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 7 to 6 outputs
- VMOVDQU (R12), Y9
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2688(CX), Y7
- VMOVDQU 2720(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 2752(CX), Y7
- VMOVDQU 2784(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 2816(CX), Y7
- VMOVDQU 2848(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 2880(CX), Y7
- VMOVDQU 2912(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 2944(CX), Y7
- VMOVDQU 2976(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 3008(CX), Y7
- VMOVDQU 3040(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 8 to 6 outputs
- VMOVDQU (R13), Y9
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 3072(CX), Y7
- VMOVDQU 3104(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 3136(CX), Y7
- VMOVDQU 3168(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 3200(CX), Y7
- VMOVDQU 3232(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 3264(CX), Y7
- VMOVDQU 3296(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 3328(CX), Y7
- VMOVDQU 3360(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 3392(CX), Y7
- VMOVDQU 3424(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Load and process 32 bytes from input 9 to 6 outputs
- VMOVDQU (DX), Y9
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 3456(CX), Y7
- VMOVDQU 3488(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y0)
- VMOVDQU 3520(CX), Y7
- VMOVDQU 3552(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y1)
- VMOVDQU 3584(CX), Y7
- VMOVDQU 3616(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y2)
- VMOVDQU 3648(CX), Y7
- VMOVDQU 3680(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y3)
- VMOVDQU 3712(CX), Y7
- VMOVDQU 3744(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y4)
- VMOVDQU 3776(CX), Y7
- VMOVDQU 3808(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- XOR3WAY( $0x00, Y7, Y8, Y5)
-
- // Store 6 outputs
- MOVQ (R14), BP
- VMOVDQU Y0, (BP)(R15*1)
- MOVQ 24(R14), BP
- VMOVDQU Y1, (BP)(R15*1)
- MOVQ 48(R14), BP
- VMOVDQU Y2, (BP)(R15*1)
- MOVQ 72(R14), BP
- VMOVDQU Y3, (BP)(R15*1)
- MOVQ 96(R14), BP
- VMOVDQU Y4, (BP)(R15*1)
- MOVQ 120(R14), BP
- VMOVDQU Y5, (BP)(R15*1)
-
- // Prepare for next loop
- ADDQ $0x20, R15
- DECQ AX
- JNZ mulAvxTwo_10x6Xor_loop
- VZEROUPPER
-
-mulAvxTwo_10x6Xor_end:
- RET
-
-// func mulAvxTwo_10x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x7(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 152 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x7_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ start+72(FP), R15
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_10x7_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y0
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y1
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y2
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y3
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y4
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y5
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y6
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DI), Y10
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (R8), Y10
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (R9), Y10
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 5 to 7 outputs
- VMOVDQU (R10), Y10
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2240(CX), Y8
- VMOVDQU 2272(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2304(CX), Y8
- VMOVDQU 2336(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2368(CX), Y8
- VMOVDQU 2400(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2432(CX), Y8
- VMOVDQU 2464(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2496(CX), Y8
- VMOVDQU 2528(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2560(CX), Y8
- VMOVDQU 2592(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2624(CX), Y8
- VMOVDQU 2656(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 6 to 7 outputs
- VMOVDQU (R11), Y10
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2688(CX), Y8
- VMOVDQU 2720(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2752(CX), Y8
- VMOVDQU 2784(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2816(CX), Y8
- VMOVDQU 2848(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2880(CX), Y8
- VMOVDQU 2912(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2944(CX), Y8
- VMOVDQU 2976(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3008(CX), Y8
- VMOVDQU 3040(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3072(CX), Y8
- VMOVDQU 3104(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 7 to 7 outputs
- VMOVDQU (R12), Y10
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 3136(CX), Y8
- VMOVDQU 3168(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 3200(CX), Y8
- VMOVDQU 3232(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 3264(CX), Y8
- VMOVDQU 3296(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 3328(CX), Y8
- VMOVDQU 3360(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 3392(CX), Y8
- VMOVDQU 3424(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3456(CX), Y8
- VMOVDQU 3488(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3520(CX), Y8
- VMOVDQU 3552(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 8 to 7 outputs
- VMOVDQU (R13), Y10
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 3584(CX), Y8
- VMOVDQU 3616(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 3648(CX), Y8
- VMOVDQU 3680(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 3712(CX), Y8
- VMOVDQU 3744(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 3776(CX), Y8
- VMOVDQU 3808(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 3840(CX), Y8
- VMOVDQU 3872(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3904(CX), Y8
- VMOVDQU 3936(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3968(CX), Y8
- VMOVDQU 4000(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 9 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 4032(CX), Y8
- VMOVDQU 4064(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 4096(CX), Y8
- VMOVDQU 4128(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 4160(CX), Y8
- VMOVDQU 4192(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 4224(CX), Y8
- VMOVDQU 4256(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 4288(CX), Y8
- VMOVDQU 4320(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 4352(CX), Y8
- VMOVDQU 4384(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 4416(CX), Y8
- VMOVDQU 4448(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- MOVQ (R14), BP
- VMOVDQU Y0, (BP)(R15*1)
- MOVQ 24(R14), BP
- VMOVDQU Y1, (BP)(R15*1)
- MOVQ 48(R14), BP
- VMOVDQU Y2, (BP)(R15*1)
- MOVQ 72(R14), BP
- VMOVDQU Y3, (BP)(R15*1)
- MOVQ 96(R14), BP
- VMOVDQU Y4, (BP)(R15*1)
- MOVQ 120(R14), BP
- VMOVDQU Y5, (BP)(R15*1)
- MOVQ 144(R14), BP
- VMOVDQU Y6, (BP)(R15*1)
-
- // Prepare for next loop
- ADDQ $0x20, R15
- DECQ AX
- JNZ mulAvxTwo_10x7_loop
- VZEROUPPER
-
-mulAvxTwo_10x7_end:
- RET
-
-// func mulAvxTwo_10x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x7Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 152 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x7Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ start+72(FP), R15
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X7
- VPBROADCASTB X7, Y7
-
-mulAvxTwo_10x7Xor_loop:
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BX), Y10
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- MOVQ (R14), BP
- VMOVDQU (BP)(R15*1), Y0
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- MOVQ 24(R14), BP
- VMOVDQU (BP)(R15*1), Y1
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- MOVQ 48(R14), BP
- VMOVDQU (BP)(R15*1), Y2
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- MOVQ 72(R14), BP
- VMOVDQU (BP)(R15*1), Y3
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- MOVQ 96(R14), BP
- VMOVDQU (BP)(R15*1), Y4
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- MOVQ 120(R14), BP
- VMOVDQU (BP)(R15*1), Y5
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- MOVQ 144(R14), BP
- VMOVDQU (BP)(R15*1), Y6
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI), Y10
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DI), Y10
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (R8), Y10
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (R9), Y10
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 5 to 7 outputs
- VMOVDQU (R10), Y10
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2240(CX), Y8
- VMOVDQU 2272(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2304(CX), Y8
- VMOVDQU 2336(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2368(CX), Y8
- VMOVDQU 2400(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2432(CX), Y8
- VMOVDQU 2464(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2496(CX), Y8
- VMOVDQU 2528(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 2560(CX), Y8
- VMOVDQU 2592(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 2624(CX), Y8
- VMOVDQU 2656(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 6 to 7 outputs
- VMOVDQU (R11), Y10
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2688(CX), Y8
- VMOVDQU 2720(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 2752(CX), Y8
- VMOVDQU 2784(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 2816(CX), Y8
- VMOVDQU 2848(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 2880(CX), Y8
- VMOVDQU 2912(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 2944(CX), Y8
- VMOVDQU 2976(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3008(CX), Y8
- VMOVDQU 3040(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3072(CX), Y8
- VMOVDQU 3104(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 7 to 7 outputs
- VMOVDQU (R12), Y10
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 3136(CX), Y8
- VMOVDQU 3168(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 3200(CX), Y8
- VMOVDQU 3232(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 3264(CX), Y8
- VMOVDQU 3296(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 3328(CX), Y8
- VMOVDQU 3360(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 3392(CX), Y8
- VMOVDQU 3424(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3456(CX), Y8
- VMOVDQU 3488(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3520(CX), Y8
- VMOVDQU 3552(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 8 to 7 outputs
- VMOVDQU (R13), Y10
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 3584(CX), Y8
- VMOVDQU 3616(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 3648(CX), Y8
- VMOVDQU 3680(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 3712(CX), Y8
- VMOVDQU 3744(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 3776(CX), Y8
- VMOVDQU 3808(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 3840(CX), Y8
- VMOVDQU 3872(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 3904(CX), Y8
- VMOVDQU 3936(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 3968(CX), Y8
- VMOVDQU 4000(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Load and process 32 bytes from input 9 to 7 outputs
- VMOVDQU (DX), Y10
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 4032(CX), Y8
- VMOVDQU 4064(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y0)
- VMOVDQU 4096(CX), Y8
- VMOVDQU 4128(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y1)
- VMOVDQU 4160(CX), Y8
- VMOVDQU 4192(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y2)
- VMOVDQU 4224(CX), Y8
- VMOVDQU 4256(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y3)
- VMOVDQU 4288(CX), Y8
- VMOVDQU 4320(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y4)
- VMOVDQU 4352(CX), Y8
- VMOVDQU 4384(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y5)
- VMOVDQU 4416(CX), Y8
- VMOVDQU 4448(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- XOR3WAY( $0x00, Y8, Y9, Y6)
-
- // Store 7 outputs
- MOVQ (R14), BP
- VMOVDQU Y0, (BP)(R15*1)
- MOVQ 24(R14), BP
- VMOVDQU Y1, (BP)(R15*1)
- MOVQ 48(R14), BP
- VMOVDQU Y2, (BP)(R15*1)
- MOVQ 72(R14), BP
- VMOVDQU Y3, (BP)(R15*1)
- MOVQ 96(R14), BP
- VMOVDQU Y4, (BP)(R15*1)
- MOVQ 120(R14), BP
- VMOVDQU Y5, (BP)(R15*1)
- MOVQ 144(R14), BP
- VMOVDQU Y6, (BP)(R15*1)
-
- // Prepare for next loop
- ADDQ $0x20, R15
- DECQ AX
- JNZ mulAvxTwo_10x7Xor_loop
- VZEROUPPER
-
-mulAvxTwo_10x7Xor_end:
- RET
-
-// func mulAvxTwo_10x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x8(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 173 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x8_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ start+72(FP), R15
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X8
- VPBROADCASTB X8, Y8
-
-mulAvxTwo_10x8_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y0
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y1
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y2
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y3
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y4
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y5
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y6
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y7
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI), Y11
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DI), Y11
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (R8), Y11
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 4 to 8 outputs
- VMOVDQU (R9), Y11
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 5 to 8 outputs
- VMOVDQU (R10), Y11
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2560(CX), Y9
- VMOVDQU 2592(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2624(CX), Y9
- VMOVDQU 2656(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2688(CX), Y9
- VMOVDQU 2720(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2752(CX), Y9
- VMOVDQU 2784(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2816(CX), Y9
- VMOVDQU 2848(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2880(CX), Y9
- VMOVDQU 2912(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2944(CX), Y9
- VMOVDQU 2976(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3008(CX), Y9
- VMOVDQU 3040(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 6 to 8 outputs
- VMOVDQU (R11), Y11
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3072(CX), Y9
- VMOVDQU 3104(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 3136(CX), Y9
- VMOVDQU 3168(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 3200(CX), Y9
- VMOVDQU 3232(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 3264(CX), Y9
- VMOVDQU 3296(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 3328(CX), Y9
- VMOVDQU 3360(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 3392(CX), Y9
- VMOVDQU 3424(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 3456(CX), Y9
- VMOVDQU 3488(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3520(CX), Y9
- VMOVDQU 3552(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 7 to 8 outputs
- VMOVDQU (R12), Y11
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3584(CX), Y9
- VMOVDQU 3616(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 3648(CX), Y9
- VMOVDQU 3680(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 3712(CX), Y9
- VMOVDQU 3744(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 3776(CX), Y9
- VMOVDQU 3808(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 3840(CX), Y9
- VMOVDQU 3872(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 3904(CX), Y9
- VMOVDQU 3936(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 3968(CX), Y9
- VMOVDQU 4000(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 4032(CX), Y9
- VMOVDQU 4064(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 8 to 8 outputs
- VMOVDQU (R13), Y11
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 4096(CX), Y9
- VMOVDQU 4128(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 4160(CX), Y9
- VMOVDQU 4192(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 4224(CX), Y9
- VMOVDQU 4256(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 4288(CX), Y9
- VMOVDQU 4320(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 4352(CX), Y9
- VMOVDQU 4384(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 4416(CX), Y9
- VMOVDQU 4448(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 4480(CX), Y9
- VMOVDQU 4512(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 4544(CX), Y9
- VMOVDQU 4576(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 9 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 4608(CX), Y9
- VMOVDQU 4640(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 4672(CX), Y9
- VMOVDQU 4704(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 4736(CX), Y9
- VMOVDQU 4768(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 4800(CX), Y9
- VMOVDQU 4832(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 4864(CX), Y9
- VMOVDQU 4896(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 4928(CX), Y9
- VMOVDQU 4960(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 4992(CX), Y9
- VMOVDQU 5024(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 5056(CX), Y9
- VMOVDQU 5088(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- MOVQ (R14), BP
- VMOVDQU Y0, (BP)(R15*1)
- MOVQ 24(R14), BP
- VMOVDQU Y1, (BP)(R15*1)
- MOVQ 48(R14), BP
- VMOVDQU Y2, (BP)(R15*1)
- MOVQ 72(R14), BP
- VMOVDQU Y3, (BP)(R15*1)
- MOVQ 96(R14), BP
- VMOVDQU Y4, (BP)(R15*1)
- MOVQ 120(R14), BP
- VMOVDQU Y5, (BP)(R15*1)
- MOVQ 144(R14), BP
- VMOVDQU Y6, (BP)(R15*1)
- MOVQ 168(R14), BP
- VMOVDQU Y7, (BP)(R15*1)
-
- // Prepare for next loop
- ADDQ $0x20, R15
- DECQ AX
- JNZ mulAvxTwo_10x8_loop
- VZEROUPPER
-
-mulAvxTwo_10x8_end:
- RET
-
-// func mulAvxTwo_10x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x8Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 173 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x8Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ start+72(FP), R15
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X8
- VPBROADCASTB X8, Y8
-
-mulAvxTwo_10x8Xor_loop:
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BX), Y11
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- MOVQ (R14), BP
- VMOVDQU (BP)(R15*1), Y0
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- MOVQ 24(R14), BP
- VMOVDQU (BP)(R15*1), Y1
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- MOVQ 48(R14), BP
- VMOVDQU (BP)(R15*1), Y2
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- MOVQ 72(R14), BP
- VMOVDQU (BP)(R15*1), Y3
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- MOVQ 96(R14), BP
- VMOVDQU (BP)(R15*1), Y4
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- MOVQ 120(R14), BP
- VMOVDQU (BP)(R15*1), Y5
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- MOVQ 144(R14), BP
- VMOVDQU (BP)(R15*1), Y6
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- MOVQ 168(R14), BP
- VMOVDQU (BP)(R15*1), Y7
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI), Y11
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DI), Y11
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (R8), Y11
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 4 to 8 outputs
- VMOVDQU (R9), Y11
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 5 to 8 outputs
- VMOVDQU (R10), Y11
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2560(CX), Y9
- VMOVDQU 2592(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 2624(CX), Y9
- VMOVDQU 2656(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 2688(CX), Y9
- VMOVDQU 2720(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 2752(CX), Y9
- VMOVDQU 2784(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 2816(CX), Y9
- VMOVDQU 2848(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 2880(CX), Y9
- VMOVDQU 2912(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 2944(CX), Y9
- VMOVDQU 2976(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3008(CX), Y9
- VMOVDQU 3040(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 6 to 8 outputs
- VMOVDQU (R11), Y11
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3072(CX), Y9
- VMOVDQU 3104(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 3136(CX), Y9
- VMOVDQU 3168(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 3200(CX), Y9
- VMOVDQU 3232(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 3264(CX), Y9
- VMOVDQU 3296(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 3328(CX), Y9
- VMOVDQU 3360(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 3392(CX), Y9
- VMOVDQU 3424(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 3456(CX), Y9
- VMOVDQU 3488(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 3520(CX), Y9
- VMOVDQU 3552(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 7 to 8 outputs
- VMOVDQU (R12), Y11
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3584(CX), Y9
- VMOVDQU 3616(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 3648(CX), Y9
- VMOVDQU 3680(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 3712(CX), Y9
- VMOVDQU 3744(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 3776(CX), Y9
- VMOVDQU 3808(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 3840(CX), Y9
- VMOVDQU 3872(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 3904(CX), Y9
- VMOVDQU 3936(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 3968(CX), Y9
- VMOVDQU 4000(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 4032(CX), Y9
- VMOVDQU 4064(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 8 to 8 outputs
- VMOVDQU (R13), Y11
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 4096(CX), Y9
- VMOVDQU 4128(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 4160(CX), Y9
- VMOVDQU 4192(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 4224(CX), Y9
- VMOVDQU 4256(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 4288(CX), Y9
- VMOVDQU 4320(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 4352(CX), Y9
- VMOVDQU 4384(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 4416(CX), Y9
- VMOVDQU 4448(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 4480(CX), Y9
- VMOVDQU 4512(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 4544(CX), Y9
- VMOVDQU 4576(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Load and process 32 bytes from input 9 to 8 outputs
- VMOVDQU (DX), Y11
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 4608(CX), Y9
- VMOVDQU 4640(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y0)
- VMOVDQU 4672(CX), Y9
- VMOVDQU 4704(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y1)
- VMOVDQU 4736(CX), Y9
- VMOVDQU 4768(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VMOVDQU 4800(CX), Y9
- VMOVDQU 4832(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y3)
- VMOVDQU 4864(CX), Y9
- VMOVDQU 4896(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU 4928(CX), Y9
- VMOVDQU 4960(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y5)
- VMOVDQU 4992(CX), Y9
- VMOVDQU 5024(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VMOVDQU 5056(CX), Y9
- VMOVDQU 5088(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y9, Y10, Y7)
-
- // Store 8 outputs
- MOVQ (R14), BP
- VMOVDQU Y0, (BP)(R15*1)
- MOVQ 24(R14), BP
- VMOVDQU Y1, (BP)(R15*1)
- MOVQ 48(R14), BP
- VMOVDQU Y2, (BP)(R15*1)
- MOVQ 72(R14), BP
- VMOVDQU Y3, (BP)(R15*1)
- MOVQ 96(R14), BP
- VMOVDQU Y4, (BP)(R15*1)
- MOVQ 120(R14), BP
- VMOVDQU Y5, (BP)(R15*1)
- MOVQ 144(R14), BP
- VMOVDQU Y6, (BP)(R15*1)
- MOVQ 168(R14), BP
- VMOVDQU Y7, (BP)(R15*1)
-
- // Prepare for next loop
- ADDQ $0x20, R15
- DECQ AX
- JNZ mulAvxTwo_10x8Xor_loop
- VZEROUPPER
-
-mulAvxTwo_10x8Xor_end:
- RET
-
-// func mulAvxTwo_10x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x9(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 194 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x9_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ start+72(FP), R15
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X9
- VPBROADCASTB X9, Y9
-
-mulAvxTwo_10x9_loop:
- // Load and process 32 bytes from input 0 to 9 outputs
- VMOVDQU (BX), Y12
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU (CX), Y10
- VMOVDQU 32(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y0
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y1
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y2
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y3
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y4
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y5
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y6
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y7
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- VPXOR Y10, Y11, Y8
-
- // Load and process 32 bytes from input 1 to 9 outputs
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 9 outputs
- VMOVDQU (DI), Y12
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1152(CX), Y10
- VMOVDQU 1184(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1216(CX), Y10
- VMOVDQU 1248(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1280(CX), Y10
- VMOVDQU 1312(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1344(CX), Y10
- VMOVDQU 1376(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1408(CX), Y10
- VMOVDQU 1440(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 1472(CX), Y10
- VMOVDQU 1504(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 1536(CX), Y10
- VMOVDQU 1568(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1600(CX), Y10
- VMOVDQU 1632(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1664(CX), Y10
- VMOVDQU 1696(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 3 to 9 outputs
- VMOVDQU (R8), Y12
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1728(CX), Y10
- VMOVDQU 1760(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1792(CX), Y10
- VMOVDQU 1824(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1856(CX), Y10
- VMOVDQU 1888(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1920(CX), Y10
- VMOVDQU 1952(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1984(CX), Y10
- VMOVDQU 2016(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2048(CX), Y10
- VMOVDQU 2080(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2112(CX), Y10
- VMOVDQU 2144(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2176(CX), Y10
- VMOVDQU 2208(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2240(CX), Y10
- VMOVDQU 2272(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 4 to 9 outputs
- VMOVDQU (R9), Y12
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2304(CX), Y10
- VMOVDQU 2336(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2368(CX), Y10
- VMOVDQU 2400(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 2432(CX), Y10
- VMOVDQU 2464(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 2496(CX), Y10
- VMOVDQU 2528(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 2560(CX), Y10
- VMOVDQU 2592(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2624(CX), Y10
- VMOVDQU 2656(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2688(CX), Y10
- VMOVDQU 2720(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2752(CX), Y10
- VMOVDQU 2784(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2816(CX), Y10
- VMOVDQU 2848(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 5 to 9 outputs
- VMOVDQU (R10), Y12
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2880(CX), Y10
- VMOVDQU 2912(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2944(CX), Y10
- VMOVDQU 2976(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3008(CX), Y10
- VMOVDQU 3040(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3072(CX), Y10
- VMOVDQU 3104(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3136(CX), Y10
- VMOVDQU 3168(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3200(CX), Y10
- VMOVDQU 3232(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3264(CX), Y10
- VMOVDQU 3296(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3328(CX), Y10
- VMOVDQU 3360(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3392(CX), Y10
- VMOVDQU 3424(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 6 to 9 outputs
- VMOVDQU (R11), Y12
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 3456(CX), Y10
- VMOVDQU 3488(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 3520(CX), Y10
- VMOVDQU 3552(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3584(CX), Y10
- VMOVDQU 3616(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3648(CX), Y10
- VMOVDQU 3680(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3712(CX), Y10
- VMOVDQU 3744(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3776(CX), Y10
- VMOVDQU 3808(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3840(CX), Y10
- VMOVDQU 3872(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3904(CX), Y10
- VMOVDQU 3936(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3968(CX), Y10
- VMOVDQU 4000(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 7 to 9 outputs
- VMOVDQU (R12), Y12
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 4032(CX), Y10
- VMOVDQU 4064(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 4096(CX), Y10
- VMOVDQU 4128(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 4160(CX), Y10
- VMOVDQU 4192(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 4224(CX), Y10
- VMOVDQU 4256(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 4288(CX), Y10
- VMOVDQU 4320(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 4352(CX), Y10
- VMOVDQU 4384(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 4416(CX), Y10
- VMOVDQU 4448(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 4480(CX), Y10
- VMOVDQU 4512(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 4544(CX), Y10
- VMOVDQU 4576(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 8 to 9 outputs
- VMOVDQU (R13), Y12
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 4608(CX), Y10
- VMOVDQU 4640(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 4672(CX), Y10
- VMOVDQU 4704(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 4736(CX), Y10
- VMOVDQU 4768(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 4800(CX), Y10
- VMOVDQU 4832(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 4864(CX), Y10
- VMOVDQU 4896(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 4928(CX), Y10
- VMOVDQU 4960(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 4992(CX), Y10
- VMOVDQU 5024(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 5056(CX), Y10
- VMOVDQU 5088(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 5120(CX), Y10
- VMOVDQU 5152(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 9 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 5184(CX), Y10
- VMOVDQU 5216(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 5248(CX), Y10
- VMOVDQU 5280(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 5312(CX), Y10
- VMOVDQU 5344(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 5376(CX), Y10
- VMOVDQU 5408(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 5440(CX), Y10
- VMOVDQU 5472(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 5504(CX), Y10
- VMOVDQU 5536(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 5568(CX), Y10
- VMOVDQU 5600(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 5632(CX), Y10
- VMOVDQU 5664(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 5696(CX), Y10
- VMOVDQU 5728(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs
- MOVQ (R14), BP
- VMOVDQU Y0, (BP)(R15*1)
- MOVQ 24(R14), BP
- VMOVDQU Y1, (BP)(R15*1)
- MOVQ 48(R14), BP
- VMOVDQU Y2, (BP)(R15*1)
- MOVQ 72(R14), BP
- VMOVDQU Y3, (BP)(R15*1)
- MOVQ 96(R14), BP
- VMOVDQU Y4, (BP)(R15*1)
- MOVQ 120(R14), BP
- VMOVDQU Y5, (BP)(R15*1)
- MOVQ 144(R14), BP
- VMOVDQU Y6, (BP)(R15*1)
- MOVQ 168(R14), BP
- VMOVDQU Y7, (BP)(R15*1)
- MOVQ 192(R14), BP
- VMOVDQU Y8, (BP)(R15*1)
-
- // Prepare for next loop
- ADDQ $0x20, R15
- DECQ AX
- JNZ mulAvxTwo_10x9_loop
- VZEROUPPER
-
-mulAvxTwo_10x9_end:
- RET
-
-// func mulAvxTwo_10x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x9Xor(SB), NOSPLIT, $8-88 // $8-88: 8-byte local frame, 88 bytes of arguments
- // Per 32-byte chunk: split every byte of the 10 inputs into nibbles, look both nibbles up in tables from matrix, and XOR the results into 9 accumulators seeded from the current output bytes
- // No table is preloaded into a register: the 64-byte table pair for (input i, output j) is read from matrix offset (i*9 + j)*64 on every iteration, and output pointers are re-read from the out slice headers
- // Full registers estimated 194 YMM used
- MOVQ n+80(FP), AX // AX = n
- MOVQ matrix_base+0(FP), CX // CX = base address of the lookup tables
- SHRQ $0x05, AX // AX = n >> 5, the number of 32-byte iterations (any n % 32 tail is not processed here)
- TESTQ AX, AX
- JZ mulAvxTwo_10x9Xor_end // return immediately when the iteration count is zero
- MOVQ in_base+24(FP), DX // DX = address of the first input slice header
- MOVQ (DX), BX // BX = data pointer of in[0]; the following loads step 24 bytes per slice header
- MOVQ 24(DX), SI // in[1]
- MOVQ 48(DX), DI // in[2]
- MOVQ 72(DX), R8 // in[3]
- MOVQ 96(DX), R9 // in[4]
- MOVQ 120(DX), R10 // in[5]
- MOVQ 144(DX), R11 // in[6]
- MOVQ 168(DX), R12 // in[7]
- MOVQ 192(DX), R13 // in[8]
- MOVQ 216(DX), DX // in[9]; loaded last because it overwrites the header base held in DX
- MOVQ out_base+48(FP), R14 // R14 = address of the first output slice header
- MOVQ start+72(FP), R15 // R15 = start, used as the running byte offset into every output
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, BP // BP is scratch here and is reused for output pointers inside the loop
- MOVQ BP, X9
- VPBROADCASTB X9, Y9 // Y9 = 0x0f in every byte: the nibble mask
-
-mulAvxTwo_10x9Xor_loop:
- // Load and process 32 bytes from input 0 to 9 outputs (XOR3WAY is a macro defined outside this view; presumably it XORs its two source registers into the last operand - confirm)
- VMOVDQU (BX), Y12 // 32 bytes of in[0]
- ADDQ $0x20, BX // advance the in[0] pointer by 32
- VPSRLQ $0x04, Y12, Y13 // shift each 64-bit lane right by 4 bits
- VPAND Y9, Y12, Y12 // Y12 = low nibble of every input byte
- VPAND Y9, Y13, Y13 // Y13 = high nibble of every input byte
- MOVQ (R14), BP // BP = data pointer of out[0]
- VMOVDQU (BP)(R15*1), Y0 // seed accumulator Y0 with the bytes currently in out[0]
- VMOVDQU (CX), Y10 // table indexed by the low nibbles for (input 0, output 0)
- VMOVDQU 32(CX), Y11 // table indexed by the high nibbles for (input 0, output 0)
- VPSHUFB Y12, Y10, Y10 // per-byte table lookup using the low nibbles
- VPSHUFB Y13, Y11, Y11 // per-byte table lookup using the high nibbles
- XOR3WAY( $0x00, Y10, Y11, Y0)
- MOVQ 24(R14), BP // same pattern for out[1]..out[8]: seed Yk from the output, then fold in both lookups
- VMOVDQU (BP)(R15*1), Y1
- VMOVDQU 64(CX), Y10
- VMOVDQU 96(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- MOVQ 48(R14), BP
- VMOVDQU (BP)(R15*1), Y2
- VMOVDQU 128(CX), Y10
- VMOVDQU 160(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- MOVQ 72(R14), BP
- VMOVDQU (BP)(R15*1), Y3
- VMOVDQU 192(CX), Y10
- VMOVDQU 224(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- MOVQ 96(R14), BP
- VMOVDQU (BP)(R15*1), Y4
- VMOVDQU 256(CX), Y10
- VMOVDQU 288(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- MOVQ 120(R14), BP
- VMOVDQU (BP)(R15*1), Y5
- VMOVDQU 320(CX), Y10
- VMOVDQU 352(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- MOVQ 144(R14), BP
- VMOVDQU (BP)(R15*1), Y6
- VMOVDQU 384(CX), Y10
- VMOVDQU 416(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- MOVQ 168(R14), BP
- VMOVDQU (BP)(R15*1), Y7
- VMOVDQU 448(CX), Y10
- VMOVDQU 480(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- MOVQ 192(R14), BP
- VMOVDQU (BP)(R15*1), Y8
- VMOVDQU 512(CX), Y10
- VMOVDQU 544(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 1 to 9 outputs
- VMOVDQU (SI), Y12
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 576(CX), Y10
- VMOVDQU 608(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 640(CX), Y10
- VMOVDQU 672(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 704(CX), Y10
- VMOVDQU 736(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 768(CX), Y10
- VMOVDQU 800(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 832(CX), Y10
- VMOVDQU 864(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 896(CX), Y10
- VMOVDQU 928(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 960(CX), Y10
- VMOVDQU 992(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1024(CX), Y10
- VMOVDQU 1056(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1088(CX), Y10
- VMOVDQU 1120(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 2 to 9 outputs
- VMOVDQU (DI), Y12
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1152(CX), Y10
- VMOVDQU 1184(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1216(CX), Y10
- VMOVDQU 1248(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1280(CX), Y10
- VMOVDQU 1312(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1344(CX), Y10
- VMOVDQU 1376(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1408(CX), Y10
- VMOVDQU 1440(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 1472(CX), Y10
- VMOVDQU 1504(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 1536(CX), Y10
- VMOVDQU 1568(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 1600(CX), Y10
- VMOVDQU 1632(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 1664(CX), Y10
- VMOVDQU 1696(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 3 to 9 outputs
- VMOVDQU (R8), Y12
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 1728(CX), Y10
- VMOVDQU 1760(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 1792(CX), Y10
- VMOVDQU 1824(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 1856(CX), Y10
- VMOVDQU 1888(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 1920(CX), Y10
- VMOVDQU 1952(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 1984(CX), Y10
- VMOVDQU 2016(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2048(CX), Y10
- VMOVDQU 2080(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2112(CX), Y10
- VMOVDQU 2144(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2176(CX), Y10
- VMOVDQU 2208(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2240(CX), Y10
- VMOVDQU 2272(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 4 to 9 outputs
- VMOVDQU (R9), Y12
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2304(CX), Y10
- VMOVDQU 2336(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2368(CX), Y10
- VMOVDQU 2400(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 2432(CX), Y10
- VMOVDQU 2464(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 2496(CX), Y10
- VMOVDQU 2528(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 2560(CX), Y10
- VMOVDQU 2592(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 2624(CX), Y10
- VMOVDQU 2656(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 2688(CX), Y10
- VMOVDQU 2720(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 2752(CX), Y10
- VMOVDQU 2784(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 2816(CX), Y10
- VMOVDQU 2848(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 5 to 9 outputs
- VMOVDQU (R10), Y12
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 2880(CX), Y10
- VMOVDQU 2912(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 2944(CX), Y10
- VMOVDQU 2976(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3008(CX), Y10
- VMOVDQU 3040(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3072(CX), Y10
- VMOVDQU 3104(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3136(CX), Y10
- VMOVDQU 3168(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3200(CX), Y10
- VMOVDQU 3232(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3264(CX), Y10
- VMOVDQU 3296(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3328(CX), Y10
- VMOVDQU 3360(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3392(CX), Y10
- VMOVDQU 3424(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 6 to 9 outputs
- VMOVDQU (R11), Y12
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 3456(CX), Y10
- VMOVDQU 3488(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 3520(CX), Y10
- VMOVDQU 3552(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 3584(CX), Y10
- VMOVDQU 3616(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 3648(CX), Y10
- VMOVDQU 3680(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 3712(CX), Y10
- VMOVDQU 3744(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 3776(CX), Y10
- VMOVDQU 3808(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 3840(CX), Y10
- VMOVDQU 3872(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 3904(CX), Y10
- VMOVDQU 3936(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 3968(CX), Y10
- VMOVDQU 4000(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 7 to 9 outputs
- VMOVDQU (R12), Y12
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 4032(CX), Y10
- VMOVDQU 4064(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 4096(CX), Y10
- VMOVDQU 4128(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 4160(CX), Y10
- VMOVDQU 4192(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 4224(CX), Y10
- VMOVDQU 4256(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 4288(CX), Y10
- VMOVDQU 4320(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 4352(CX), Y10
- VMOVDQU 4384(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 4416(CX), Y10
- VMOVDQU 4448(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 4480(CX), Y10
- VMOVDQU 4512(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 4544(CX), Y10
- VMOVDQU 4576(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 8 to 9 outputs
- VMOVDQU (R13), Y12
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 4608(CX), Y10
- VMOVDQU 4640(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 4672(CX), Y10
- VMOVDQU 4704(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 4736(CX), Y10
- VMOVDQU 4768(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 4800(CX), Y10
- VMOVDQU 4832(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 4864(CX), Y10
- VMOVDQU 4896(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 4928(CX), Y10
- VMOVDQU 4960(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 4992(CX), Y10
- VMOVDQU 5024(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 5056(CX), Y10
- VMOVDQU 5088(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 5120(CX), Y10
- VMOVDQU 5152(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Load and process 32 bytes from input 9 to 9 outputs
- VMOVDQU (DX), Y12
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VMOVDQU 5184(CX), Y10
- VMOVDQU 5216(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y0)
- VMOVDQU 5248(CX), Y10
- VMOVDQU 5280(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y1)
- VMOVDQU 5312(CX), Y10
- VMOVDQU 5344(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y2)
- VMOVDQU 5376(CX), Y10
- VMOVDQU 5408(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y3)
- VMOVDQU 5440(CX), Y10
- VMOVDQU 5472(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y4)
- VMOVDQU 5504(CX), Y10
- VMOVDQU 5536(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y5)
- VMOVDQU 5568(CX), Y10
- VMOVDQU 5600(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y6)
- VMOVDQU 5632(CX), Y10
- VMOVDQU 5664(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y7)
- VMOVDQU 5696(CX), Y10
- VMOVDQU 5728(CX), Y11
- VPSHUFB Y12, Y10, Y10
- VPSHUFB Y13, Y11, Y11
- XOR3WAY( $0x00, Y10, Y11, Y8)
-
- // Store 9 outputs
- MOVQ (R14), BP // re-read the data pointer of out[0] from its slice header
- VMOVDQU Y0, (BP)(R15*1) // write 32 accumulated bytes at offset R15
- MOVQ 24(R14), BP
- VMOVDQU Y1, (BP)(R15*1)
- MOVQ 48(R14), BP
- VMOVDQU Y2, (BP)(R15*1)
- MOVQ 72(R14), BP
- VMOVDQU Y3, (BP)(R15*1)
- MOVQ 96(R14), BP
- VMOVDQU Y4, (BP)(R15*1)
- MOVQ 120(R14), BP
- VMOVDQU Y5, (BP)(R15*1)
- MOVQ 144(R14), BP
- VMOVDQU Y6, (BP)(R15*1)
- MOVQ 168(R14), BP
- VMOVDQU Y7, (BP)(R15*1)
- MOVQ 192(R14), BP
- VMOVDQU Y8, (BP)(R15*1)
-
- // Prepare for next loop
- ADDQ $0x20, R15 // advance the shared output offset by 32 bytes
- DECQ AX // one fewer 32-byte chunk remaining
- JNZ mulAvxTwo_10x9Xor_loop
- VZEROUPPER // zero the upper halves of the YMM registers before returning
-
-mulAvxTwo_10x9Xor_end:
- RET
-
-// func mulAvxTwo_10x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x10(SB), NOSPLIT, $8-88 // $8-88: 8-byte local frame, 88 bytes of arguments
- // Per 32-byte chunk: split every byte of the 10 inputs into nibbles, look both nibbles up in tables from matrix, and combine the results by XOR into 10 accumulators that overwrite the outputs
- // No table is preloaded into a register: the 64-byte table pair for (input i, output j) is read from matrix offset (i*10 + j)*64 on every iteration, and output pointers are re-read from the out slice headers
- // Full registers estimated 215 YMM used
- MOVQ n+80(FP), AX // AX = n
- MOVQ matrix_base+0(FP), CX // CX = base address of the lookup tables
- SHRQ $0x05, AX // AX = n >> 5, the number of 32-byte iterations (any n % 32 tail is not processed here)
- TESTQ AX, AX
- JZ mulAvxTwo_10x10_end // return immediately when the iteration count is zero
- MOVQ in_base+24(FP), DX // DX = address of the first input slice header
- MOVQ (DX), BX // BX = data pointer of in[0]; the following loads step 24 bytes per slice header
- MOVQ 24(DX), SI // in[1]
- MOVQ 48(DX), DI // in[2]
- MOVQ 72(DX), R8 // in[3]
- MOVQ 96(DX), R9 // in[4]
- MOVQ 120(DX), R10 // in[5]
- MOVQ 144(DX), R11 // in[6]
- MOVQ 168(DX), R12 // in[7]
- MOVQ 192(DX), R13 // in[8]
- MOVQ 216(DX), DX // in[9]; loaded last because it overwrites the header base held in DX
- MOVQ out_base+48(FP), R14 // R14 = address of the first output slice header
- MOVQ start+72(FP), R15 // R15 = start, used as the running byte offset into every output
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, BP // BP is scratch here and is reused for output pointers when storing
- MOVQ BP, X10
- VPBROADCASTB X10, Y10 // Y10 = 0x0f in every byte: the nibble mask
-
-mulAvxTwo_10x10_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (BX), Y13 // 32 bytes of in[0]
- ADDQ $0x20, BX // advance the in[0] pointer by 32
- VPSRLQ $0x04, Y13, Y14 // shift each 64-bit lane right by 4 bits
- VPAND Y10, Y13, Y13 // Y13 = low nibble of every input byte
- VPAND Y10, Y14, Y14 // Y14 = high nibble of every input byte
- VMOVDQU (CX), Y11 // table indexed by the low nibbles for (input 0, output 0)
- VMOVDQU 32(CX), Y12 // table indexed by the high nibbles for (input 0, output 0)
- VPSHUFB Y13, Y11, Y11 // per-byte table lookup using the low nibbles
- VPSHUFB Y14, Y12, Y12 // per-byte table lookup using the high nibbles
- VPXOR Y11, Y12, Y0 // Y0 is overwritten, so input 0 initialises the accumulator
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y1
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y2
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y3
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y4
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y5
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y6
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y7
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y8
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- VPXOR Y11, Y12, Y9
-
- // Load and process 32 bytes from input 1 to 10 outputs (XOR3WAY is a macro defined outside this view; presumably it XORs its two source registers into the last operand - confirm)
- VMOVDQU (SI), Y13
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 2 to 10 outputs
- VMOVDQU (DI), Y13
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1280(CX), Y11
- VMOVDQU 1312(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1344(CX), Y11
- VMOVDQU 1376(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 1408(CX), Y11
- VMOVDQU 1440(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 1472(CX), Y11
- VMOVDQU 1504(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 1536(CX), Y11
- VMOVDQU 1568(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 1600(CX), Y11
- VMOVDQU 1632(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1664(CX), Y11
- VMOVDQU 1696(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1728(CX), Y11
- VMOVDQU 1760(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1792(CX), Y11
- VMOVDQU 1824(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1856(CX), Y11
- VMOVDQU 1888(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 3 to 10 outputs
- VMOVDQU (R8), Y13
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1920(CX), Y11
- VMOVDQU 1952(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1984(CX), Y11
- VMOVDQU 2016(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2048(CX), Y11
- VMOVDQU 2080(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2112(CX), Y11
- VMOVDQU 2144(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2176(CX), Y11
- VMOVDQU 2208(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2240(CX), Y11
- VMOVDQU 2272(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2304(CX), Y11
- VMOVDQU 2336(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 2368(CX), Y11
- VMOVDQU 2400(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 2432(CX), Y11
- VMOVDQU 2464(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 2496(CX), Y11
- VMOVDQU 2528(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 4 to 10 outputs
- VMOVDQU (R9), Y13
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 2560(CX), Y11
- VMOVDQU 2592(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 2624(CX), Y11
- VMOVDQU 2656(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2688(CX), Y11
- VMOVDQU 2720(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2752(CX), Y11
- VMOVDQU 2784(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2816(CX), Y11
- VMOVDQU 2848(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2880(CX), Y11
- VMOVDQU 2912(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2944(CX), Y11
- VMOVDQU 2976(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3008(CX), Y11
- VMOVDQU 3040(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3072(CX), Y11
- VMOVDQU 3104(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3136(CX), Y11
- VMOVDQU 3168(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 5 to 10 outputs
- VMOVDQU (R10), Y13
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3200(CX), Y11
- VMOVDQU 3232(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3264(CX), Y11
- VMOVDQU 3296(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3328(CX), Y11
- VMOVDQU 3360(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 3392(CX), Y11
- VMOVDQU 3424(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 3456(CX), Y11
- VMOVDQU 3488(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 3520(CX), Y11
- VMOVDQU 3552(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 3584(CX), Y11
- VMOVDQU 3616(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3648(CX), Y11
- VMOVDQU 3680(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3712(CX), Y11
- VMOVDQU 3744(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3776(CX), Y11
- VMOVDQU 3808(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 6 to 10 outputs
- VMOVDQU (R11), Y13
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3840(CX), Y11
- VMOVDQU 3872(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3904(CX), Y11
- VMOVDQU 3936(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3968(CX), Y11
- VMOVDQU 4000(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 4032(CX), Y11
- VMOVDQU 4064(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 4096(CX), Y11
- VMOVDQU 4128(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 4160(CX), Y11
- VMOVDQU 4192(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 4224(CX), Y11
- VMOVDQU 4256(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 4288(CX), Y11
- VMOVDQU 4320(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 4352(CX), Y11
- VMOVDQU 4384(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 4416(CX), Y11
- VMOVDQU 4448(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 7 to 10 outputs
- VMOVDQU (R12), Y13
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 4480(CX), Y11
- VMOVDQU 4512(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 4544(CX), Y11
- VMOVDQU 4576(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 4608(CX), Y11
- VMOVDQU 4640(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 4672(CX), Y11
- VMOVDQU 4704(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 4736(CX), Y11
- VMOVDQU 4768(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 4800(CX), Y11
- VMOVDQU 4832(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 4864(CX), Y11
- VMOVDQU 4896(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 4928(CX), Y11
- VMOVDQU 4960(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 4992(CX), Y11
- VMOVDQU 5024(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 5056(CX), Y11
- VMOVDQU 5088(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 8 to 10 outputs
- VMOVDQU (R13), Y13
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 5120(CX), Y11
- VMOVDQU 5152(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 5184(CX), Y11
- VMOVDQU 5216(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 5248(CX), Y11
- VMOVDQU 5280(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 5312(CX), Y11
- VMOVDQU 5344(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 5376(CX), Y11
- VMOVDQU 5408(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 5440(CX), Y11
- VMOVDQU 5472(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 5504(CX), Y11
- VMOVDQU 5536(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 5568(CX), Y11
- VMOVDQU 5600(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 5632(CX), Y11
- VMOVDQU 5664(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 5696(CX), Y11
- VMOVDQU 5728(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 9 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 5760(CX), Y11
- VMOVDQU 5792(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 5824(CX), Y11
- VMOVDQU 5856(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 5888(CX), Y11
- VMOVDQU 5920(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 5952(CX), Y11
- VMOVDQU 5984(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 6016(CX), Y11
- VMOVDQU 6048(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 6080(CX), Y11
- VMOVDQU 6112(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 6144(CX), Y11
- VMOVDQU 6176(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 6208(CX), Y11
- VMOVDQU 6240(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 6272(CX), Y11
- VMOVDQU 6304(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 6336(CX), Y11
- VMOVDQU 6368(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs
- MOVQ (R14), BP // read the data pointer of out[0] from its slice header
- VMOVDQU Y0, (BP)(R15*1) // write 32 accumulated bytes at offset R15
- MOVQ 24(R14), BP
- VMOVDQU Y1, (BP)(R15*1)
- MOVQ 48(R14), BP
- VMOVDQU Y2, (BP)(R15*1)
- MOVQ 72(R14), BP
- VMOVDQU Y3, (BP)(R15*1)
- MOVQ 96(R14), BP
- VMOVDQU Y4, (BP)(R15*1)
- MOVQ 120(R14), BP
- VMOVDQU Y5, (BP)(R15*1)
- MOVQ 144(R14), BP
- VMOVDQU Y6, (BP)(R15*1)
- MOVQ 168(R14), BP
- VMOVDQU Y7, (BP)(R15*1)
- MOVQ 192(R14), BP
- VMOVDQU Y8, (BP)(R15*1)
- MOVQ 216(R14), BP
- VMOVDQU Y9, (BP)(R15*1)
-
- // Prepare for next loop
- ADDQ $0x20, R15 // advance the shared output offset by 32 bytes
- DECQ AX // one fewer 32-byte chunk remaining
- JNZ mulAvxTwo_10x10_loop
- VZEROUPPER // zero the upper halves of the YMM registers before returning
-
-mulAvxTwo_10x10_end:
- RET
-
-// func mulAvxTwo_10x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·mulAvxTwo_10x10Xor(SB), NOSPLIT, $8-88
- // Loading no tables to registers
- // Destination kept on stack
- // Full registers estimated 215 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x10Xor_end
- MOVQ in_base+24(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), SI
- MOVQ 48(DX), DI
- MOVQ 72(DX), R8
- MOVQ 96(DX), R9
- MOVQ 120(DX), R10
- MOVQ 144(DX), R11
- MOVQ 168(DX), R12
- MOVQ 192(DX), R13
- MOVQ 216(DX), DX
- MOVQ out_base+48(FP), R14
- MOVQ start+72(FP), R15
-
- // Add start offset to input
- ADDQ R15, BX
- ADDQ R15, SI
- ADDQ R15, DI
- ADDQ R15, R8
- ADDQ R15, R9
- ADDQ R15, R10
- ADDQ R15, R11
- ADDQ R15, R12
- ADDQ R15, R13
- ADDQ R15, DX
- MOVQ $0x0000000f, BP
- MOVQ BP, X10
- VPBROADCASTB X10, Y10
-
-mulAvxTwo_10x10Xor_loop:
- // Load and process 32 bytes from input 0 to 10 outputs
- VMOVDQU (BX), Y13
- ADDQ $0x20, BX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- MOVQ (R14), BP
- VMOVDQU (BP)(R15*1), Y0
- VMOVDQU (CX), Y11
- VMOVDQU 32(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- MOVQ 24(R14), BP
- VMOVDQU (BP)(R15*1), Y1
- VMOVDQU 64(CX), Y11
- VMOVDQU 96(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- MOVQ 48(R14), BP
- VMOVDQU (BP)(R15*1), Y2
- VMOVDQU 128(CX), Y11
- VMOVDQU 160(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- MOVQ 72(R14), BP
- VMOVDQU (BP)(R15*1), Y3
- VMOVDQU 192(CX), Y11
- VMOVDQU 224(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- MOVQ 96(R14), BP
- VMOVDQU (BP)(R15*1), Y4
- VMOVDQU 256(CX), Y11
- VMOVDQU 288(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- MOVQ 120(R14), BP
- VMOVDQU (BP)(R15*1), Y5
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- MOVQ 144(R14), BP
- VMOVDQU (BP)(R15*1), Y6
- VMOVDQU 384(CX), Y11
- VMOVDQU 416(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- MOVQ 168(R14), BP
- VMOVDQU (BP)(R15*1), Y7
- VMOVDQU 448(CX), Y11
- VMOVDQU 480(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- MOVQ 192(R14), BP
- VMOVDQU (BP)(R15*1), Y8
- VMOVDQU 512(CX), Y11
- VMOVDQU 544(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- MOVQ 216(R14), BP
- VMOVDQU (BP)(R15*1), Y9
- VMOVDQU 576(CX), Y11
- VMOVDQU 608(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 1 to 10 outputs
- VMOVDQU (SI), Y13
- ADDQ $0x20, SI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 640(CX), Y11
- VMOVDQU 672(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 704(CX), Y11
- VMOVDQU 736(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 768(CX), Y11
- VMOVDQU 800(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 832(CX), Y11
- VMOVDQU 864(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 896(CX), Y11
- VMOVDQU 928(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 960(CX), Y11
- VMOVDQU 992(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1024(CX), Y11
- VMOVDQU 1056(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1088(CX), Y11
- VMOVDQU 1120(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1152(CX), Y11
- VMOVDQU 1184(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1216(CX), Y11
- VMOVDQU 1248(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 2 to 10 outputs
- VMOVDQU (DI), Y13
- ADDQ $0x20, DI
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1280(CX), Y11
- VMOVDQU 1312(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1344(CX), Y11
- VMOVDQU 1376(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 1408(CX), Y11
- VMOVDQU 1440(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 1472(CX), Y11
- VMOVDQU 1504(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 1536(CX), Y11
- VMOVDQU 1568(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 1600(CX), Y11
- VMOVDQU 1632(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 1664(CX), Y11
- VMOVDQU 1696(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 1728(CX), Y11
- VMOVDQU 1760(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 1792(CX), Y11
- VMOVDQU 1824(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 1856(CX), Y11
- VMOVDQU 1888(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 3 to 10 outputs
- VMOVDQU (R8), Y13
- ADDQ $0x20, R8
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 1920(CX), Y11
- VMOVDQU 1952(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 1984(CX), Y11
- VMOVDQU 2016(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2048(CX), Y11
- VMOVDQU 2080(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2112(CX), Y11
- VMOVDQU 2144(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2176(CX), Y11
- VMOVDQU 2208(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2240(CX), Y11
- VMOVDQU 2272(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2304(CX), Y11
- VMOVDQU 2336(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 2368(CX), Y11
- VMOVDQU 2400(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 2432(CX), Y11
- VMOVDQU 2464(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 2496(CX), Y11
- VMOVDQU 2528(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 4 to 10 outputs
- VMOVDQU (R9), Y13
- ADDQ $0x20, R9
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 2560(CX), Y11
- VMOVDQU 2592(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 2624(CX), Y11
- VMOVDQU 2656(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 2688(CX), Y11
- VMOVDQU 2720(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 2752(CX), Y11
- VMOVDQU 2784(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 2816(CX), Y11
- VMOVDQU 2848(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 2880(CX), Y11
- VMOVDQU 2912(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 2944(CX), Y11
- VMOVDQU 2976(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3008(CX), Y11
- VMOVDQU 3040(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3072(CX), Y11
- VMOVDQU 3104(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3136(CX), Y11
- VMOVDQU 3168(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 5 to 10 outputs
- VMOVDQU (R10), Y13
- ADDQ $0x20, R10
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3200(CX), Y11
- VMOVDQU 3232(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3264(CX), Y11
- VMOVDQU 3296(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3328(CX), Y11
- VMOVDQU 3360(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 3392(CX), Y11
- VMOVDQU 3424(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 3456(CX), Y11
- VMOVDQU 3488(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 3520(CX), Y11
- VMOVDQU 3552(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 3584(CX), Y11
- VMOVDQU 3616(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 3648(CX), Y11
- VMOVDQU 3680(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 3712(CX), Y11
- VMOVDQU 3744(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 3776(CX), Y11
- VMOVDQU 3808(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 6 to 10 outputs
- VMOVDQU (R11), Y13
- ADDQ $0x20, R11
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 3840(CX), Y11
- VMOVDQU 3872(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 3904(CX), Y11
- VMOVDQU 3936(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 3968(CX), Y11
- VMOVDQU 4000(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 4032(CX), Y11
- VMOVDQU 4064(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 4096(CX), Y11
- VMOVDQU 4128(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 4160(CX), Y11
- VMOVDQU 4192(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 4224(CX), Y11
- VMOVDQU 4256(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 4288(CX), Y11
- VMOVDQU 4320(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 4352(CX), Y11
- VMOVDQU 4384(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 4416(CX), Y11
- VMOVDQU 4448(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 7 to 10 outputs
- VMOVDQU (R12), Y13
- ADDQ $0x20, R12
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 4480(CX), Y11
- VMOVDQU 4512(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 4544(CX), Y11
- VMOVDQU 4576(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 4608(CX), Y11
- VMOVDQU 4640(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 4672(CX), Y11
- VMOVDQU 4704(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 4736(CX), Y11
- VMOVDQU 4768(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 4800(CX), Y11
- VMOVDQU 4832(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 4864(CX), Y11
- VMOVDQU 4896(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 4928(CX), Y11
- VMOVDQU 4960(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 4992(CX), Y11
- VMOVDQU 5024(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 5056(CX), Y11
- VMOVDQU 5088(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 8 to 10 outputs
- VMOVDQU (R13), Y13
- ADDQ $0x20, R13
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 5120(CX), Y11
- VMOVDQU 5152(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 5184(CX), Y11
- VMOVDQU 5216(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 5248(CX), Y11
- VMOVDQU 5280(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 5312(CX), Y11
- VMOVDQU 5344(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 5376(CX), Y11
- VMOVDQU 5408(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 5440(CX), Y11
- VMOVDQU 5472(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 5504(CX), Y11
- VMOVDQU 5536(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 5568(CX), Y11
- VMOVDQU 5600(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 5632(CX), Y11
- VMOVDQU 5664(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 5696(CX), Y11
- VMOVDQU 5728(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Load and process 32 bytes from input 9 to 10 outputs
- VMOVDQU (DX), Y13
- ADDQ $0x20, DX
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VMOVDQU 5760(CX), Y11
- VMOVDQU 5792(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y0)
- VMOVDQU 5824(CX), Y11
- VMOVDQU 5856(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y1)
- VMOVDQU 5888(CX), Y11
- VMOVDQU 5920(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y2)
- VMOVDQU 5952(CX), Y11
- VMOVDQU 5984(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y3)
- VMOVDQU 6016(CX), Y11
- VMOVDQU 6048(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y4)
- VMOVDQU 6080(CX), Y11
- VMOVDQU 6112(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y5)
- VMOVDQU 6144(CX), Y11
- VMOVDQU 6176(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y6)
- VMOVDQU 6208(CX), Y11
- VMOVDQU 6240(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y7)
- VMOVDQU 6272(CX), Y11
- VMOVDQU 6304(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y8)
- VMOVDQU 6336(CX), Y11
- VMOVDQU 6368(CX), Y12
- VPSHUFB Y13, Y11, Y11
- VPSHUFB Y14, Y12, Y12
- XOR3WAY( $0x00, Y11, Y12, Y9)
-
- // Store 10 outputs
- MOVQ (R14), BP
- VMOVDQU Y0, (BP)(R15*1)
- MOVQ 24(R14), BP
- VMOVDQU Y1, (BP)(R15*1)
- MOVQ 48(R14), BP
- VMOVDQU Y2, (BP)(R15*1)
- MOVQ 72(R14), BP
- VMOVDQU Y3, (BP)(R15*1)
- MOVQ 96(R14), BP
- VMOVDQU Y4, (BP)(R15*1)
- MOVQ 120(R14), BP
- VMOVDQU Y5, (BP)(R15*1)
- MOVQ 144(R14), BP
- VMOVDQU Y6, (BP)(R15*1)
- MOVQ 168(R14), BP
- VMOVDQU Y7, (BP)(R15*1)
- MOVQ 192(R14), BP
- VMOVDQU Y8, (BP)(R15*1)
- MOVQ 216(R14), BP
- VMOVDQU Y9, (BP)(R15*1)
-
- // Prepare for next loop
- ADDQ $0x20, R15
- DECQ AX
- JNZ mulAvxTwo_10x10Xor_loop
- VZEROUPPER
-
-mulAvxTwo_10x10Xor_end:
- RET
-
-// func ifftDIT2_avx2(x []byte, y []byte, table *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·ifftDIT2_avx2(SB), NOSPLIT, $0-56
- MOVQ table+48(FP), AX
- VBROADCASTI128 (AX), Y0
- VBROADCASTI128 64(AX), Y1
- VBROADCASTI128 16(AX), Y2
- VBROADCASTI128 80(AX), Y3
- VBROADCASTI128 32(AX), Y4
- VBROADCASTI128 96(AX), Y5
- VBROADCASTI128 48(AX), Y6
- VBROADCASTI128 112(AX), Y7
- MOVQ x_len+8(FP), AX
- MOVQ x_base+0(FP), CX
- MOVQ y_base+24(FP), DX
- MOVQ $0x0000000f, BX
- MOVQ BX, X8
- VPBROADCASTB X8, Y8
-
-loop:
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y12
- VPXOR Y11, Y9, Y11
- VPXOR Y12, Y10, Y12
- VMOVDQU Y11, (DX)
- VMOVDQU Y12, 32(DX)
- VPSRLQ $0x04, Y11, Y13
- VPAND Y8, Y11, Y11
- VPAND Y8, Y13, Y13
- VPSHUFB Y11, Y0, Y14
- VPSHUFB Y11, Y1, Y11
- VPSHUFB Y13, Y2, Y15
- VPSHUFB Y13, Y3, Y13
- VPXOR Y14, Y15, Y14
- VPXOR Y11, Y13, Y11
- VPAND Y12, Y8, Y13
- VPSRLQ $0x04, Y12, Y12
- VPAND Y8, Y12, Y12
- VPSHUFB Y13, Y4, Y15
- VPSHUFB Y13, Y5, Y13
- VPXOR Y14, Y15, Y14
- VPXOR Y11, Y13, Y11
- VPSHUFB Y12, Y6, Y15
- VPSHUFB Y12, Y7, Y13
- XOR3WAY( $0x00, Y14, Y15, Y9)
- XOR3WAY( $0x00, Y11, Y13, Y10)
- VMOVDQU Y9, (CX)
- VMOVDQU Y10, 32(CX)
- ADDQ $0x40, CX
- ADDQ $0x40, DX
- SUBQ $0x40, AX
- JNZ loop
- VZEROUPPER
- RET
-
-// func fftDIT2_avx2(x []byte, y []byte, table *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·fftDIT2_avx2(SB), NOSPLIT, $0-56
- MOVQ table+48(FP), AX
- VBROADCASTI128 (AX), Y0
- VBROADCASTI128 64(AX), Y1
- VBROADCASTI128 16(AX), Y2
- VBROADCASTI128 80(AX), Y3
- VBROADCASTI128 32(AX), Y4
- VBROADCASTI128 96(AX), Y5
- VBROADCASTI128 48(AX), Y6
- VBROADCASTI128 112(AX), Y7
- MOVQ x_len+8(FP), AX
- MOVQ x_base+0(FP), CX
- MOVQ y_base+24(FP), DX
- MOVQ $0x0000000f, BX
- MOVQ BX, X8
- VPBROADCASTB X8, Y8
-
-loop:
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y12
- VPSRLQ $0x04, Y11, Y13
- VPAND Y8, Y11, Y11
- VPAND Y8, Y13, Y13
- VPSHUFB Y11, Y0, Y14
- VPSHUFB Y11, Y1, Y11
- VPSHUFB Y13, Y2, Y15
- VPSHUFB Y13, Y3, Y13
- VPXOR Y14, Y15, Y14
- VPXOR Y11, Y13, Y11
- VPAND Y12, Y8, Y13
- VPSRLQ $0x04, Y12, Y12
- VPAND Y8, Y12, Y12
- VPSHUFB Y13, Y4, Y15
- VPSHUFB Y13, Y5, Y13
- VPXOR Y14, Y15, Y14
- VPXOR Y11, Y13, Y11
- VPSHUFB Y12, Y6, Y15
- VPSHUFB Y12, Y7, Y13
- XOR3WAY( $0x00, Y14, Y15, Y9)
- XOR3WAY( $0x00, Y11, Y13, Y10)
- VMOVDQU Y9, (CX)
- VMOVDQU Y10, 32(CX)
- VMOVDQU (DX), Y11
- VMOVDQU 32(DX), Y12
- VPXOR Y11, Y9, Y11
- VPXOR Y12, Y10, Y12
- VMOVDQU Y11, (DX)
- VMOVDQU Y12, 32(DX)
- ADDQ $0x40, CX
- ADDQ $0x40, DX
- SUBQ $0x40, AX
- JNZ loop
- VZEROUPPER
- RET
-
-// func mulgf16_avx2(x []byte, y []byte, table *[128]uint8)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulgf16_avx2(SB), NOSPLIT, $0-56
- MOVQ table+48(FP), AX
- VBROADCASTI128 (AX), Y0
- VBROADCASTI128 64(AX), Y1
- VBROADCASTI128 16(AX), Y2
- VBROADCASTI128 80(AX), Y3
- VBROADCASTI128 32(AX), Y4
- VBROADCASTI128 96(AX), Y5
- VBROADCASTI128 48(AX), Y6
- VBROADCASTI128 112(AX), Y7
- MOVQ x_len+8(FP), AX
- MOVQ x_base+0(FP), CX
- MOVQ y_base+24(FP), DX
- MOVQ $0x0000000f, BX
- MOVQ BX, X8
- VPBROADCASTB X8, Y8
-
-loop:
- VMOVDQU (DX), Y9
- VMOVDQU 32(DX), Y10
- VPSRLQ $0x04, Y9, Y11
- VPAND Y8, Y9, Y9
- VPAND Y8, Y11, Y11
- VPSHUFB Y9, Y0, Y12
- VPSHUFB Y9, Y1, Y9
- VPSHUFB Y11, Y2, Y13
- VPSHUFB Y11, Y3, Y11
- VPXOR Y12, Y13, Y12
- VPXOR Y9, Y11, Y9
- VPAND Y10, Y8, Y11
- VPSRLQ $0x04, Y10, Y10
- VPAND Y8, Y10, Y10
- VPSHUFB Y11, Y4, Y13
- VPSHUFB Y11, Y5, Y11
- VPXOR Y12, Y13, Y12
- VPXOR Y9, Y11, Y9
- VPSHUFB Y10, Y6, Y13
- VPSHUFB Y10, Y7, Y11
- VPXOR Y12, Y13, Y12
- VPXOR Y9, Y11, Y9
- VMOVDQU Y12, (CX)
- VMOVDQU Y9, 32(CX)
- ADDQ $0x40, CX
- ADDQ $0x40, DX
- SUBQ $0x40, AX
- JNZ loop
- VZEROUPPER
- RET
-
-// func ifftDIT4_avx512_0(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·ifftDIT4_avx512_0(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), CX
- MOVQ table02+48(FP), DX
- VBROADCASTI128 (DX), Y1
- VBROADCASTI128 64(DX), Y0
- VMOVAPS Z1, Z16
- VMOVAPS Z0, Z17
- VBROADCASTI128 16(DX), Y1
- VBROADCASTI128 80(DX), Y0
- VMOVAPS Z1, Z18
- VMOVAPS Z0, Z19
- VBROADCASTI128 32(DX), Y1
- VBROADCASTI128 96(DX), Y0
- VMOVAPS Z1, Z20
- VMOVAPS Z0, Z21
- VBROADCASTI128 48(DX), Y1
- VBROADCASTI128 112(DX), Y0
- VMOVAPS Z1, Z22
- VMOVAPS Z0, Z23
- VBROADCASTI128 (AX), Y1
- VBROADCASTI128 64(AX), Y0
- VMOVAPS Z1, Z24
- VMOVAPS Z0, Z25
- VBROADCASTI128 16(AX), Y1
- VBROADCASTI128 80(AX), Y0
- VMOVAPS Z1, Z26
- VMOVAPS Z0, Z27
- VBROADCASTI128 32(AX), Y1
- VBROADCASTI128 96(AX), Y0
- VMOVAPS Z1, Z28
- VMOVAPS Z0, Z29
- VBROADCASTI128 48(AX), Y1
- VBROADCASTI128 112(AX), Y0
- VMOVAPS Z1, Z30
- VMOVAPS Z0, Z31
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), DX
- MOVQ 8(DX), BX
- XORQ SI, SI
- MOVQ (DX)(SI*1), DI
- ADDQ AX, SI
- MOVQ (DX)(SI*1), R8
- ADDQ AX, SI
- MOVQ (DX)(SI*1), R9
- ADDQ AX, SI
- MOVQ (DX)(SI*1), AX
-
-loop:
- VMOVDQU (DI), Y1
- VMOVDQU 32(DI), Y2
- VMOVDQU (R8), Y3
- VMOVDQU 32(R8), Y4
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VPSRLQ $0x04, Y3, Y6
- VPAND Y0, Y3, Y5
- VPAND Y0, Y6, Y6
- VPSHUFB Y5, Y24, Y7
- VPSHUFB Y5, Y25, Y5
- VPSHUFB Y6, Y26, Y8
- VPSHUFB Y6, Y27, Y6
- VPXOR Y7, Y8, Y7
- VPXOR Y5, Y6, Y5
- VPAND Y4, Y0, Y6
- VPSRLQ $0x04, Y4, Y8
- VPAND Y0, Y8, Y8
- VPSHUFB Y6, Y28, Y9
- VPSHUFB Y6, Y29, Y6
- VPXOR Y7, Y9, Y7
- VPXOR Y5, Y6, Y5
- VPSHUFB Y8, Y30, Y9
- VPSHUFB Y8, Y31, Y6
- VPTERNLOGD $0x96, Y7, Y9, Y1
- VPTERNLOGD $0x96, Y5, Y6, Y2
- VMOVDQU (R9), Y5
- VMOVDQU 32(R9), Y6
- VMOVDQU (AX), Y7
- VMOVDQU 32(AX), Y8
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (CX), Y11
- VBROADCASTI128 64(CX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(CX), Y12
- VBROADCASTI128 80(CX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(CX), Y13
- VBROADCASTI128 96(CX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(CX), Y13
- VBROADCASTI128 112(CX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y5
- VPTERNLOGD $0x96, Y9, Y10, Y6
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPSRLQ $0x04, Y5, Y10
- VPAND Y0, Y5, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y6, Y0, Y10
- VPSRLQ $0x04, Y6, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y1
- VPTERNLOGD $0x96, Y9, Y10, Y2
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y3
- VPTERNLOGD $0x96, Y9, Y10, Y4
- VMOVDQU Y1, (DI)
- VMOVDQU Y2, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y3, (R8)
- VMOVDQU Y4, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y5, (R9)
- VMOVDQU Y6, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y7, (AX)
- VMOVDQU Y8, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, BX
- JNZ loop
- VZEROUPPER
- RET
-
-// func fftDIT4_avx512_0(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·fftDIT4_avx512_0(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), CX
- MOVQ table02+48(FP), DX
- VBROADCASTI128 (DX), Y1
- VBROADCASTI128 64(DX), Y0
- VMOVAPS Z1, Z16
- VMOVAPS Z0, Z17
- VBROADCASTI128 16(DX), Y1
- VBROADCASTI128 80(DX), Y0
- VMOVAPS Z1, Z18
- VMOVAPS Z0, Z19
- VBROADCASTI128 32(DX), Y1
- VBROADCASTI128 96(DX), Y0
- VMOVAPS Z1, Z20
- VMOVAPS Z0, Z21
- VBROADCASTI128 48(DX), Y1
- VBROADCASTI128 112(DX), Y0
- VMOVAPS Z1, Z22
- VMOVAPS Z0, Z23
- VBROADCASTI128 (AX), Y1
- VBROADCASTI128 64(AX), Y0
- VMOVAPS Z1, Z24
- VMOVAPS Z0, Z25
- VBROADCASTI128 16(AX), Y1
- VBROADCASTI128 80(AX), Y0
- VMOVAPS Z1, Z26
- VMOVAPS Z0, Z27
- VBROADCASTI128 32(AX), Y1
- VBROADCASTI128 96(AX), Y0
- VMOVAPS Z1, Z28
- VMOVAPS Z0, Z29
- VBROADCASTI128 48(AX), Y1
- VBROADCASTI128 112(AX), Y0
- VMOVAPS Z1, Z30
- VMOVAPS Z0, Z31
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), DX
- MOVQ 8(DX), BX
- XORQ SI, SI
- MOVQ (DX)(SI*1), DI
- ADDQ AX, SI
- MOVQ (DX)(SI*1), R8
- ADDQ AX, SI
- MOVQ (DX)(SI*1), R9
- ADDQ AX, SI
- MOVQ (DX)(SI*1), AX
-
-loop:
- VMOVDQU (DI), Y1
- VMOVDQU 32(DI), Y2
- VMOVDQU (R9), Y5
- VMOVDQU 32(R9), Y6
- VMOVDQU (R8), Y3
- VMOVDQU 32(R8), Y4
- VMOVDQU (AX), Y7
- VMOVDQU 32(AX), Y8
- VPSRLQ $0x04, Y5, Y10
- VPAND Y0, Y5, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y6, Y0, Y10
- VPSRLQ $0x04, Y6, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y1
- VPTERNLOGD $0x96, Y9, Y10, Y2
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y3
- VPTERNLOGD $0x96, Y9, Y10, Y4
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPSRLQ $0x04, Y3, Y10
- VPAND Y0, Y3, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y24, Y11
- VPSHUFB Y9, Y25, Y9
- VPSHUFB Y10, Y26, Y12
- VPSHUFB Y10, Y27, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y4, Y0, Y10
- VPSRLQ $0x04, Y4, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y28, Y13
- VPSHUFB Y10, Y29, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y30, Y13
- VPSHUFB Y12, Y31, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y1
- VPTERNLOGD $0x96, Y9, Y10, Y2
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU Y1, (DI)
- VMOVDQU Y2, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y3, (R8)
- VMOVDQU Y4, 32(R8)
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y7, Y2
- VPAND Y0, Y7, Y1
- VPAND Y0, Y2, Y2
- VBROADCASTI128 (CX), Y3
- VBROADCASTI128 64(CX), Y4
- VPSHUFB Y1, Y3, Y3
- VPSHUFB Y1, Y4, Y1
- VBROADCASTI128 16(CX), Y4
- VBROADCASTI128 80(CX), Y9
- VPSHUFB Y2, Y4, Y4
- VPSHUFB Y2, Y9, Y2
- VPXOR Y3, Y4, Y3
- VPXOR Y1, Y2, Y1
- VPAND Y8, Y0, Y2
- VPSRLQ $0x04, Y8, Y4
- VPAND Y0, Y4, Y4
- VBROADCASTI128 32(CX), Y9
- VBROADCASTI128 96(CX), Y10
- VPSHUFB Y2, Y9, Y9
- VPSHUFB Y2, Y10, Y2
- VPXOR Y3, Y9, Y3
- VPXOR Y1, Y2, Y1
- VBROADCASTI128 48(CX), Y9
- VBROADCASTI128 112(CX), Y2
- VPSHUFB Y4, Y9, Y9
- VPSHUFB Y4, Y2, Y2
- VPTERNLOGD $0x96, Y3, Y9, Y5
- VPTERNLOGD $0x96, Y1, Y2, Y6
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VMOVDQU Y5, (R9)
- VMOVDQU Y6, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y7, (AX)
- VMOVDQU Y8, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, BX
- JNZ loop
- VZEROUPPER
- RET
-
-// func ifftDIT4_avx512_1(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·ifftDIT4_avx512_1(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), AX
- MOVQ table02+48(FP), CX
- VBROADCASTI128 (CX), Y1
- VBROADCASTI128 64(CX), Y0
- VMOVAPS Z1, Z16
- VMOVAPS Z0, Z17
- VBROADCASTI128 16(CX), Y1
- VBROADCASTI128 80(CX), Y0
- VMOVAPS Z1, Z18
- VMOVAPS Z0, Z19
- VBROADCASTI128 32(CX), Y1
- VBROADCASTI128 96(CX), Y0
- VMOVAPS Z1, Z20
- VMOVAPS Z0, Z21
- VBROADCASTI128 48(CX), Y1
- VBROADCASTI128 112(CX), Y0
- VMOVAPS Z1, Z22
- VMOVAPS Z0, Z23
- VBROADCASTI128 (AX), Y1
- VBROADCASTI128 64(AX), Y0
- VMOVAPS Z1, Z24
- VMOVAPS Z0, Z25
- VBROADCASTI128 16(AX), Y1
- VBROADCASTI128 80(AX), Y0
- VMOVAPS Z1, Z26
- VMOVAPS Z0, Z27
- VBROADCASTI128 32(AX), Y1
- VBROADCASTI128 96(AX), Y0
- VMOVAPS Z1, Z28
- VMOVAPS Z0, Z29
- VBROADCASTI128 48(AX), Y1
- VBROADCASTI128 112(AX), Y0
- VMOVAPS Z1, Z30
- VMOVAPS Z0, Z31
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), CX
- MOVQ 8(CX), DX
- XORQ BX, BX
- MOVQ (CX)(BX*1), SI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), DI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), R8
- ADDQ AX, BX
- MOVQ (CX)(BX*1), AX
-
-loop:
- VMOVDQU (SI), Y1
- VMOVDQU 32(SI), Y2
- VMOVDQU (DI), Y3
- VMOVDQU 32(DI), Y4
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU (R8), Y5
- VMOVDQU 32(R8), Y6
- VMOVDQU (AX), Y7
- VMOVDQU 32(AX), Y8
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y24, Y11
- VPSHUFB Y9, Y25, Y9
- VPSHUFB Y10, Y26, Y12
- VPSHUFB Y10, Y27, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y28, Y13
- VPSHUFB Y10, Y29, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y30, Y13
- VPSHUFB Y12, Y31, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y5
- VPTERNLOGD $0x96, Y9, Y10, Y6
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPSRLQ $0x04, Y5, Y10
- VPAND Y0, Y5, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y6, Y0, Y10
- VPSRLQ $0x04, Y6, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y1
- VPTERNLOGD $0x96, Y9, Y10, Y2
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y3
- VPTERNLOGD $0x96, Y9, Y10, Y4
- VMOVDQU Y1, (SI)
- VMOVDQU Y2, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y3, (DI)
- VMOVDQU Y4, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y5, (R8)
- VMOVDQU Y6, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y7, (AX)
- VMOVDQU Y8, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, DX
- JNZ loop
- VZEROUPPER
- RET
-
-// func fftDIT4_avx512_1(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·fftDIT4_avx512_1(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), CX
- MOVQ table02+48(FP), DX
- VBROADCASTI128 (AX), Y1
- VBROADCASTI128 64(AX), Y0
- VMOVAPS Z1, Z16
- VMOVAPS Z0, Z17
- VBROADCASTI128 16(AX), Y1
- VBROADCASTI128 80(AX), Y0
- VMOVAPS Z1, Z18
- VMOVAPS Z0, Z19
- VBROADCASTI128 32(AX), Y1
- VBROADCASTI128 96(AX), Y0
- VMOVAPS Z1, Z20
- VMOVAPS Z0, Z21
- VBROADCASTI128 48(AX), Y1
- VBROADCASTI128 112(AX), Y0
- VMOVAPS Z1, Z22
- VMOVAPS Z0, Z23
- VBROADCASTI128 (CX), Y1
- VBROADCASTI128 64(CX), Y0
- VMOVAPS Z1, Z24
- VMOVAPS Z0, Z25
- VBROADCASTI128 16(CX), Y1
- VBROADCASTI128 80(CX), Y0
- VMOVAPS Z1, Z26
- VMOVAPS Z0, Z27
- VBROADCASTI128 32(CX), Y1
- VBROADCASTI128 96(CX), Y0
- VMOVAPS Z1, Z28
- VMOVAPS Z0, Z29
- VBROADCASTI128 48(CX), Y1
- VBROADCASTI128 112(CX), Y0
- VMOVAPS Z1, Z30
- VMOVAPS Z0, Z31
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), CX
- MOVQ 8(CX), DX
- XORQ BX, BX
- MOVQ (CX)(BX*1), SI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), DI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), R8
- ADDQ AX, BX
- MOVQ (CX)(BX*1), AX
-
-loop:
- VMOVDQU (SI), Y1
- VMOVDQU 32(SI), Y2
- VMOVDQU (R8), Y5
- VMOVDQU 32(R8), Y6
- VMOVDQU (DI), Y3
- VMOVDQU 32(DI), Y4
- VMOVDQU (AX), Y7
- VMOVDQU 32(AX), Y8
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPSRLQ $0x04, Y3, Y10
- VPAND Y0, Y3, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y4, Y0, Y10
- VPSRLQ $0x04, Y4, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y1
- VPTERNLOGD $0x96, Y9, Y10, Y2
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU Y1, (SI)
- VMOVDQU Y2, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y3, (DI)
- VMOVDQU Y4, 32(DI)
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y7, Y2
- VPAND Y0, Y7, Y1
- VPAND Y0, Y2, Y2
- VPSHUFB Y1, Y24, Y3
- VPSHUFB Y1, Y25, Y1
- VPSHUFB Y2, Y26, Y4
- VPSHUFB Y2, Y27, Y2
- VPXOR Y3, Y4, Y3
- VPXOR Y1, Y2, Y1
- VPAND Y8, Y0, Y2
- VPSRLQ $0x04, Y8, Y4
- VPAND Y0, Y4, Y4
- VPSHUFB Y2, Y28, Y9
- VPSHUFB Y2, Y29, Y2
- VPXOR Y3, Y9, Y3
- VPXOR Y1, Y2, Y1
- VPSHUFB Y4, Y30, Y9
- VPSHUFB Y4, Y31, Y2
- VPTERNLOGD $0x96, Y3, Y9, Y5
- VPTERNLOGD $0x96, Y1, Y2, Y6
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VMOVDQU Y5, (R8)
- VMOVDQU Y6, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y7, (AX)
- VMOVDQU Y8, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, DX
- JNZ loop
- VZEROUPPER
- RET
-
-// func ifftDIT4_avx512_2(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·ifftDIT4_avx512_2(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), CX
- MOVQ table02+48(FP), CX
- VBROADCASTI128 (CX), Y1
- VBROADCASTI128 64(CX), Y0
- VMOVAPS Z1, Z16
- VMOVAPS Z0, Z17
- VBROADCASTI128 16(CX), Y1
- VBROADCASTI128 80(CX), Y0
- VMOVAPS Z1, Z18
- VMOVAPS Z0, Z19
- VBROADCASTI128 32(CX), Y1
- VBROADCASTI128 96(CX), Y0
- VMOVAPS Z1, Z20
- VMOVAPS Z0, Z21
- VBROADCASTI128 48(CX), Y1
- VBROADCASTI128 112(CX), Y0
- VMOVAPS Z1, Z22
- VMOVAPS Z0, Z23
- VBROADCASTI128 (AX), Y1
- VBROADCASTI128 64(AX), Y0
- VMOVAPS Z1, Z24
- VMOVAPS Z0, Z25
- VBROADCASTI128 16(AX), Y1
- VBROADCASTI128 80(AX), Y0
- VMOVAPS Z1, Z26
- VMOVAPS Z0, Z27
- VBROADCASTI128 32(AX), Y1
- VBROADCASTI128 96(AX), Y0
- VMOVAPS Z1, Z28
- VMOVAPS Z0, Z29
- VBROADCASTI128 48(AX), Y1
- VBROADCASTI128 112(AX), Y0
- VMOVAPS Z1, Z30
- VMOVAPS Z0, Z31
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), CX
- MOVQ 8(CX), DX
- XORQ BX, BX
- MOVQ (CX)(BX*1), SI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), DI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), R8
- ADDQ AX, BX
- MOVQ (CX)(BX*1), AX
-
-loop:
- VMOVDQU (SI), Y1
- VMOVDQU 32(SI), Y2
- VMOVDQU (DI), Y3
- VMOVDQU 32(DI), Y4
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VPSRLQ $0x04, Y3, Y6
- VPAND Y0, Y3, Y5
- VPAND Y0, Y6, Y6
- VPSHUFB Y5, Y24, Y7
- VPSHUFB Y5, Y25, Y5
- VPSHUFB Y6, Y26, Y8
- VPSHUFB Y6, Y27, Y6
- VPXOR Y7, Y8, Y7
- VPXOR Y5, Y6, Y5
- VPAND Y4, Y0, Y6
- VPSRLQ $0x04, Y4, Y8
- VPAND Y0, Y8, Y8
- VPSHUFB Y6, Y28, Y9
- VPSHUFB Y6, Y29, Y6
- VPXOR Y7, Y9, Y7
- VPXOR Y5, Y6, Y5
- VPSHUFB Y8, Y30, Y9
- VPSHUFB Y8, Y31, Y6
- VPTERNLOGD $0x96, Y7, Y9, Y1
- VPTERNLOGD $0x96, Y5, Y6, Y2
- VMOVDQU (R8), Y5
- VMOVDQU 32(R8), Y6
- VMOVDQU (AX), Y7
- VMOVDQU 32(AX), Y8
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPSRLQ $0x04, Y5, Y10
- VPAND Y0, Y5, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y6, Y0, Y10
- VPSRLQ $0x04, Y6, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y1
- VPTERNLOGD $0x96, Y9, Y10, Y2
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y3
- VPTERNLOGD $0x96, Y9, Y10, Y4
- VMOVDQU Y1, (SI)
- VMOVDQU Y2, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y3, (DI)
- VMOVDQU Y4, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y5, (R8)
- VMOVDQU Y6, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y7, (AX)
- VMOVDQU Y8, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, DX
- JNZ loop
- VZEROUPPER
- RET
-
-// func fftDIT4_avx512_2(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·fftDIT4_avx512_2(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), AX
- MOVQ table02+48(FP), CX
- VBROADCASTI128 (CX), Y1
- VBROADCASTI128 64(CX), Y0
- VMOVAPS Z1, Z16
- VMOVAPS Z0, Z17
- VBROADCASTI128 16(CX), Y1
- VBROADCASTI128 80(CX), Y0
- VMOVAPS Z1, Z18
- VMOVAPS Z0, Z19
- VBROADCASTI128 32(CX), Y1
- VBROADCASTI128 96(CX), Y0
- VMOVAPS Z1, Z20
- VMOVAPS Z0, Z21
- VBROADCASTI128 48(CX), Y1
- VBROADCASTI128 112(CX), Y0
- VMOVAPS Z1, Z22
- VMOVAPS Z0, Z23
- VBROADCASTI128 (AX), Y1
- VBROADCASTI128 64(AX), Y0
- VMOVAPS Z1, Z24
- VMOVAPS Z0, Z25
- VBROADCASTI128 16(AX), Y1
- VBROADCASTI128 80(AX), Y0
- VMOVAPS Z1, Z26
- VMOVAPS Z0, Z27
- VBROADCASTI128 32(AX), Y1
- VBROADCASTI128 96(AX), Y0
- VMOVAPS Z1, Z28
- VMOVAPS Z0, Z29
- VBROADCASTI128 48(AX), Y1
- VBROADCASTI128 112(AX), Y0
- VMOVAPS Z1, Z30
- VMOVAPS Z0, Z31
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), CX
- MOVQ 8(CX), DX
- XORQ BX, BX
- MOVQ (CX)(BX*1), SI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), DI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), R8
- ADDQ AX, BX
- MOVQ (CX)(BX*1), AX
-
-loop:
- VMOVDQU (SI), Y1
- VMOVDQU 32(SI), Y2
- VMOVDQU (R8), Y5
- VMOVDQU 32(R8), Y6
- VMOVDQU (DI), Y3
- VMOVDQU 32(DI), Y4
- VMOVDQU (AX), Y7
- VMOVDQU 32(AX), Y8
- VPSRLQ $0x04, Y5, Y10
- VPAND Y0, Y5, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y6, Y0, Y10
- VPSRLQ $0x04, Y6, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y1
- VPTERNLOGD $0x96, Y9, Y10, Y2
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y3
- VPTERNLOGD $0x96, Y9, Y10, Y4
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU Y1, (SI)
- VMOVDQU Y2, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y3, (DI)
- VMOVDQU Y4, 32(DI)
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y7, Y2
- VPAND Y0, Y7, Y1
- VPAND Y0, Y2, Y2
- VPSHUFB Y1, Y24, Y3
- VPSHUFB Y1, Y25, Y1
- VPSHUFB Y2, Y26, Y4
- VPSHUFB Y2, Y27, Y2
- VPXOR Y3, Y4, Y3
- VPXOR Y1, Y2, Y1
- VPAND Y8, Y0, Y2
- VPSRLQ $0x04, Y8, Y4
- VPAND Y0, Y4, Y4
- VPSHUFB Y2, Y28, Y9
- VPSHUFB Y2, Y29, Y2
- VPXOR Y3, Y9, Y3
- VPXOR Y1, Y2, Y1
- VPSHUFB Y4, Y30, Y9
- VPSHUFB Y4, Y31, Y2
- VPTERNLOGD $0x96, Y3, Y9, Y5
- VPTERNLOGD $0x96, Y1, Y2, Y6
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VMOVDQU Y5, (R8)
- VMOVDQU Y6, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y7, (AX)
- VMOVDQU Y8, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, DX
- JNZ loop
- VZEROUPPER
- RET
-
-// func ifftDIT4_avx512_3(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·ifftDIT4_avx512_3(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), AX
- MOVQ table02+48(FP), AX
- VBROADCASTI128 (AX), Y1
- VBROADCASTI128 64(AX), Y0
- VMOVAPS Z1, Z16
- VMOVAPS Z0, Z17
- VBROADCASTI128 16(AX), Y1
- VBROADCASTI128 80(AX), Y0
- VMOVAPS Z1, Z18
- VMOVAPS Z0, Z19
- VBROADCASTI128 32(AX), Y1
- VBROADCASTI128 96(AX), Y0
- VMOVAPS Z1, Z20
- VMOVAPS Z0, Z21
- VBROADCASTI128 48(AX), Y1
- VBROADCASTI128 112(AX), Y0
- VMOVAPS Z1, Z22
- VMOVAPS Z0, Z23
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), CX
- MOVQ 8(CX), DX
- XORQ BX, BX
- MOVQ (CX)(BX*1), SI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), DI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), R8
- ADDQ AX, BX
- MOVQ (CX)(BX*1), AX
-
-loop:
- VMOVDQU (SI), Y1
- VMOVDQU 32(SI), Y2
- VMOVDQU (DI), Y3
- VMOVDQU 32(DI), Y4
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU (R8), Y5
- VMOVDQU 32(R8), Y6
- VMOVDQU (AX), Y7
- VMOVDQU 32(AX), Y8
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPSRLQ $0x04, Y5, Y10
- VPAND Y0, Y5, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y6, Y0, Y10
- VPSRLQ $0x04, Y6, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y1
- VPTERNLOGD $0x96, Y9, Y10, Y2
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y3
- VPTERNLOGD $0x96, Y9, Y10, Y4
- VMOVDQU Y1, (SI)
- VMOVDQU Y2, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y3, (DI)
- VMOVDQU Y4, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y5, (R8)
- VMOVDQU Y6, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y7, (AX)
- VMOVDQU Y8, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, DX
- JNZ loop
- VZEROUPPER
- RET
-
-// func fftDIT4_avx512_3(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·fftDIT4_avx512_3(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), AX
- MOVQ table02+48(FP), CX
- VBROADCASTI128 (AX), Y1
- VBROADCASTI128 64(AX), Y0
- VMOVAPS Z1, Z16
- VMOVAPS Z0, Z17
- VBROADCASTI128 16(AX), Y1
- VBROADCASTI128 80(AX), Y0
- VMOVAPS Z1, Z18
- VMOVAPS Z0, Z19
- VBROADCASTI128 32(AX), Y1
- VBROADCASTI128 96(AX), Y0
- VMOVAPS Z1, Z20
- VMOVAPS Z0, Z21
- VBROADCASTI128 48(AX), Y1
- VBROADCASTI128 112(AX), Y0
- VMOVAPS Z1, Z22
- VMOVAPS Z0, Z23
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), CX
- MOVQ 8(CX), DX
- XORQ BX, BX
- MOVQ (CX)(BX*1), SI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), DI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), R8
- ADDQ AX, BX
- MOVQ (CX)(BX*1), AX
-
-loop:
- VMOVDQU (SI), Y1
- VMOVDQU 32(SI), Y2
- VMOVDQU (R8), Y5
- VMOVDQU 32(R8), Y6
- VMOVDQU (DI), Y3
- VMOVDQU 32(DI), Y4
- VMOVDQU (AX), Y7
- VMOVDQU 32(AX), Y8
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU Y1, (SI)
- VMOVDQU Y2, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y3, (DI)
- VMOVDQU Y4, 32(DI)
- ADDQ $0x40, DI
- VPSRLQ $0x04, Y7, Y2
- VPAND Y0, Y7, Y1
- VPAND Y0, Y2, Y2
- VPSHUFB Y1, Y16, Y3
- VPSHUFB Y1, Y17, Y1
- VPSHUFB Y2, Y18, Y4
- VPSHUFB Y2, Y19, Y2
- VPXOR Y3, Y4, Y3
- VPXOR Y1, Y2, Y1
- VPAND Y8, Y0, Y2
- VPSRLQ $0x04, Y8, Y4
- VPAND Y0, Y4, Y4
- VPSHUFB Y2, Y20, Y9
- VPSHUFB Y2, Y21, Y2
- VPXOR Y3, Y9, Y3
- VPXOR Y1, Y2, Y1
- VPSHUFB Y4, Y22, Y9
- VPSHUFB Y4, Y23, Y2
- VPTERNLOGD $0x96, Y3, Y9, Y5
- VPTERNLOGD $0x96, Y1, Y2, Y6
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VMOVDQU Y5, (R8)
- VMOVDQU Y6, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y7, (AX)
- VMOVDQU Y8, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, DX
- JNZ loop
- VZEROUPPER
- RET
-
-// func ifftDIT4_avx512_4(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·ifftDIT4_avx512_4(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), CX
- MOVQ table02+48(FP), DX
- VBROADCASTI128 (AX), Y1
- VBROADCASTI128 64(AX), Y0
- VMOVAPS Z1, Z16
- VMOVAPS Z0, Z17
- VBROADCASTI128 16(AX), Y1
- VBROADCASTI128 80(AX), Y0
- VMOVAPS Z1, Z18
- VMOVAPS Z0, Z19
- VBROADCASTI128 32(AX), Y1
- VBROADCASTI128 96(AX), Y0
- VMOVAPS Z1, Z20
- VMOVAPS Z0, Z21
- VBROADCASTI128 48(AX), Y1
- VBROADCASTI128 112(AX), Y0
- VMOVAPS Z1, Z22
- VMOVAPS Z0, Z23
- VBROADCASTI128 (CX), Y1
- VBROADCASTI128 64(CX), Y0
- VMOVAPS Z1, Z24
- VMOVAPS Z0, Z25
- VBROADCASTI128 16(CX), Y1
- VBROADCASTI128 80(CX), Y0
- VMOVAPS Z1, Z26
- VMOVAPS Z0, Z27
- VBROADCASTI128 32(CX), Y1
- VBROADCASTI128 96(CX), Y0
- VMOVAPS Z1, Z28
- VMOVAPS Z0, Z29
- VBROADCASTI128 48(CX), Y1
- VBROADCASTI128 112(CX), Y0
- VMOVAPS Z1, Z30
- VMOVAPS Z0, Z31
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), CX
- MOVQ 8(CX), DX
- XORQ BX, BX
- MOVQ (CX)(BX*1), SI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), DI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), R8
- ADDQ AX, BX
- MOVQ (CX)(BX*1), AX
-
-loop:
- VMOVDQU (SI), Y1
- VMOVDQU 32(SI), Y2
- VMOVDQU (DI), Y3
- VMOVDQU 32(DI), Y4
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VPSRLQ $0x04, Y3, Y6
- VPAND Y0, Y3, Y5
- VPAND Y0, Y6, Y6
- VPSHUFB Y5, Y16, Y7
- VPSHUFB Y5, Y17, Y5
- VPSHUFB Y6, Y18, Y8
- VPSHUFB Y6, Y19, Y6
- VPXOR Y7, Y8, Y7
- VPXOR Y5, Y6, Y5
- VPAND Y4, Y0, Y6
- VPSRLQ $0x04, Y4, Y8
- VPAND Y0, Y8, Y8
- VPSHUFB Y6, Y20, Y9
- VPSHUFB Y6, Y21, Y6
- VPXOR Y7, Y9, Y7
- VPXOR Y5, Y6, Y5
- VPSHUFB Y8, Y22, Y9
- VPSHUFB Y8, Y23, Y6
- VPTERNLOGD $0x96, Y7, Y9, Y1
- VPTERNLOGD $0x96, Y5, Y6, Y2
- VMOVDQU (R8), Y5
- VMOVDQU 32(R8), Y6
- VMOVDQU (AX), Y7
- VMOVDQU 32(AX), Y8
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y24, Y11
- VPSHUFB Y9, Y25, Y9
- VPSHUFB Y10, Y26, Y12
- VPSHUFB Y10, Y27, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y28, Y13
- VPSHUFB Y10, Y29, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y30, Y13
- VPSHUFB Y12, Y31, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y5
- VPTERNLOGD $0x96, Y9, Y10, Y6
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VMOVDQU Y1, (SI)
- VMOVDQU Y2, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y3, (DI)
- VMOVDQU Y4, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y5, (R8)
- VMOVDQU Y6, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y7, (AX)
- VMOVDQU Y8, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, DX
- JNZ loop
- VZEROUPPER
- RET
-
-// func fftDIT4_avx512_4(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·fftDIT4_avx512_4(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), CX
- MOVQ table02+48(FP), CX
- VBROADCASTI128 (CX), Y1
- VBROADCASTI128 64(CX), Y0
- VMOVAPS Z1, Z16
- VMOVAPS Z0, Z17
- VBROADCASTI128 16(CX), Y1
- VBROADCASTI128 80(CX), Y0
- VMOVAPS Z1, Z18
- VMOVAPS Z0, Z19
- VBROADCASTI128 32(CX), Y1
- VBROADCASTI128 96(CX), Y0
- VMOVAPS Z1, Z20
- VMOVAPS Z0, Z21
- VBROADCASTI128 48(CX), Y1
- VBROADCASTI128 112(CX), Y0
- VMOVAPS Z1, Z22
- VMOVAPS Z0, Z23
- VBROADCASTI128 (AX), Y1
- VBROADCASTI128 64(AX), Y0
- VMOVAPS Z1, Z24
- VMOVAPS Z0, Z25
- VBROADCASTI128 16(AX), Y1
- VBROADCASTI128 80(AX), Y0
- VMOVAPS Z1, Z26
- VMOVAPS Z0, Z27
- VBROADCASTI128 32(AX), Y1
- VBROADCASTI128 96(AX), Y0
- VMOVAPS Z1, Z28
- VMOVAPS Z0, Z29
- VBROADCASTI128 48(AX), Y1
- VBROADCASTI128 112(AX), Y0
- VMOVAPS Z1, Z30
- VMOVAPS Z0, Z31
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), CX
- MOVQ 8(CX), DX
- XORQ BX, BX
- MOVQ (CX)(BX*1), SI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), DI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), R8
- ADDQ AX, BX
- MOVQ (CX)(BX*1), AX
-
-loop:
- VMOVDQU (SI), Y1
- VMOVDQU 32(SI), Y2
- VMOVDQU (R8), Y5
- VMOVDQU 32(R8), Y6
- VMOVDQU (DI), Y3
- VMOVDQU 32(DI), Y4
- VMOVDQU (AX), Y7
- VMOVDQU 32(AX), Y8
- VPSRLQ $0x04, Y5, Y10
- VPAND Y0, Y5, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y6, Y0, Y10
- VPSRLQ $0x04, Y6, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y1
- VPTERNLOGD $0x96, Y9, Y10, Y2
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y3
- VPTERNLOGD $0x96, Y9, Y10, Y4
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPSRLQ $0x04, Y3, Y10
- VPAND Y0, Y3, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y24, Y11
- VPSHUFB Y9, Y25, Y9
- VPSHUFB Y10, Y26, Y12
- VPSHUFB Y10, Y27, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y4, Y0, Y10
- VPSRLQ $0x04, Y4, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y28, Y13
- VPSHUFB Y10, Y29, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y30, Y13
- VPSHUFB Y12, Y31, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y1
- VPTERNLOGD $0x96, Y9, Y10, Y2
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU Y1, (SI)
- VMOVDQU Y2, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y3, (DI)
- VMOVDQU Y4, 32(DI)
- ADDQ $0x40, DI
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VMOVDQU Y5, (R8)
- VMOVDQU Y6, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y7, (AX)
- VMOVDQU Y8, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, DX
- JNZ loop
- VZEROUPPER
- RET
-
-// func ifftDIT4_avx512_5(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·ifftDIT4_avx512_5(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), AX
- MOVQ table02+48(FP), CX
- VBROADCASTI128 (AX), Y1
- VBROADCASTI128 64(AX), Y0
- VMOVAPS Z1, Z16
- VMOVAPS Z0, Z17
- VBROADCASTI128 16(AX), Y1
- VBROADCASTI128 80(AX), Y0
- VMOVAPS Z1, Z18
- VMOVAPS Z0, Z19
- VBROADCASTI128 32(AX), Y1
- VBROADCASTI128 96(AX), Y0
- VMOVAPS Z1, Z20
- VMOVAPS Z0, Z21
- VBROADCASTI128 48(AX), Y1
- VBROADCASTI128 112(AX), Y0
- VMOVAPS Z1, Z22
- VMOVAPS Z0, Z23
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), CX
- MOVQ 8(CX), DX
- XORQ BX, BX
- MOVQ (CX)(BX*1), SI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), DI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), R8
- ADDQ AX, BX
- MOVQ (CX)(BX*1), AX
-
-loop:
- VMOVDQU (SI), Y1
- VMOVDQU 32(SI), Y2
- VMOVDQU (DI), Y3
- VMOVDQU 32(DI), Y4
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU (R8), Y5
- VMOVDQU 32(R8), Y6
- VMOVDQU (AX), Y7
- VMOVDQU 32(AX), Y8
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y5
- VPTERNLOGD $0x96, Y9, Y10, Y6
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VMOVDQU Y1, (SI)
- VMOVDQU Y2, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y3, (DI)
- VMOVDQU Y4, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y5, (R8)
- VMOVDQU Y6, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y7, (AX)
- VMOVDQU Y8, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, DX
- JNZ loop
- VZEROUPPER
- RET
-
-// func fftDIT4_avx512_5(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·fftDIT4_avx512_5(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), CX
- MOVQ table02+48(FP), CX
- VBROADCASTI128 (AX), Y1
- VBROADCASTI128 64(AX), Y0
- VMOVAPS Z1, Z16
- VMOVAPS Z0, Z17
- VBROADCASTI128 16(AX), Y1
- VBROADCASTI128 80(AX), Y0
- VMOVAPS Z1, Z18
- VMOVAPS Z0, Z19
- VBROADCASTI128 32(AX), Y1
- VBROADCASTI128 96(AX), Y0
- VMOVAPS Z1, Z20
- VMOVAPS Z0, Z21
- VBROADCASTI128 48(AX), Y1
- VBROADCASTI128 112(AX), Y0
- VMOVAPS Z1, Z22
- VMOVAPS Z0, Z23
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), CX
- MOVQ 8(CX), DX
- XORQ BX, BX
- MOVQ (CX)(BX*1), SI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), DI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), R8
- ADDQ AX, BX
- MOVQ (CX)(BX*1), AX
-
-loop:
- VMOVDQU (SI), Y1
- VMOVDQU 32(SI), Y2
- VMOVDQU (R8), Y5
- VMOVDQU 32(R8), Y6
- VMOVDQU (DI), Y3
- VMOVDQU 32(DI), Y4
- VMOVDQU (AX), Y7
- VMOVDQU 32(AX), Y8
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPSRLQ $0x04, Y3, Y10
- VPAND Y0, Y3, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y4, Y0, Y10
- VPSRLQ $0x04, Y4, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y1
- VPTERNLOGD $0x96, Y9, Y10, Y2
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU Y1, (SI)
- VMOVDQU Y2, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y3, (DI)
- VMOVDQU Y4, 32(DI)
- ADDQ $0x40, DI
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VMOVDQU Y5, (R8)
- VMOVDQU Y6, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y7, (AX)
- VMOVDQU Y8, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, DX
- JNZ loop
- VZEROUPPER
- RET
-
-// func ifftDIT4_avx512_6(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·ifftDIT4_avx512_6(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), CX
- MOVQ table02+48(FP), CX
- VBROADCASTI128 (AX), Y1
- VBROADCASTI128 64(AX), Y0
- VMOVAPS Z1, Z16
- VMOVAPS Z0, Z17
- VBROADCASTI128 16(AX), Y1
- VBROADCASTI128 80(AX), Y0
- VMOVAPS Z1, Z18
- VMOVAPS Z0, Z19
- VBROADCASTI128 32(AX), Y1
- VBROADCASTI128 96(AX), Y0
- VMOVAPS Z1, Z20
- VMOVAPS Z0, Z21
- VBROADCASTI128 48(AX), Y1
- VBROADCASTI128 112(AX), Y0
- VMOVAPS Z1, Z22
- VMOVAPS Z0, Z23
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), CX
- MOVQ 8(CX), DX
- XORQ BX, BX
- MOVQ (CX)(BX*1), SI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), DI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), R8
- ADDQ AX, BX
- MOVQ (CX)(BX*1), AX
-
-loop:
- VMOVDQU (SI), Y1
- VMOVDQU 32(SI), Y2
- VMOVDQU (DI), Y3
- VMOVDQU 32(DI), Y4
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VPSRLQ $0x04, Y3, Y6
- VPAND Y0, Y3, Y5
- VPAND Y0, Y6, Y6
- VPSHUFB Y5, Y16, Y7
- VPSHUFB Y5, Y17, Y5
- VPSHUFB Y6, Y18, Y8
- VPSHUFB Y6, Y19, Y6
- VPXOR Y7, Y8, Y7
- VPXOR Y5, Y6, Y5
- VPAND Y4, Y0, Y6
- VPSRLQ $0x04, Y4, Y8
- VPAND Y0, Y8, Y8
- VPSHUFB Y6, Y20, Y9
- VPSHUFB Y6, Y21, Y6
- VPXOR Y7, Y9, Y7
- VPXOR Y5, Y6, Y5
- VPSHUFB Y8, Y22, Y9
- VPSHUFB Y8, Y23, Y6
- VPTERNLOGD $0x96, Y7, Y9, Y1
- VPTERNLOGD $0x96, Y5, Y6, Y2
- VMOVDQU (R8), Y5
- VMOVDQU 32(R8), Y6
- VMOVDQU (AX), Y7
- VMOVDQU 32(AX), Y8
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VMOVDQU Y1, (SI)
- VMOVDQU Y2, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y3, (DI)
- VMOVDQU Y4, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y5, (R8)
- VMOVDQU Y6, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y7, (AX)
- VMOVDQU Y8, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, DX
- JNZ loop
- VZEROUPPER
- RET
-
-// func fftDIT4_avx512_6(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·fftDIT4_avx512_6(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), AX
- MOVQ table02+48(FP), AX
- VBROADCASTI128 (AX), Y1
- VBROADCASTI128 64(AX), Y0
- VMOVAPS Z1, Z16
- VMOVAPS Z0, Z17
- VBROADCASTI128 16(AX), Y1
- VBROADCASTI128 80(AX), Y0
- VMOVAPS Z1, Z18
- VMOVAPS Z0, Z19
- VBROADCASTI128 32(AX), Y1
- VBROADCASTI128 96(AX), Y0
- VMOVAPS Z1, Z20
- VMOVAPS Z0, Z21
- VBROADCASTI128 48(AX), Y1
- VBROADCASTI128 112(AX), Y0
- VMOVAPS Z1, Z22
- VMOVAPS Z0, Z23
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), CX
- MOVQ 8(CX), DX
- XORQ BX, BX
- MOVQ (CX)(BX*1), SI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), DI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), R8
- ADDQ AX, BX
- MOVQ (CX)(BX*1), AX
-
-loop:
- VMOVDQU (SI), Y1
- VMOVDQU 32(SI), Y2
- VMOVDQU (R8), Y5
- VMOVDQU 32(R8), Y6
- VMOVDQU (DI), Y3
- VMOVDQU 32(DI), Y4
- VMOVDQU (AX), Y7
- VMOVDQU 32(AX), Y8
- VPSRLQ $0x04, Y5, Y10
- VPAND Y0, Y5, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y6, Y0, Y10
- VPSRLQ $0x04, Y6, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y1
- VPTERNLOGD $0x96, Y9, Y10, Y2
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VPSHUFB Y9, Y16, Y11
- VPSHUFB Y9, Y17, Y9
- VPSHUFB Y10, Y18, Y12
- VPSHUFB Y10, Y19, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VPSHUFB Y10, Y20, Y13
- VPSHUFB Y10, Y21, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VPSHUFB Y12, Y22, Y13
- VPSHUFB Y12, Y23, Y10
- VPTERNLOGD $0x96, Y11, Y13, Y3
- VPTERNLOGD $0x96, Y9, Y10, Y4
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU Y1, (SI)
- VMOVDQU Y2, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y3, (DI)
- VMOVDQU Y4, 32(DI)
- ADDQ $0x40, DI
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VMOVDQU Y5, (R8)
- VMOVDQU Y6, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y7, (AX)
- VMOVDQU Y8, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, DX
- JNZ loop
- VZEROUPPER
- RET
-
-// func ifftDIT4_avx512_7(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, SSE2
-TEXT ·ifftDIT4_avx512_7(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), AX
- MOVQ table02+48(FP), AX
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), CX
- MOVQ 8(CX), DX
- XORQ BX, BX
- MOVQ (CX)(BX*1), SI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), DI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), R8
- ADDQ AX, BX
- MOVQ (CX)(BX*1), AX
-
-loop:
- VMOVDQU (SI), Y0
- VMOVDQU 32(SI), Y1
- VMOVDQU (DI), Y2
- VMOVDQU 32(DI), Y3
- VPXOR Y0, Y2, Y2
- VPXOR Y1, Y3, Y3
- VMOVDQU (R8), Y4
- VMOVDQU 32(R8), Y5
- VMOVDQU (AX), Y6
- VMOVDQU 32(AX), Y7
- VPXOR Y4, Y6, Y6
- VPXOR Y5, Y7, Y7
- VPXOR Y0, Y4, Y4
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VMOVDQU Y0, (SI)
- VMOVDQU Y1, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y2, (DI)
- VMOVDQU Y3, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y4, (R8)
- VMOVDQU Y5, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y6, (AX)
- VMOVDQU Y7, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, DX
- JNZ loop
- VZEROUPPER
- RET
-
-// func fftDIT4_avx512_7(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, SSE2
-TEXT ·fftDIT4_avx512_7(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), AX
- MOVQ table02+48(FP), AX
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), CX
- MOVQ 8(CX), DX
- XORQ BX, BX
- MOVQ (CX)(BX*1), SI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), DI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), R8
- ADDQ AX, BX
- MOVQ (CX)(BX*1), AX
-
-loop:
- VMOVDQU (SI), Y0
- VMOVDQU 32(SI), Y1
- VMOVDQU (R8), Y4
- VMOVDQU 32(R8), Y5
- VMOVDQU (DI), Y2
- VMOVDQU 32(DI), Y3
- VMOVDQU (AX), Y6
- VMOVDQU 32(AX), Y7
- VPXOR Y0, Y4, Y4
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y0, Y2, Y2
- VPXOR Y1, Y3, Y3
- VMOVDQU Y0, (SI)
- VMOVDQU Y1, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y2, (DI)
- VMOVDQU Y3, 32(DI)
- ADDQ $0x40, DI
- VPXOR Y4, Y6, Y6
- VPXOR Y5, Y7, Y7
- VMOVDQU Y4, (R8)
- VMOVDQU Y5, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y6, (AX)
- VMOVDQU Y7, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, DX
- JNZ loop
- VZEROUPPER
- RET
-
-// func ifftDIT4_avx2_0(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·ifftDIT4_avx2_0(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), CX
- MOVQ table02+48(FP), DX
- MOVQ $0x0000000f, BX
- MOVQ BX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), BX
- MOVQ work_base+0(FP), SI
- MOVQ 8(SI), DI
- XORQ R8, R8
- MOVQ (SI)(R8*1), R9
- ADDQ BX, R8
- MOVQ (SI)(R8*1), R10
- ADDQ BX, R8
- MOVQ (SI)(R8*1), R11
- ADDQ BX, R8
- MOVQ (SI)(R8*1), BX
-
-loop:
- VMOVDQU (R9), Y1
- VMOVDQU 32(R9), Y2
- VMOVDQU (R10), Y3
- VMOVDQU 32(R10), Y4
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VPSRLQ $0x04, Y3, Y6
- VPAND Y0, Y3, Y5
- VPAND Y0, Y6, Y6
- VBROADCASTI128 (AX), Y7
- VBROADCASTI128 64(AX), Y8
- VPSHUFB Y5, Y7, Y7
- VPSHUFB Y5, Y8, Y5
- VBROADCASTI128 16(AX), Y8
- VBROADCASTI128 80(AX), Y9
- VPSHUFB Y6, Y8, Y8
- VPSHUFB Y6, Y9, Y6
- VPXOR Y7, Y8, Y7
- VPXOR Y5, Y6, Y5
- VPAND Y4, Y0, Y6
- VPSRLQ $0x04, Y4, Y8
- VPAND Y0, Y8, Y8
- VBROADCASTI128 32(AX), Y9
- VBROADCASTI128 96(AX), Y10
- VPSHUFB Y6, Y9, Y9
- VPSHUFB Y6, Y10, Y6
- VPXOR Y7, Y9, Y7
- VPXOR Y5, Y6, Y5
- VBROADCASTI128 48(AX), Y9
- VBROADCASTI128 112(AX), Y6
- VPSHUFB Y8, Y9, Y9
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y7, Y9, Y1)
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU (R11), Y5
- VMOVDQU 32(R11), Y6
- VMOVDQU (BX), Y7
- VMOVDQU 32(BX), Y8
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (CX), Y11
- VBROADCASTI128 64(CX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(CX), Y12
- VBROADCASTI128 80(CX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(CX), Y13
- VBROADCASTI128 96(CX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(CX), Y13
- VBROADCASTI128 112(CX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y5)
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPSRLQ $0x04, Y5, Y10
- VPAND Y0, Y5, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (DX), Y11
- VBROADCASTI128 64(DX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(DX), Y12
- VBROADCASTI128 80(DX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y6, Y0, Y10
- VPSRLQ $0x04, Y6, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(DX), Y13
- VBROADCASTI128 96(DX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(DX), Y13
- VBROADCASTI128 112(DX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y1)
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (DX), Y11
- VBROADCASTI128 64(DX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(DX), Y12
- VBROADCASTI128 80(DX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(DX), Y13
- VBROADCASTI128 96(DX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(DX), Y13
- VBROADCASTI128 112(DX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y3)
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU Y1, (R9)
- VMOVDQU Y2, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y3, (R10)
- VMOVDQU Y4, 32(R10)
- ADDQ $0x40, R10
- VMOVDQU Y5, (R11)
- VMOVDQU Y6, 32(R11)
- ADDQ $0x40, R11
- VMOVDQU Y7, (BX)
- VMOVDQU Y8, 32(BX)
- ADDQ $0x40, BX
- SUBQ $0x40, DI
- JNZ loop
- VZEROUPPER
- RET
-
-// func fftDIT4_avx2_0(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·fftDIT4_avx2_0(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), CX
- MOVQ table02+48(FP), DX
- MOVQ $0x0000000f, BX
- MOVQ BX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), BX
- MOVQ work_base+0(FP), SI
- MOVQ 8(SI), DI
- XORQ R8, R8
- MOVQ (SI)(R8*1), R9
- ADDQ BX, R8
- MOVQ (SI)(R8*1), R10
- ADDQ BX, R8
- MOVQ (SI)(R8*1), R11
- ADDQ BX, R8
- MOVQ (SI)(R8*1), BX
-
-loop:
- VMOVDQU (R9), Y1
- VMOVDQU 32(R9), Y2
- VMOVDQU (R11), Y5
- VMOVDQU 32(R11), Y6
- VMOVDQU (R10), Y3
- VMOVDQU 32(R10), Y4
- VMOVDQU (BX), Y7
- VMOVDQU 32(BX), Y8
- VPSRLQ $0x04, Y5, Y10
- VPAND Y0, Y5, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (DX), Y11
- VBROADCASTI128 64(DX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(DX), Y12
- VBROADCASTI128 80(DX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y6, Y0, Y10
- VPSRLQ $0x04, Y6, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(DX), Y13
- VBROADCASTI128 96(DX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(DX), Y13
- VBROADCASTI128 112(DX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y1)
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (DX), Y11
- VBROADCASTI128 64(DX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(DX), Y12
- VBROADCASTI128 80(DX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(DX), Y13
- VBROADCASTI128 96(DX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(DX), Y13
- VBROADCASTI128 112(DX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y3)
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPSRLQ $0x04, Y3, Y10
- VPAND Y0, Y3, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (AX), Y11
- VBROADCASTI128 64(AX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(AX), Y12
- VBROADCASTI128 80(AX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y4, Y0, Y10
- VPSRLQ $0x04, Y4, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(AX), Y13
- VBROADCASTI128 96(AX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(AX), Y13
- VBROADCASTI128 112(AX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y1)
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU Y1, (R9)
- VMOVDQU Y2, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y3, (R10)
- VMOVDQU Y4, 32(R10)
- ADDQ $0x40, R10
- VPSRLQ $0x04, Y7, Y2
- VPAND Y0, Y7, Y1
- VPAND Y0, Y2, Y2
- VBROADCASTI128 (CX), Y3
- VBROADCASTI128 64(CX), Y4
- VPSHUFB Y1, Y3, Y3
- VPSHUFB Y1, Y4, Y1
- VBROADCASTI128 16(CX), Y4
- VBROADCASTI128 80(CX), Y9
- VPSHUFB Y2, Y4, Y4
- VPSHUFB Y2, Y9, Y2
- VPXOR Y3, Y4, Y3
- VPXOR Y1, Y2, Y1
- VPAND Y8, Y0, Y2
- VPSRLQ $0x04, Y8, Y4
- VPAND Y0, Y4, Y4
- VBROADCASTI128 32(CX), Y9
- VBROADCASTI128 96(CX), Y10
- VPSHUFB Y2, Y9, Y9
- VPSHUFB Y2, Y10, Y2
- VPXOR Y3, Y9, Y3
- VPXOR Y1, Y2, Y1
- VBROADCASTI128 48(CX), Y9
- VBROADCASTI128 112(CX), Y2
- VPSHUFB Y4, Y9, Y9
- VPSHUFB Y4, Y2, Y2
- XOR3WAY( $0x00, Y3, Y9, Y5)
- XOR3WAY( $0x00, Y1, Y2, Y6)
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VMOVDQU Y5, (R11)
- VMOVDQU Y6, 32(R11)
- ADDQ $0x40, R11
- VMOVDQU Y7, (BX)
- VMOVDQU Y8, 32(BX)
- ADDQ $0x40, BX
- SUBQ $0x40, DI
- JNZ loop
- VZEROUPPER
- RET
-
-// func ifftDIT4_avx2_1(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·ifftDIT4_avx2_1(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), AX
- MOVQ table02+48(FP), CX
- MOVQ $0x0000000f, DX
- MOVQ DX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), DX
- MOVQ work_base+0(FP), BX
- MOVQ 8(BX), SI
- XORQ DI, DI
- MOVQ (BX)(DI*1), R8
- ADDQ DX, DI
- MOVQ (BX)(DI*1), R9
- ADDQ DX, DI
- MOVQ (BX)(DI*1), R10
- ADDQ DX, DI
- MOVQ (BX)(DI*1), DX
-
-loop:
- VMOVDQU (R8), Y1
- VMOVDQU 32(R8), Y2
- VMOVDQU (R9), Y3
- VMOVDQU 32(R9), Y4
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU (R10), Y5
- VMOVDQU 32(R10), Y6
- VMOVDQU (DX), Y7
- VMOVDQU 32(DX), Y8
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (AX), Y11
- VBROADCASTI128 64(AX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(AX), Y12
- VBROADCASTI128 80(AX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(AX), Y13
- VBROADCASTI128 96(AX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(AX), Y13
- VBROADCASTI128 112(AX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y5)
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPSRLQ $0x04, Y5, Y10
- VPAND Y0, Y5, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (CX), Y11
- VBROADCASTI128 64(CX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(CX), Y12
- VBROADCASTI128 80(CX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y6, Y0, Y10
- VPSRLQ $0x04, Y6, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(CX), Y13
- VBROADCASTI128 96(CX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(CX), Y13
- VBROADCASTI128 112(CX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y1)
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (CX), Y11
- VBROADCASTI128 64(CX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(CX), Y12
- VBROADCASTI128 80(CX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(CX), Y13
- VBROADCASTI128 96(CX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(CX), Y13
- VBROADCASTI128 112(CX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y3)
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU Y1, (R8)
- VMOVDQU Y2, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y3, (R9)
- VMOVDQU Y4, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y5, (R10)
- VMOVDQU Y6, 32(R10)
- ADDQ $0x40, R10
- VMOVDQU Y7, (DX)
- VMOVDQU Y8, 32(DX)
- ADDQ $0x40, DX
- SUBQ $0x40, SI
- JNZ loop
- VZEROUPPER
- RET
-
-// func fftDIT4_avx2_1(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·fftDIT4_avx2_1(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), CX
- MOVQ table02+48(FP), DX
- MOVQ $0x0000000f, DX
- MOVQ DX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), DX
- MOVQ work_base+0(FP), BX
- MOVQ 8(BX), SI
- XORQ DI, DI
- MOVQ (BX)(DI*1), R8
- ADDQ DX, DI
- MOVQ (BX)(DI*1), R9
- ADDQ DX, DI
- MOVQ (BX)(DI*1), R10
- ADDQ DX, DI
- MOVQ (BX)(DI*1), DX
-
-loop:
- VMOVDQU (R8), Y1
- VMOVDQU 32(R8), Y2
- VMOVDQU (R10), Y5
- VMOVDQU 32(R10), Y6
- VMOVDQU (R9), Y3
- VMOVDQU 32(R9), Y4
- VMOVDQU (DX), Y7
- VMOVDQU 32(DX), Y8
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPSRLQ $0x04, Y3, Y10
- VPAND Y0, Y3, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (AX), Y11
- VBROADCASTI128 64(AX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(AX), Y12
- VBROADCASTI128 80(AX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y4, Y0, Y10
- VPSRLQ $0x04, Y4, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(AX), Y13
- VBROADCASTI128 96(AX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(AX), Y13
- VBROADCASTI128 112(AX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y1)
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU Y1, (R8)
- VMOVDQU Y2, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y3, (R9)
- VMOVDQU Y4, 32(R9)
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y7, Y2
- VPAND Y0, Y7, Y1
- VPAND Y0, Y2, Y2
- VBROADCASTI128 (CX), Y3
- VBROADCASTI128 64(CX), Y4
- VPSHUFB Y1, Y3, Y3
- VPSHUFB Y1, Y4, Y1
- VBROADCASTI128 16(CX), Y4
- VBROADCASTI128 80(CX), Y9
- VPSHUFB Y2, Y4, Y4
- VPSHUFB Y2, Y9, Y2
- VPXOR Y3, Y4, Y3
- VPXOR Y1, Y2, Y1
- VPAND Y8, Y0, Y2
- VPSRLQ $0x04, Y8, Y4
- VPAND Y0, Y4, Y4
- VBROADCASTI128 32(CX), Y9
- VBROADCASTI128 96(CX), Y10
- VPSHUFB Y2, Y9, Y9
- VPSHUFB Y2, Y10, Y2
- VPXOR Y3, Y9, Y3
- VPXOR Y1, Y2, Y1
- VBROADCASTI128 48(CX), Y9
- VBROADCASTI128 112(CX), Y2
- VPSHUFB Y4, Y9, Y9
- VPSHUFB Y4, Y2, Y2
- XOR3WAY( $0x00, Y3, Y9, Y5)
- XOR3WAY( $0x00, Y1, Y2, Y6)
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VMOVDQU Y5, (R10)
- VMOVDQU Y6, 32(R10)
- ADDQ $0x40, R10
- VMOVDQU Y7, (DX)
- VMOVDQU Y8, 32(DX)
- ADDQ $0x40, DX
- SUBQ $0x40, SI
- JNZ loop
- VZEROUPPER
- RET
-
-// func ifftDIT4_avx2_2(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·ifftDIT4_avx2_2(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), CX
- MOVQ table02+48(FP), CX
- MOVQ $0x0000000f, DX
- MOVQ DX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), DX
- MOVQ work_base+0(FP), BX
- MOVQ 8(BX), SI
- XORQ DI, DI
- MOVQ (BX)(DI*1), R8
- ADDQ DX, DI
- MOVQ (BX)(DI*1), R9
- ADDQ DX, DI
- MOVQ (BX)(DI*1), R10
- ADDQ DX, DI
- MOVQ (BX)(DI*1), DX
-
-loop:
- VMOVDQU (R8), Y1
- VMOVDQU 32(R8), Y2
- VMOVDQU (R9), Y3
- VMOVDQU 32(R9), Y4
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VPSRLQ $0x04, Y3, Y6
- VPAND Y0, Y3, Y5
- VPAND Y0, Y6, Y6
- VBROADCASTI128 (AX), Y7
- VBROADCASTI128 64(AX), Y8
- VPSHUFB Y5, Y7, Y7
- VPSHUFB Y5, Y8, Y5
- VBROADCASTI128 16(AX), Y8
- VBROADCASTI128 80(AX), Y9
- VPSHUFB Y6, Y8, Y8
- VPSHUFB Y6, Y9, Y6
- VPXOR Y7, Y8, Y7
- VPXOR Y5, Y6, Y5
- VPAND Y4, Y0, Y6
- VPSRLQ $0x04, Y4, Y8
- VPAND Y0, Y8, Y8
- VBROADCASTI128 32(AX), Y9
- VBROADCASTI128 96(AX), Y10
- VPSHUFB Y6, Y9, Y9
- VPSHUFB Y6, Y10, Y6
- VPXOR Y7, Y9, Y7
- VPXOR Y5, Y6, Y5
- VBROADCASTI128 48(AX), Y9
- VBROADCASTI128 112(AX), Y6
- VPSHUFB Y8, Y9, Y9
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y7, Y9, Y1)
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU (R10), Y5
- VMOVDQU 32(R10), Y6
- VMOVDQU (DX), Y7
- VMOVDQU 32(DX), Y8
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPSRLQ $0x04, Y5, Y10
- VPAND Y0, Y5, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (CX), Y11
- VBROADCASTI128 64(CX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(CX), Y12
- VBROADCASTI128 80(CX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y6, Y0, Y10
- VPSRLQ $0x04, Y6, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(CX), Y13
- VBROADCASTI128 96(CX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(CX), Y13
- VBROADCASTI128 112(CX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y1)
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (CX), Y11
- VBROADCASTI128 64(CX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(CX), Y12
- VBROADCASTI128 80(CX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(CX), Y13
- VBROADCASTI128 96(CX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(CX), Y13
- VBROADCASTI128 112(CX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y3)
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU Y1, (R8)
- VMOVDQU Y2, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y3, (R9)
- VMOVDQU Y4, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y5, (R10)
- VMOVDQU Y6, 32(R10)
- ADDQ $0x40, R10
- VMOVDQU Y7, (DX)
- VMOVDQU Y8, 32(DX)
- ADDQ $0x40, DX
- SUBQ $0x40, SI
- JNZ loop
- VZEROUPPER
- RET
-
-// func fftDIT4_avx2_2(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·fftDIT4_avx2_2(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), AX
- MOVQ table02+48(FP), CX
- MOVQ $0x0000000f, DX
- MOVQ DX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), DX
- MOVQ work_base+0(FP), BX
- MOVQ 8(BX), SI
- XORQ DI, DI
- MOVQ (BX)(DI*1), R8
- ADDQ DX, DI
- MOVQ (BX)(DI*1), R9
- ADDQ DX, DI
- MOVQ (BX)(DI*1), R10
- ADDQ DX, DI
- MOVQ (BX)(DI*1), DX
-
-loop:
- VMOVDQU (R8), Y1
- VMOVDQU 32(R8), Y2
- VMOVDQU (R10), Y5
- VMOVDQU 32(R10), Y6
- VMOVDQU (R9), Y3
- VMOVDQU 32(R9), Y4
- VMOVDQU (DX), Y7
- VMOVDQU 32(DX), Y8
- VPSRLQ $0x04, Y5, Y10
- VPAND Y0, Y5, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (CX), Y11
- VBROADCASTI128 64(CX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(CX), Y12
- VBROADCASTI128 80(CX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y6, Y0, Y10
- VPSRLQ $0x04, Y6, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(CX), Y13
- VBROADCASTI128 96(CX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(CX), Y13
- VBROADCASTI128 112(CX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y1)
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (CX), Y11
- VBROADCASTI128 64(CX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(CX), Y12
- VBROADCASTI128 80(CX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(CX), Y13
- VBROADCASTI128 96(CX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(CX), Y13
- VBROADCASTI128 112(CX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y3)
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU Y1, (R8)
- VMOVDQU Y2, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y3, (R9)
- VMOVDQU Y4, 32(R9)
- ADDQ $0x40, R9
- VPSRLQ $0x04, Y7, Y2
- VPAND Y0, Y7, Y1
- VPAND Y0, Y2, Y2
- VBROADCASTI128 (AX), Y3
- VBROADCASTI128 64(AX), Y4
- VPSHUFB Y1, Y3, Y3
- VPSHUFB Y1, Y4, Y1
- VBROADCASTI128 16(AX), Y4
- VBROADCASTI128 80(AX), Y9
- VPSHUFB Y2, Y4, Y4
- VPSHUFB Y2, Y9, Y2
- VPXOR Y3, Y4, Y3
- VPXOR Y1, Y2, Y1
- VPAND Y8, Y0, Y2
- VPSRLQ $0x04, Y8, Y4
- VPAND Y0, Y4, Y4
- VBROADCASTI128 32(AX), Y9
- VBROADCASTI128 96(AX), Y10
- VPSHUFB Y2, Y9, Y9
- VPSHUFB Y2, Y10, Y2
- VPXOR Y3, Y9, Y3
- VPXOR Y1, Y2, Y1
- VBROADCASTI128 48(AX), Y9
- VBROADCASTI128 112(AX), Y2
- VPSHUFB Y4, Y9, Y9
- VPSHUFB Y4, Y2, Y2
- XOR3WAY( $0x00, Y3, Y9, Y5)
- XOR3WAY( $0x00, Y1, Y2, Y6)
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VMOVDQU Y5, (R10)
- VMOVDQU Y6, 32(R10)
- ADDQ $0x40, R10
- VMOVDQU Y7, (DX)
- VMOVDQU Y8, 32(DX)
- ADDQ $0x40, DX
- SUBQ $0x40, SI
- JNZ loop
- VZEROUPPER
- RET
-
-// func ifftDIT4_avx2_3(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·ifftDIT4_avx2_3(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), AX
- MOVQ table02+48(FP), AX
- MOVQ $0x0000000f, CX
- MOVQ CX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), CX
- MOVQ work_base+0(FP), DX
- MOVQ 8(DX), BX
- XORQ SI, SI
- MOVQ (DX)(SI*1), DI
- ADDQ CX, SI
- MOVQ (DX)(SI*1), R8
- ADDQ CX, SI
- MOVQ (DX)(SI*1), R9
- ADDQ CX, SI
- MOVQ (DX)(SI*1), CX
-
-loop:
- VMOVDQU (DI), Y1
- VMOVDQU 32(DI), Y2
- VMOVDQU (R8), Y3
- VMOVDQU 32(R8), Y4
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU (R9), Y5
- VMOVDQU 32(R9), Y6
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPSRLQ $0x04, Y5, Y10
- VPAND Y0, Y5, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (AX), Y11
- VBROADCASTI128 64(AX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(AX), Y12
- VBROADCASTI128 80(AX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y6, Y0, Y10
- VPSRLQ $0x04, Y6, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(AX), Y13
- VBROADCASTI128 96(AX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(AX), Y13
- VBROADCASTI128 112(AX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y1)
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (AX), Y11
- VBROADCASTI128 64(AX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(AX), Y12
- VBROADCASTI128 80(AX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(AX), Y13
- VBROADCASTI128 96(AX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(AX), Y13
- VBROADCASTI128 112(AX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y3)
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VMOVDQU Y1, (DI)
- VMOVDQU Y2, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y3, (R8)
- VMOVDQU Y4, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y5, (R9)
- VMOVDQU Y6, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y7, (CX)
- VMOVDQU Y8, 32(CX)
- ADDQ $0x40, CX
- SUBQ $0x40, BX
- JNZ loop
- VZEROUPPER
- RET
-
-// func fftDIT4_avx2_3(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·fftDIT4_avx2_3(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), AX
- MOVQ table02+48(FP), CX
- MOVQ $0x0000000f, CX
- MOVQ CX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), CX
- MOVQ work_base+0(FP), DX
- MOVQ 8(DX), BX
- XORQ SI, SI
- MOVQ (DX)(SI*1), DI
- ADDQ CX, SI
- MOVQ (DX)(SI*1), R8
- ADDQ CX, SI
- MOVQ (DX)(SI*1), R9
- ADDQ CX, SI
- MOVQ (DX)(SI*1), CX
-
-loop:
- VMOVDQU (DI), Y1
- VMOVDQU 32(DI), Y2
- VMOVDQU (R9), Y5
- VMOVDQU 32(R9), Y6
- VMOVDQU (R8), Y3
- VMOVDQU 32(R8), Y4
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU Y1, (DI)
- VMOVDQU Y2, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y3, (R8)
- VMOVDQU Y4, 32(R8)
- ADDQ $0x40, R8
- VPSRLQ $0x04, Y7, Y2
- VPAND Y0, Y7, Y1
- VPAND Y0, Y2, Y2
- VBROADCASTI128 (AX), Y3
- VBROADCASTI128 64(AX), Y4
- VPSHUFB Y1, Y3, Y3
- VPSHUFB Y1, Y4, Y1
- VBROADCASTI128 16(AX), Y4
- VBROADCASTI128 80(AX), Y9
- VPSHUFB Y2, Y4, Y4
- VPSHUFB Y2, Y9, Y2
- VPXOR Y3, Y4, Y3
- VPXOR Y1, Y2, Y1
- VPAND Y8, Y0, Y2
- VPSRLQ $0x04, Y8, Y4
- VPAND Y0, Y4, Y4
- VBROADCASTI128 32(AX), Y9
- VBROADCASTI128 96(AX), Y10
- VPSHUFB Y2, Y9, Y9
- VPSHUFB Y2, Y10, Y2
- VPXOR Y3, Y9, Y3
- VPXOR Y1, Y2, Y1
- VBROADCASTI128 48(AX), Y9
- VBROADCASTI128 112(AX), Y2
- VPSHUFB Y4, Y9, Y9
- VPSHUFB Y4, Y2, Y2
- XOR3WAY( $0x00, Y3, Y9, Y5)
- XOR3WAY( $0x00, Y1, Y2, Y6)
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VMOVDQU Y5, (R9)
- VMOVDQU Y6, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y7, (CX)
- VMOVDQU Y8, 32(CX)
- ADDQ $0x40, CX
- SUBQ $0x40, BX
- JNZ loop
- VZEROUPPER
- RET
-
-// func ifftDIT4_avx2_4(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·ifftDIT4_avx2_4(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), CX
- MOVQ table02+48(FP), DX
- MOVQ $0x0000000f, DX
- MOVQ DX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), DX
- MOVQ work_base+0(FP), BX
- MOVQ 8(BX), SI
- XORQ DI, DI
- MOVQ (BX)(DI*1), R8
- ADDQ DX, DI
- MOVQ (BX)(DI*1), R9
- ADDQ DX, DI
- MOVQ (BX)(DI*1), R10
- ADDQ DX, DI
- MOVQ (BX)(DI*1), DX
-
-loop:
- VMOVDQU (R8), Y1
- VMOVDQU 32(R8), Y2
- VMOVDQU (R9), Y3
- VMOVDQU 32(R9), Y4
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VPSRLQ $0x04, Y3, Y6
- VPAND Y0, Y3, Y5
- VPAND Y0, Y6, Y6
- VBROADCASTI128 (AX), Y7
- VBROADCASTI128 64(AX), Y8
- VPSHUFB Y5, Y7, Y7
- VPSHUFB Y5, Y8, Y5
- VBROADCASTI128 16(AX), Y8
- VBROADCASTI128 80(AX), Y9
- VPSHUFB Y6, Y8, Y8
- VPSHUFB Y6, Y9, Y6
- VPXOR Y7, Y8, Y7
- VPXOR Y5, Y6, Y5
- VPAND Y4, Y0, Y6
- VPSRLQ $0x04, Y4, Y8
- VPAND Y0, Y8, Y8
- VBROADCASTI128 32(AX), Y9
- VBROADCASTI128 96(AX), Y10
- VPSHUFB Y6, Y9, Y9
- VPSHUFB Y6, Y10, Y6
- VPXOR Y7, Y9, Y7
- VPXOR Y5, Y6, Y5
- VBROADCASTI128 48(AX), Y9
- VBROADCASTI128 112(AX), Y6
- VPSHUFB Y8, Y9, Y9
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y7, Y9, Y1)
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU (R10), Y5
- VMOVDQU 32(R10), Y6
- VMOVDQU (DX), Y7
- VMOVDQU 32(DX), Y8
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (CX), Y11
- VBROADCASTI128 64(CX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(CX), Y12
- VBROADCASTI128 80(CX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(CX), Y13
- VBROADCASTI128 96(CX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(CX), Y13
- VBROADCASTI128 112(CX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y5)
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VMOVDQU Y1, (R8)
- VMOVDQU Y2, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y3, (R9)
- VMOVDQU Y4, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y5, (R10)
- VMOVDQU Y6, 32(R10)
- ADDQ $0x40, R10
- VMOVDQU Y7, (DX)
- VMOVDQU Y8, 32(DX)
- ADDQ $0x40, DX
- SUBQ $0x40, SI
- JNZ loop
- VZEROUPPER
- RET
-
-// func fftDIT4_avx2_4(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·fftDIT4_avx2_4(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), CX
- MOVQ table02+48(FP), CX
- MOVQ $0x0000000f, DX
- MOVQ DX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), DX
- MOVQ work_base+0(FP), BX
- MOVQ 8(BX), SI
- XORQ DI, DI
- MOVQ (BX)(DI*1), R8
- ADDQ DX, DI
- MOVQ (BX)(DI*1), R9
- ADDQ DX, DI
- MOVQ (BX)(DI*1), R10
- ADDQ DX, DI
- MOVQ (BX)(DI*1), DX
-
-loop:
- VMOVDQU (R8), Y1
- VMOVDQU 32(R8), Y2
- VMOVDQU (R10), Y5
- VMOVDQU 32(R10), Y6
- VMOVDQU (R9), Y3
- VMOVDQU 32(R9), Y4
- VMOVDQU (DX), Y7
- VMOVDQU 32(DX), Y8
- VPSRLQ $0x04, Y5, Y10
- VPAND Y0, Y5, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (CX), Y11
- VBROADCASTI128 64(CX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(CX), Y12
- VBROADCASTI128 80(CX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y6, Y0, Y10
- VPSRLQ $0x04, Y6, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(CX), Y13
- VBROADCASTI128 96(CX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(CX), Y13
- VBROADCASTI128 112(CX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y1)
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (CX), Y11
- VBROADCASTI128 64(CX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(CX), Y12
- VBROADCASTI128 80(CX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(CX), Y13
- VBROADCASTI128 96(CX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(CX), Y13
- VBROADCASTI128 112(CX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y3)
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPSRLQ $0x04, Y3, Y10
- VPAND Y0, Y3, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (AX), Y11
- VBROADCASTI128 64(AX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(AX), Y12
- VBROADCASTI128 80(AX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y4, Y0, Y10
- VPSRLQ $0x04, Y4, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(AX), Y13
- VBROADCASTI128 96(AX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(AX), Y13
- VBROADCASTI128 112(AX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y1)
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU Y1, (R8)
- VMOVDQU Y2, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y3, (R9)
- VMOVDQU Y4, 32(R9)
- ADDQ $0x40, R9
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VMOVDQU Y5, (R10)
- VMOVDQU Y6, 32(R10)
- ADDQ $0x40, R10
- VMOVDQU Y7, (DX)
- VMOVDQU Y8, 32(DX)
- ADDQ $0x40, DX
- SUBQ $0x40, SI
- JNZ loop
- VZEROUPPER
- RET
-
-// func ifftDIT4_avx2_5(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·ifftDIT4_avx2_5(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), AX
- MOVQ table02+48(FP), CX
- MOVQ $0x0000000f, CX
- MOVQ CX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), CX
- MOVQ work_base+0(FP), DX
- MOVQ 8(DX), BX
- XORQ SI, SI
- MOVQ (DX)(SI*1), DI
- ADDQ CX, SI
- MOVQ (DX)(SI*1), R8
- ADDQ CX, SI
- MOVQ (DX)(SI*1), R9
- ADDQ CX, SI
- MOVQ (DX)(SI*1), CX
-
-loop:
- VMOVDQU (DI), Y1
- VMOVDQU 32(DI), Y2
- VMOVDQU (R8), Y3
- VMOVDQU 32(R8), Y4
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU (R9), Y5
- VMOVDQU 32(R9), Y6
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (AX), Y11
- VBROADCASTI128 64(AX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(AX), Y12
- VBROADCASTI128 80(AX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(AX), Y13
- VBROADCASTI128 96(AX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(AX), Y13
- VBROADCASTI128 112(AX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y5)
- XOR3WAY( $0x00, Y9, Y10, Y6)
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VMOVDQU Y1, (DI)
- VMOVDQU Y2, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y3, (R8)
- VMOVDQU Y4, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y5, (R9)
- VMOVDQU Y6, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y7, (CX)
- VMOVDQU Y8, 32(CX)
- ADDQ $0x40, CX
- SUBQ $0x40, BX
- JNZ loop
- VZEROUPPER
- RET
-
-// func fftDIT4_avx2_5(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·fftDIT4_avx2_5(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), CX
- MOVQ table02+48(FP), CX
- MOVQ $0x0000000f, CX
- MOVQ CX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), CX
- MOVQ work_base+0(FP), DX
- MOVQ 8(DX), BX
- XORQ SI, SI
- MOVQ (DX)(SI*1), DI
- ADDQ CX, SI
- MOVQ (DX)(SI*1), R8
- ADDQ CX, SI
- MOVQ (DX)(SI*1), R9
- ADDQ CX, SI
- MOVQ (DX)(SI*1), CX
-
-loop:
- VMOVDQU (DI), Y1
- VMOVDQU 32(DI), Y2
- VMOVDQU (R9), Y5
- VMOVDQU 32(R9), Y6
- VMOVDQU (R8), Y3
- VMOVDQU 32(R8), Y4
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPSRLQ $0x04, Y3, Y10
- VPAND Y0, Y3, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (AX), Y11
- VBROADCASTI128 64(AX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(AX), Y12
- VBROADCASTI128 80(AX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y4, Y0, Y10
- VPSRLQ $0x04, Y4, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(AX), Y13
- VBROADCASTI128 96(AX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(AX), Y13
- VBROADCASTI128 112(AX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y1)
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU Y1, (DI)
- VMOVDQU Y2, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y3, (R8)
- VMOVDQU Y4, 32(R8)
- ADDQ $0x40, R8
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VMOVDQU Y5, (R9)
- VMOVDQU Y6, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y7, (CX)
- VMOVDQU Y8, 32(CX)
- ADDQ $0x40, CX
- SUBQ $0x40, BX
- JNZ loop
- VZEROUPPER
- RET
-
-// func ifftDIT4_avx2_6(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·ifftDIT4_avx2_6(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), CX
- MOVQ table02+48(FP), CX
- MOVQ $0x0000000f, CX
- MOVQ CX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), CX
- MOVQ work_base+0(FP), DX
- MOVQ 8(DX), BX
- XORQ SI, SI
- MOVQ (DX)(SI*1), DI
- ADDQ CX, SI
- MOVQ (DX)(SI*1), R8
- ADDQ CX, SI
- MOVQ (DX)(SI*1), R9
- ADDQ CX, SI
- MOVQ (DX)(SI*1), CX
-
-loop:
- VMOVDQU (DI), Y1
- VMOVDQU 32(DI), Y2
- VMOVDQU (R8), Y3
- VMOVDQU 32(R8), Y4
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VPSRLQ $0x04, Y3, Y6
- VPAND Y0, Y3, Y5
- VPAND Y0, Y6, Y6
- VBROADCASTI128 (AX), Y7
- VBROADCASTI128 64(AX), Y8
- VPSHUFB Y5, Y7, Y7
- VPSHUFB Y5, Y8, Y5
- VBROADCASTI128 16(AX), Y8
- VBROADCASTI128 80(AX), Y9
- VPSHUFB Y6, Y8, Y8
- VPSHUFB Y6, Y9, Y6
- VPXOR Y7, Y8, Y7
- VPXOR Y5, Y6, Y5
- VPAND Y4, Y0, Y6
- VPSRLQ $0x04, Y4, Y8
- VPAND Y0, Y8, Y8
- VBROADCASTI128 32(AX), Y9
- VBROADCASTI128 96(AX), Y10
- VPSHUFB Y6, Y9, Y9
- VPSHUFB Y6, Y10, Y6
- VPXOR Y7, Y9, Y7
- VPXOR Y5, Y6, Y5
- VBROADCASTI128 48(AX), Y9
- VBROADCASTI128 112(AX), Y6
- VPSHUFB Y8, Y9, Y9
- VPSHUFB Y8, Y6, Y6
- XOR3WAY( $0x00, Y7, Y9, Y1)
- XOR3WAY( $0x00, Y5, Y6, Y2)
- VMOVDQU (R9), Y5
- VMOVDQU 32(R9), Y6
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VMOVDQU Y1, (DI)
- VMOVDQU Y2, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y3, (R8)
- VMOVDQU Y4, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y5, (R9)
- VMOVDQU Y6, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y7, (CX)
- VMOVDQU Y8, 32(CX)
- ADDQ $0x40, CX
- SUBQ $0x40, BX
- JNZ loop
- VZEROUPPER
- RET
-
-// func fftDIT4_avx2_6(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
-TEXT ·fftDIT4_avx2_6(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), AX
- MOVQ table02+48(FP), AX
- MOVQ $0x0000000f, CX
- MOVQ CX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), CX
- MOVQ work_base+0(FP), DX
- MOVQ 8(DX), BX
- XORQ SI, SI
- MOVQ (DX)(SI*1), DI
- ADDQ CX, SI
- MOVQ (DX)(SI*1), R8
- ADDQ CX, SI
- MOVQ (DX)(SI*1), R9
- ADDQ CX, SI
- MOVQ (DX)(SI*1), CX
-
-loop:
- VMOVDQU (DI), Y1
- VMOVDQU 32(DI), Y2
- VMOVDQU (R9), Y5
- VMOVDQU 32(R9), Y6
- VMOVDQU (R8), Y3
- VMOVDQU 32(R8), Y4
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSRLQ $0x04, Y5, Y10
- VPAND Y0, Y5, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (AX), Y11
- VBROADCASTI128 64(AX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(AX), Y12
- VBROADCASTI128 80(AX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y6, Y0, Y10
- VPSRLQ $0x04, Y6, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(AX), Y13
- VBROADCASTI128 96(AX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(AX), Y13
- VBROADCASTI128 112(AX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y1)
- XOR3WAY( $0x00, Y9, Y10, Y2)
- VPSRLQ $0x04, Y7, Y10
- VPAND Y0, Y7, Y9
- VPAND Y0, Y10, Y10
- VBROADCASTI128 (AX), Y11
- VBROADCASTI128 64(AX), Y12
- VPSHUFB Y9, Y11, Y11
- VPSHUFB Y9, Y12, Y9
- VBROADCASTI128 16(AX), Y12
- VBROADCASTI128 80(AX), Y13
- VPSHUFB Y10, Y12, Y12
- VPSHUFB Y10, Y13, Y10
- VPXOR Y11, Y12, Y11
- VPXOR Y9, Y10, Y9
- VPAND Y8, Y0, Y10
- VPSRLQ $0x04, Y8, Y12
- VPAND Y0, Y12, Y12
- VBROADCASTI128 32(AX), Y13
- VBROADCASTI128 96(AX), Y14
- VPSHUFB Y10, Y13, Y13
- VPSHUFB Y10, Y14, Y10
- VPXOR Y11, Y13, Y11
- VPXOR Y9, Y10, Y9
- VBROADCASTI128 48(AX), Y13
- VBROADCASTI128 112(AX), Y10
- VPSHUFB Y12, Y13, Y13
- VPSHUFB Y12, Y10, Y10
- XOR3WAY( $0x00, Y11, Y13, Y3)
- XOR3WAY( $0x00, Y9, Y10, Y4)
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y4, Y8, Y8
- VPXOR Y1, Y3, Y3
- VPXOR Y2, Y4, Y4
- VMOVDQU Y1, (DI)
- VMOVDQU Y2, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y3, (R8)
- VMOVDQU Y4, 32(R8)
- ADDQ $0x40, R8
- VPXOR Y5, Y7, Y7
- VPXOR Y6, Y8, Y8
- VMOVDQU Y5, (R9)
- VMOVDQU Y6, 32(R9)
- ADDQ $0x40, R9
- VMOVDQU Y7, (CX)
- VMOVDQU Y8, 32(CX)
- ADDQ $0x40, CX
- SUBQ $0x40, BX
- JNZ loop
- VZEROUPPER
- RET
-
-// func ifftDIT4_avx2_7(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, SSE2
-TEXT ·ifftDIT4_avx2_7(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), AX
- MOVQ table02+48(FP), AX
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), CX
- MOVQ 8(CX), DX
- XORQ BX, BX
- MOVQ (CX)(BX*1), SI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), DI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), R8
- ADDQ AX, BX
- MOVQ (CX)(BX*1), AX
-
-loop:
- VMOVDQU (SI), Y0
- VMOVDQU 32(SI), Y1
- VMOVDQU (DI), Y2
- VMOVDQU 32(DI), Y3
- VPXOR Y0, Y2, Y2
- VPXOR Y1, Y3, Y3
- VMOVDQU (R8), Y4
- VMOVDQU 32(R8), Y5
- VMOVDQU (AX), Y6
- VMOVDQU 32(AX), Y7
- VPXOR Y4, Y6, Y6
- VPXOR Y5, Y7, Y7
- VPXOR Y0, Y4, Y4
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VMOVDQU Y0, (SI)
- VMOVDQU Y1, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y2, (DI)
- VMOVDQU Y3, 32(DI)
- ADDQ $0x40, DI
- VMOVDQU Y4, (R8)
- VMOVDQU Y5, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y6, (AX)
- VMOVDQU Y7, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, DX
- JNZ loop
- VZEROUPPER
- RET
-
-// func fftDIT4_avx2_7(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
-// Requires: AVX, AVX2, SSE2
-TEXT ·fftDIT4_avx2_7(SB), NOSPLIT, $0-56
- // dist must be multiplied by 24 (size of slice header)
- MOVQ table01+32(FP), AX
- MOVQ table23+40(FP), AX
- MOVQ table02+48(FP), AX
- MOVQ $0x0000000f, AX
- MOVQ AX, X0
- VPBROADCASTB X0, Y0
- MOVQ dist+24(FP), AX
- MOVQ work_base+0(FP), CX
- MOVQ 8(CX), DX
- XORQ BX, BX
- MOVQ (CX)(BX*1), SI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), DI
- ADDQ AX, BX
- MOVQ (CX)(BX*1), R8
- ADDQ AX, BX
- MOVQ (CX)(BX*1), AX
-
-loop:
- VMOVDQU (SI), Y0
- VMOVDQU 32(SI), Y1
- VMOVDQU (R8), Y4
- VMOVDQU 32(R8), Y5
- VMOVDQU (DI), Y2
- VMOVDQU 32(DI), Y3
- VMOVDQU (AX), Y6
- VMOVDQU 32(AX), Y7
- VPXOR Y0, Y4, Y4
- VPXOR Y1, Y5, Y5
- VPXOR Y2, Y6, Y6
- VPXOR Y3, Y7, Y7
- VPXOR Y0, Y2, Y2
- VPXOR Y1, Y3, Y3
- VMOVDQU Y0, (SI)
- VMOVDQU Y1, 32(SI)
- ADDQ $0x40, SI
- VMOVDQU Y2, (DI)
- VMOVDQU Y3, 32(DI)
- ADDQ $0x40, DI
- VPXOR Y4, Y6, Y6
- VPXOR Y5, Y7, Y7
- VMOVDQU Y4, (R8)
- VMOVDQU Y5, 32(R8)
- ADDQ $0x40, R8
- VMOVDQU Y6, (AX)
- VMOVDQU Y7, 32(AX)
- ADDQ $0x40, AX
- SUBQ $0x40, DX
- JNZ loop
- VZEROUPPER
- RET
-
-// func ifftDIT2_ssse3(x []byte, y []byte, table *[128]uint8)
-// Requires: SSE, SSE2, SSSE3
-TEXT ·ifftDIT2_ssse3(SB), NOSPLIT, $0-56
- MOVQ table+48(FP), AX
- MOVUPS (AX), X0
- MOVUPS 64(AX), X1
- MOVUPS 16(AX), X2
- MOVUPS 80(AX), X3
- MOVUPS 32(AX), X4
- MOVUPS 96(AX), X5
- XORPS X6, X6
- MOVQ $0x0000000f, CX
- MOVQ CX, X7
- PSHUFB X6, X7
- MOVQ x_len+8(FP), CX
- MOVQ x_base+0(FP), DX
- MOVQ y_base+24(FP), BX
-
-loop:
- MOVUPS (DX), X6
- MOVUPS 32(DX), X8
- MOVUPS (BX), X9
- MOVUPS 32(BX), X10
- PXOR X6, X9
- PXOR X8, X10
- MOVUPS X9, (BX)
- MOVUPS X10, 32(BX)
- MOVAPS X9, X11
- PSRLQ $0x04, X11
- MOVAPS X9, X9
- PAND X7, X9
- PAND X7, X11
- MOVUPS X0, X12
- MOVUPS X1, X13
- PSHUFB X9, X12
- PSHUFB X9, X13
- MOVUPS X2, X9
- MOVUPS X3, X14
- PSHUFB X11, X9
- PSHUFB X11, X14
- PXOR X9, X12
- PXOR X14, X13
- MOVAPS X10, X9
- MOVAPS X10, X10
- PAND X7, X9
- PSRLQ $0x04, X10
- PAND X7, X10
- MOVUPS X4, X11
- MOVUPS X5, X14
- PSHUFB X9, X11
- PSHUFB X9, X14
- PXOR X11, X12
- PXOR X14, X13
- MOVUPS 48(AX), X11
- MOVUPS 112(AX), X14
- PSHUFB X10, X11
- PSHUFB X10, X14
- PXOR X11, X12
- PXOR X14, X13
- PXOR X12, X6
- PXOR X13, X8
- MOVUPS X6, (DX)
- MOVUPS X8, 32(DX)
- MOVUPS 16(DX), X6
- MOVUPS 48(DX), X8
- MOVUPS 16(BX), X9
- MOVUPS 48(BX), X10
- PXOR X6, X9
- PXOR X8, X10
- MOVUPS X9, 16(BX)
- MOVUPS X10, 48(BX)
- MOVAPS X9, X11
- PSRLQ $0x04, X11
- MOVAPS X9, X9
- PAND X7, X9
- PAND X7, X11
- MOVUPS X0, X12
- MOVUPS X1, X13
- PSHUFB X9, X12
- PSHUFB X9, X13
- MOVUPS X2, X9
- MOVUPS X3, X14
- PSHUFB X11, X9
- PSHUFB X11, X14
- PXOR X9, X12
- PXOR X14, X13
- MOVAPS X10, X9
- MOVAPS X10, X10
- PAND X7, X9
- PSRLQ $0x04, X10
- PAND X7, X10
- MOVUPS X4, X11
- MOVUPS X5, X14
- PSHUFB X9, X11
- PSHUFB X9, X14
- PXOR X11, X12
- PXOR X14, X13
- MOVUPS 48(AX), X11
- MOVUPS 112(AX), X14
- PSHUFB X10, X11
- PSHUFB X10, X14
- PXOR X11, X12
- PXOR X14, X13
- PXOR X12, X6
- PXOR X13, X8
- MOVUPS X6, 16(DX)
- MOVUPS X8, 48(DX)
- ADDQ $0x40, DX
- ADDQ $0x40, BX
- SUBQ $0x40, CX
- JNZ loop
- RET
-
-// func fftDIT2_ssse3(x []byte, y []byte, table *[128]uint8)
-// Requires: SSE, SSE2, SSSE3
-TEXT ·fftDIT2_ssse3(SB), NOSPLIT, $0-56
- MOVQ table+48(FP), AX
- MOVUPS (AX), X0
- MOVUPS 64(AX), X1
- MOVUPS 16(AX), X2
- MOVUPS 80(AX), X3
- MOVUPS 32(AX), X4
- MOVUPS 96(AX), X5
- XORPS X6, X6
- MOVQ $0x0000000f, CX
- MOVQ CX, X7
- PSHUFB X6, X7
- MOVQ x_len+8(FP), CX
- MOVQ x_base+0(FP), DX
- MOVQ y_base+24(FP), BX
-
-loop:
- MOVUPS (BX), X9
- MOVUPS 32(BX), X10
- MOVAPS X9, X8
- PSRLQ $0x04, X8
- MOVAPS X9, X6
- PAND X7, X6
- PAND X7, X8
- MOVUPS X0, X11
- MOVUPS X1, X12
- PSHUFB X6, X11
- PSHUFB X6, X12
- MOVUPS X2, X6
- MOVUPS X3, X13
- PSHUFB X8, X6
- PSHUFB X8, X13
- PXOR X6, X11
- PXOR X13, X12
- MOVAPS X10, X6
- MOVAPS X10, X8
- PAND X7, X6
- PSRLQ $0x04, X8
- PAND X7, X8
- MOVUPS X4, X13
- MOVUPS X5, X14
- PSHUFB X6, X13
- PSHUFB X6, X14
- PXOR X13, X11
- PXOR X14, X12
- MOVUPS 48(AX), X13
- MOVUPS 112(AX), X14
- PSHUFB X8, X13
- PSHUFB X8, X14
- PXOR X13, X11
- PXOR X14, X12
- MOVUPS (DX), X6
- MOVUPS 32(DX), X8
- PXOR X11, X6
- PXOR X12, X8
- MOVUPS X6, (DX)
- MOVUPS X8, 32(DX)
- PXOR X6, X9
- PXOR X8, X10
- MOVUPS X9, (BX)
- MOVUPS X10, 32(BX)
- MOVUPS 16(BX), X9
- MOVUPS 48(BX), X10
- MOVAPS X9, X8
- PSRLQ $0x04, X8
- MOVAPS X9, X6
- PAND X7, X6
- PAND X7, X8
- MOVUPS X0, X11
- MOVUPS X1, X12
- PSHUFB X6, X11
- PSHUFB X6, X12
- MOVUPS X2, X6
- MOVUPS X3, X13
- PSHUFB X8, X6
- PSHUFB X8, X13
- PXOR X6, X11
- PXOR X13, X12
- MOVAPS X10, X6
- MOVAPS X10, X8
- PAND X7, X6
- PSRLQ $0x04, X8
- PAND X7, X8
- MOVUPS X4, X13
- MOVUPS X5, X14
- PSHUFB X6, X13
- PSHUFB X6, X14
- PXOR X13, X11
- PXOR X14, X12
- MOVUPS 48(AX), X13
- MOVUPS 112(AX), X14
- PSHUFB X8, X13
- PSHUFB X8, X14
- PXOR X13, X11
- PXOR X14, X12
- MOVUPS 16(DX), X6
- MOVUPS 48(DX), X8
- PXOR X11, X6
- PXOR X12, X8
- MOVUPS X6, 16(DX)
- MOVUPS X8, 48(DX)
- PXOR X6, X9
- PXOR X8, X10
- MOVUPS X9, 16(BX)
- MOVUPS X10, 48(BX)
- ADDQ $0x40, DX
- ADDQ $0x40, BX
- SUBQ $0x40, CX
- JNZ loop
- RET
-
-// func mulgf16_ssse3(x []byte, y []byte, table *[128]uint8)
-// Requires: SSE, SSE2, SSSE3
-TEXT ·mulgf16_ssse3(SB), NOSPLIT, $0-56
- MOVQ table+48(FP), AX
- MOVUPS (AX), X0
- MOVUPS 64(AX), X1
- MOVUPS 16(AX), X2
- MOVUPS 80(AX), X3
- MOVUPS 32(AX), X4
- MOVUPS 96(AX), X5
- MOVUPS 48(AX), X6
- MOVUPS 112(AX), X7
- MOVQ x_len+8(FP), AX
- MOVQ x_base+0(FP), CX
- MOVQ y_base+24(FP), DX
- XORPS X8, X8
- MOVQ $0x0000000f, BX
- MOVQ BX, X9
- PSHUFB X8, X9
-
-loop:
- MOVUPS (DX), X8
- MOVUPS 32(DX), X10
- MOVAPS X8, X11
- PSRLQ $0x04, X11
- MOVAPS X8, X8
- PAND X9, X8
- PAND X9, X11
- MOVUPS X0, X12
- MOVUPS X1, X13
- PSHUFB X8, X12
- PSHUFB X8, X13
- MOVUPS X2, X8
- MOVUPS X3, X14
- PSHUFB X11, X8
- PSHUFB X11, X14
- PXOR X8, X12
- PXOR X14, X13
- MOVAPS X10, X8
- MOVAPS X10, X10
- PAND X9, X8
- PSRLQ $0x04, X10
- PAND X9, X10
- MOVUPS X4, X11
- MOVUPS X5, X14
- PSHUFB X8, X11
- PSHUFB X8, X14
- PXOR X11, X12
- PXOR X14, X13
- MOVUPS X6, X11
- MOVUPS X7, X14
- PSHUFB X10, X11
- PSHUFB X10, X14
- PXOR X11, X12
- PXOR X14, X13
- MOVUPS X12, (CX)
- MOVUPS X13, 32(CX)
- MOVUPS 16(DX), X8
- MOVUPS 48(DX), X10
- MOVAPS X8, X11
- PSRLQ $0x04, X11
- MOVAPS X8, X8
- PAND X9, X8
- PAND X9, X11
- MOVUPS X0, X12
- MOVUPS X1, X13
- PSHUFB X8, X12
- PSHUFB X8, X13
- MOVUPS X2, X8
- MOVUPS X3, X14
- PSHUFB X11, X8
- PSHUFB X11, X14
- PXOR X8, X12
- PXOR X14, X13
- MOVAPS X10, X8
- MOVAPS X10, X10
- PAND X9, X8
- PSRLQ $0x04, X10
- PAND X9, X10
- MOVUPS X4, X11
- MOVUPS X5, X14
- PSHUFB X8, X11
- PSHUFB X8, X14
- PXOR X11, X12
- PXOR X14, X13
- MOVUPS X6, X11
- MOVUPS X7, X14
- PSHUFB X10, X11
- PSHUFB X10, X14
- PXOR X11, X12
- PXOR X14, X13
- MOVUPS X12, 16(CX)
- MOVUPS X13, 48(CX)
- ADDQ $0x40, CX
- ADDQ $0x40, DX
- SUBQ $0x40, AX
- JNZ loop
- RET
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go
deleted file mode 100644
index 303d6a9..0000000
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go
+++ /dev/null
@@ -1,18 +0,0 @@
-//go:build !amd64 || noasm || appengine || gccgo || nogen
-// +build !amd64 noasm appengine gccgo nogen
-
-package reedsolomon
-
-const maxAvx2Inputs = 1
-const maxAvx2Outputs = 1
-const minAvx2Size = 1
-const avxSizeMask = 0
-const avx2CodeGen = false
-
-func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
- panic("avx2 codegen not available")
-}
-
-func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
- panic("avx2 codegen not available")
-}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go
deleted file mode 100644
index 3078114..0000000
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go
+++ /dev/null
@@ -1,694 +0,0 @@
-// Code generated by command: go generate gen.go. DO NOT EDIT.
-
-//go:build !appengine && !noasm && gc && !nogen
-// +build !appengine,!noasm,gc,!nogen
-
-package reedsolomon
-
-import (
- "fmt"
-)
-
-const (
- avx2CodeGen = true
- maxAvx2Inputs = 10
- maxAvx2Outputs = 10
- minAvx2Size = 64
- avxSizeMask = maxInt - (minAvx2Size - 1)
-)
-
-func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
- n := (stop - start) & avxSizeMask
-
- switch len(in) {
- case 1:
- switch len(out) {
- case 1:
- mulAvxTwo_1x1_64(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_1x2_64(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_1x3_64(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_1x4(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_1x5(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_1x6(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_1x7(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_1x8(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_1x9(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_1x10(matrix, in, out, start, n)
- return n
- }
- case 2:
- switch len(out) {
- case 1:
- mulAvxTwo_2x1_64(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_2x2_64(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_2x3_64(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_2x4(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_2x5(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_2x6(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_2x7(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_2x8(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_2x9(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_2x10(matrix, in, out, start, n)
- return n
- }
- case 3:
- switch len(out) {
- case 1:
- mulAvxTwo_3x1_64(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_3x2_64(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_3x3_64(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_3x4(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_3x5(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_3x6(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_3x7(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_3x8(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_3x9(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_3x10(matrix, in, out, start, n)
- return n
- }
- case 4:
- switch len(out) {
- case 1:
- mulAvxTwo_4x1_64(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_4x2_64(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_4x3_64(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_4x4(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_4x5(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_4x6(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_4x7(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_4x8(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_4x9(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_4x10(matrix, in, out, start, n)
- return n
- }
- case 5:
- switch len(out) {
- case 1:
- mulAvxTwo_5x1_64(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_5x2_64(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_5x3_64(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_5x4(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_5x5(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_5x6(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_5x7(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_5x8(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_5x9(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_5x10(matrix, in, out, start, n)
- return n
- }
- case 6:
- switch len(out) {
- case 1:
- mulAvxTwo_6x1_64(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_6x2_64(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_6x3_64(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_6x4(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_6x5(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_6x6(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_6x7(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_6x8(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_6x9(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_6x10(matrix, in, out, start, n)
- return n
- }
- case 7:
- switch len(out) {
- case 1:
- mulAvxTwo_7x1_64(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_7x2_64(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_7x3_64(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_7x4(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_7x5(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_7x6(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_7x7(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_7x8(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_7x9(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_7x10(matrix, in, out, start, n)
- return n
- }
- case 8:
- switch len(out) {
- case 1:
- mulAvxTwo_8x1_64(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_8x2_64(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_8x3_64(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_8x4(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_8x5(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_8x6(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_8x7(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_8x8(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_8x9(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_8x10(matrix, in, out, start, n)
- return n
- }
- case 9:
- switch len(out) {
- case 1:
- mulAvxTwo_9x1_64(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_9x2_64(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_9x3_64(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_9x4(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_9x5(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_9x6(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_9x7(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_9x8(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_9x9(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_9x10(matrix, in, out, start, n)
- return n
- }
- case 10:
- switch len(out) {
- case 1:
- mulAvxTwo_10x1_64(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_10x2_64(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_10x3_64(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_10x4(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_10x5(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_10x6(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_10x7(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_10x8(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_10x9(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_10x10(matrix, in, out, start, n)
- return n
- }
- }
- panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
-}
-
-func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
- n := (stop - start) & avxSizeMask
-
- switch len(in) {
- case 1:
- switch len(out) {
- case 1:
- mulAvxTwo_1x1_64Xor(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_1x2_64Xor(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_1x3_64Xor(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_1x4Xor(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_1x5Xor(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_1x6Xor(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_1x7Xor(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_1x8Xor(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_1x9Xor(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_1x10Xor(matrix, in, out, start, n)
- return n
- }
- case 2:
- switch len(out) {
- case 1:
- mulAvxTwo_2x1_64Xor(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_2x2_64Xor(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_2x3_64Xor(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_2x4Xor(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_2x5Xor(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_2x6Xor(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_2x7Xor(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_2x8Xor(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_2x9Xor(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_2x10Xor(matrix, in, out, start, n)
- return n
- }
- case 3:
- switch len(out) {
- case 1:
- mulAvxTwo_3x1_64Xor(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_3x2_64Xor(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_3x3_64Xor(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_3x4Xor(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_3x5Xor(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_3x6Xor(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_3x7Xor(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_3x8Xor(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_3x9Xor(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_3x10Xor(matrix, in, out, start, n)
- return n
- }
- case 4:
- switch len(out) {
- case 1:
- mulAvxTwo_4x1_64Xor(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_4x2_64Xor(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_4x3_64Xor(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_4x4Xor(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_4x5Xor(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_4x6Xor(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_4x7Xor(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_4x8Xor(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_4x9Xor(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_4x10Xor(matrix, in, out, start, n)
- return n
- }
- case 5:
- switch len(out) {
- case 1:
- mulAvxTwo_5x1_64Xor(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_5x2_64Xor(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_5x3_64Xor(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_5x4Xor(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_5x5Xor(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_5x6Xor(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_5x7Xor(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_5x8Xor(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_5x9Xor(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_5x10Xor(matrix, in, out, start, n)
- return n
- }
- case 6:
- switch len(out) {
- case 1:
- mulAvxTwo_6x1_64Xor(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_6x2_64Xor(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_6x3_64Xor(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_6x4Xor(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_6x5Xor(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_6x6Xor(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_6x7Xor(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_6x8Xor(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_6x9Xor(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_6x10Xor(matrix, in, out, start, n)
- return n
- }
- case 7:
- switch len(out) {
- case 1:
- mulAvxTwo_7x1_64Xor(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_7x2_64Xor(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_7x3_64Xor(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_7x4Xor(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_7x5Xor(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_7x6Xor(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_7x7Xor(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_7x8Xor(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_7x9Xor(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_7x10Xor(matrix, in, out, start, n)
- return n
- }
- case 8:
- switch len(out) {
- case 1:
- mulAvxTwo_8x1_64Xor(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_8x2_64Xor(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_8x3_64Xor(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_8x4Xor(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_8x5Xor(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_8x6Xor(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_8x7Xor(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_8x8Xor(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_8x9Xor(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_8x10Xor(matrix, in, out, start, n)
- return n
- }
- case 9:
- switch len(out) {
- case 1:
- mulAvxTwo_9x1_64Xor(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_9x2_64Xor(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_9x3_64Xor(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_9x4Xor(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_9x5Xor(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_9x6Xor(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_9x7Xor(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_9x8Xor(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_9x9Xor(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_9x10Xor(matrix, in, out, start, n)
- return n
- }
- case 10:
- switch len(out) {
- case 1:
- mulAvxTwo_10x1_64Xor(matrix, in, out, start, n)
- return n
- case 2:
- mulAvxTwo_10x2_64Xor(matrix, in, out, start, n)
- return n
- case 3:
- mulAvxTwo_10x3_64Xor(matrix, in, out, start, n)
- return n
- case 4:
- mulAvxTwo_10x4Xor(matrix, in, out, start, n)
- return n
- case 5:
- mulAvxTwo_10x5Xor(matrix, in, out, start, n)
- return n
- case 6:
- mulAvxTwo_10x6Xor(matrix, in, out, start, n)
- return n
- case 7:
- mulAvxTwo_10x7Xor(matrix, in, out, start, n)
- return n
- case 8:
- mulAvxTwo_10x8Xor(matrix, in, out, start, n)
- return n
- case 9:
- mulAvxTwo_10x9Xor(matrix, in, out, start, n)
- return n
- case 10:
- mulAvxTwo_10x10Xor(matrix, in, out, start, n)
- return n
- }
- }
- panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
-}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_noasm.go b/vendor/github.com/klauspost/reedsolomon/galois_noasm.go
deleted file mode 100644
index 47e24d7..0000000
--- a/vendor/github.com/klauspost/reedsolomon/galois_noasm.go
+++ /dev/null
@@ -1,69 +0,0 @@
-//go:build (!amd64 || noasm || appengine || gccgo) && (!arm64 || noasm || appengine || gccgo) && (!ppc64le || noasm || appengine || gccgo)
-// +build !amd64 noasm appengine gccgo
-// +build !arm64 noasm appengine gccgo
-// +build !ppc64le noasm appengine gccgo
-
-// Copyright 2015, Klaus Post, see LICENSE for details.
-
-package reedsolomon
-
-func galMulSlice(c byte, in, out []byte, o *options) {
- out = out[:len(in)]
- if c == 1 {
- copy(out, in)
- return
- }
- mt := mulTable[c][:256]
- for n, input := range in {
- out[n] = mt[input]
- }
-}
-
-func galMulSliceXor(c byte, in, out []byte, o *options) {
- out = out[:len(in)]
- if c == 1 {
- sliceXor(in, out, o)
- return
- }
- mt := mulTable[c][:256]
- for n, input := range in {
- out[n] ^= mt[input]
- }
-}
-
-// simple slice xor
-func sliceXor(in, out []byte, o *options) {
- sliceXorGo(in, out, o)
-}
-
-func init() {
- defaultOptions.useAVX512 = false
-}
-
-// 4-way butterfly
-func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
- ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
-}
-
-// 4-way butterfly
-func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
- fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
-}
-
-// 2-way butterfly forward
-func fftDIT2(x, y []byte, log_m ffe, o *options) {
- // Reference version:
- refMulAdd(x, y, log_m)
- sliceXorGo(x, y, o)
-}
-
-// 2-way butterfly inverse
-func ifftDIT2(x, y []byte, log_m ffe, o *options) {
- // Reference version:
- sliceXorGo(x, y, o)
- refMulAdd(x, y, log_m)
-}
-
-func mulgf16(x, y []byte, log_m ffe, o *options) {
- refMul(x, y, log_m)
-}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go b/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go
deleted file mode 100644
index e67905b..0000000
--- a/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go
+++ /dev/null
@@ -1,14 +0,0 @@
-//go:build !amd64 || noasm || appengine || gccgo
-// +build !amd64 noasm appengine gccgo
-
-// Copyright 2020, Klaus Post, see LICENSE for details.
-
-package reedsolomon
-
-func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, byteCount int) {
- panic("codeSomeShardsAvx512 should not be called if built without asm")
-}
-
-func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, byteCount int) {
- panic("codeSomeShardsAvx512P should not be called if built without asm")
-}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
deleted file mode 100644
index 415828a..0000000
--- a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
+++ /dev/null
@@ -1,102 +0,0 @@
-//go:build !noasm && !appengine && !gccgo
-// +build !noasm,!appengine,!gccgo
-
-// Copyright 2015, Klaus Post, see LICENSE for details.
-// Copyright 2018, Minio, Inc.
-
-package reedsolomon
-
-//go:noescape
-func galMulPpc(low, high, in, out []byte)
-
-//go:noescape
-func galMulPpcXor(low, high, in, out []byte)
-
-// This is what the assembler routines do in blocks of 16 bytes:
-/*
-func galMulPpc(low, high, in, out []byte) {
- for n, input := range in {
- l := input & 0xf
- h := input >> 4
- out[n] = low[l] ^ high[h]
- }
-}
-func galMulPpcXor(low, high, in, out []byte) {
- for n, input := range in {
- l := input & 0xf
- h := input >> 4
- out[n] ^= low[l] ^ high[h]
- }
-}
-*/
-
-func galMulSlice(c byte, in, out []byte, o *options) {
- if c == 1 {
- copy(out, in)
- return
- }
- done := (len(in) >> 4) << 4
- if done > 0 {
- galMulPpc(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
- }
- remain := len(in) - done
- if remain > 0 {
- mt := mulTable[c][:256]
- for i := done; i < len(in); i++ {
- out[i] = mt[in[i]]
- }
- }
-}
-
-func galMulSliceXor(c byte, in, out []byte, o *options) {
- if c == 1 {
- sliceXor(in, out, o)
- return
- }
- done := (len(in) >> 4) << 4
- if done > 0 {
- galMulPpcXor(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
- }
- remain := len(in) - done
- if remain > 0 {
- mt := mulTable[c][:256]
- for i := done; i < len(in); i++ {
- out[i] ^= mt[in[i]]
- }
- }
-}
-
-// slice galois add
-func sliceXor(in, out []byte, o *options) {
- for n, input := range in {
- out[n] ^= input
- }
-}
-
-// 4-way butterfly
-func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
- ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
-}
-
-// 4-way butterfly
-func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
- fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
-}
-
-// 2-way butterfly forward
-func fftDIT2(x, y []byte, log_m ffe, o *options) {
- // Reference version:
- refMulAdd(x, y, log_m)
- sliceXor(x, y, o)
-}
-
-// 2-way butterfly inverse
-func ifftDIT2(x, y []byte, log_m ffe, o *options) {
- // Reference version:
- sliceXor(x, y, o)
- refMulAdd(x, y, log_m)
-}
-
-func mulgf16(x, y []byte, log_m ffe, o *options) {
- refMul(x, y, log_m)
-}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
deleted file mode 100644
index 7213c61..0000000
--- a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
+++ /dev/null
@@ -1,126 +0,0 @@
-//+build !noasm
-//+build !appengine
-//+build !gccgo
-
-// Copyright 2015, Klaus Post, see LICENSE for details.
-// Copyright 2018, Minio, Inc.
-
-#include "textflag.h"
-
-#define LOW R3
-#define HIGH R4
-#define IN R5
-#define LEN R6
-#define OUT R7
-#define CONSTANTS R8
-#define OFFSET R9
-#define OFFSET1 R10
-#define OFFSET2 R11
-
-#define X6 VS34
-#define X6_ V2
-#define X7 VS35
-#define X7_ V3
-#define MSG VS36
-#define MSG_ V4
-#define MSG_HI VS37
-#define MSG_HI_ V5
-#define RESULT VS38
-#define RESULT_ V6
-#define ROTATE VS39
-#define ROTATE_ V7
-#define MASK VS40
-#define MASK_ V8
-#define FLIP VS41
-#define FLIP_ V9
-
-// func galMulPpc(low, high, in, out []byte)
-TEXT ·galMulPpc(SB), NOFRAME|NOSPLIT, $0-96
- MOVD low+0(FP), LOW
- MOVD high+24(FP), HIGH
- MOVD in+48(FP), IN
- MOVD in_len+56(FP), LEN
- MOVD out+72(FP), OUT
-
- MOVD $16, OFFSET1
- MOVD $32, OFFSET2
-
- MOVD $·constants(SB), CONSTANTS
- LXVD2X (CONSTANTS)(R0), ROTATE
- LXVD2X (CONSTANTS)(OFFSET1), MASK
- LXVD2X (CONSTANTS)(OFFSET2), FLIP
-
- LXVD2X (LOW)(R0), X6
- LXVD2X (HIGH)(R0), X7
- VPERM X6_, V31, FLIP_, X6_
- VPERM X7_, V31, FLIP_, X7_
-
- MOVD $0, OFFSET
-
-loop:
- LXVD2X (IN)(OFFSET), MSG
-
- VSRB MSG_, ROTATE_, MSG_HI_
- VAND MSG_, MASK_, MSG_
- VPERM X6_, V31, MSG_, MSG_
- VPERM X7_, V31, MSG_HI_, MSG_HI_
-
- VXOR MSG_, MSG_HI_, MSG_
-
- STXVD2X MSG, (OUT)(OFFSET)
-
- ADD $16, OFFSET, OFFSET
- CMP LEN, OFFSET
- BGT loop
- RET
-
-// func galMulPpcXorlow, high, in, out []byte)
-TEXT ·galMulPpcXor(SB), NOFRAME|NOSPLIT, $0-96
- MOVD low+0(FP), LOW
- MOVD high+24(FP), HIGH
- MOVD in+48(FP), IN
- MOVD in_len+56(FP), LEN
- MOVD out+72(FP), OUT
-
- MOVD $16, OFFSET1
- MOVD $32, OFFSET2
-
- MOVD $·constants(SB), CONSTANTS
- LXVD2X (CONSTANTS)(R0), ROTATE
- LXVD2X (CONSTANTS)(OFFSET1), MASK
- LXVD2X (CONSTANTS)(OFFSET2), FLIP
-
- LXVD2X (LOW)(R0), X6
- LXVD2X (HIGH)(R0), X7
- VPERM X6_, V31, FLIP_, X6_
- VPERM X7_, V31, FLIP_, X7_
-
- MOVD $0, OFFSET
-
-loopXor:
- LXVD2X (IN)(OFFSET), MSG
- LXVD2X (OUT)(OFFSET), RESULT
-
- VSRB MSG_, ROTATE_, MSG_HI_
- VAND MSG_, MASK_, MSG_
- VPERM X6_, V31, MSG_, MSG_
- VPERM X7_, V31, MSG_HI_, MSG_HI_
-
- VXOR MSG_, MSG_HI_, MSG_
- VXOR MSG_, RESULT_, RESULT_
-
- STXVD2X RESULT, (OUT)(OFFSET)
-
- ADD $16, OFFSET, OFFSET
- CMP LEN, OFFSET
- BGT loopXor
- RET
-
-DATA ·constants+0x0(SB)/8, $0x0404040404040404
-DATA ·constants+0x8(SB)/8, $0x0404040404040404
-DATA ·constants+0x10(SB)/8, $0x0f0f0f0f0f0f0f0f
-DATA ·constants+0x18(SB)/8, $0x0f0f0f0f0f0f0f0f
-DATA ·constants+0x20(SB)/8, $0x0706050403020100
-DATA ·constants+0x28(SB)/8, $0x0f0e0d0c0b0a0908
-
-GLOBL ·constants(SB), 8, $48
diff --git a/vendor/github.com/klauspost/reedsolomon/inversion_tree.go b/vendor/github.com/klauspost/reedsolomon/inversion_tree.go
deleted file mode 100644
index 3f97f81..0000000
--- a/vendor/github.com/klauspost/reedsolomon/inversion_tree.go
+++ /dev/null
@@ -1,164 +0,0 @@
-/**
- * A thread-safe tree which caches inverted matrices.
- *
- * Copyright 2016, Peter Collins
- */
-
-package reedsolomon
-
-import (
- "errors"
- "sync"
-)
-
-// The tree uses a Reader-Writer mutex to make it thread-safe
-// when accessing cached matrices and inserting new ones.
-type inversionTree struct {
- mutex sync.RWMutex
- root inversionNode
-}
-
-type inversionNode struct {
- matrix matrix
- children []*inversionNode
-}
-
-// newInversionTree initializes a tree for storing inverted matrices.
-// Note that the root node is the identity matrix as it implies
-// there were no errors with the original data.
-func newInversionTree(dataShards, parityShards int) *inversionTree {
- identity, _ := identityMatrix(dataShards)
- return &inversionTree{
- root: inversionNode{
- matrix: identity,
- children: make([]*inversionNode, dataShards+parityShards),
- },
- }
-}
-
-// GetInvertedMatrix returns the cached inverted matrix or nil if it
-// is not found in the tree keyed on the indices of invalid rows.
-func (t *inversionTree) GetInvertedMatrix(invalidIndices []int) matrix {
- if t == nil {
- return nil
- }
- // Lock the tree for reading before accessing the tree.
- t.mutex.RLock()
- defer t.mutex.RUnlock()
-
- // If no invalid indices were give we should return the root
- // identity matrix.
- if len(invalidIndices) == 0 {
- return t.root.matrix
- }
-
- // Recursively search for the inverted matrix in the tree, passing in
- // 0 as the parent index as we start at the root of the tree.
- return t.root.getInvertedMatrix(invalidIndices, 0)
-}
-
-// errAlreadySet is returned if the root node matrix is overwritten
-var errAlreadySet = errors.New("the root node identity matrix is already set")
-
-// InsertInvertedMatrix inserts a new inverted matrix into the tree
-// keyed by the indices of invalid rows. The total number of shards
-// is required for creating the proper length lists of child nodes for
-// each node.
-func (t *inversionTree) InsertInvertedMatrix(invalidIndices []int, matrix matrix, shards int) error {
- if t == nil {
- return nil
- }
- // If no invalid indices were given then we are done because the
- // root node is already set with the identity matrix.
- if len(invalidIndices) == 0 {
- return errAlreadySet
- }
-
- if !matrix.IsSquare() {
- return errNotSquare
- }
-
- // Lock the tree for writing and reading before accessing the tree.
- t.mutex.Lock()
- defer t.mutex.Unlock()
-
- // Recursively create nodes for the inverted matrix in the tree until
- // we reach the node to insert the matrix to. We start by passing in
- // 0 as the parent index as we start at the root of the tree.
- t.root.insertInvertedMatrix(invalidIndices, matrix, shards, 0)
-
- return nil
-}
-
-func (n *inversionNode) getInvertedMatrix(invalidIndices []int, parent int) matrix {
- // Get the child node to search next from the list of children. The
- // list of children starts relative to the parent index passed in
- // because the indices of invalid rows is sorted (by default). As we
- // search recursively, the first invalid index gets popped off the list,
- // so when searching through the list of children, use that first invalid
- // index to find the child node.
- firstIndex := invalidIndices[0]
- node := n.children[firstIndex-parent]
-
- // If the child node doesn't exist in the list yet, fail fast by
- // returning, so we can construct and insert the proper inverted matrix.
- if node == nil {
- return nil
- }
-
- // If there's more than one invalid index left in the list we should
- // keep searching recursively.
- if len(invalidIndices) > 1 {
- // Search recursively on the child node by passing in the invalid indices
- // with the first index popped off the front. Also the parent index to
- // pass down is the first index plus one.
- return node.getInvertedMatrix(invalidIndices[1:], firstIndex+1)
- }
- // If there aren't any more invalid indices to search, we've found our
- // node. Return it, however keep in mind that the matrix could still be
- // nil because intermediary nodes in the tree are created sometimes with
- // their inversion matrices uninitialized.
- return node.matrix
-}
-
-func (n *inversionNode) insertInvertedMatrix(invalidIndices []int, matrix matrix, shards, parent int) {
- // As above, get the child node to search next from the list of children.
- // The list of children starts relative to the parent index passed in
- // because the indices of invalid rows is sorted (by default). As we
- // search recursively, the first invalid index gets popped off the list,
- // so when searching through the list of children, use that first invalid
- // index to find the child node.
- firstIndex := invalidIndices[0]
- node := n.children[firstIndex-parent]
-
- // If the child node doesn't exist in the list yet, create a new
- // node because we have the writer lock and add it to the list
- // of children.
- if node == nil {
- // Make the length of the list of children equal to the number
- // of shards minus the first invalid index because the list of
- // invalid indices is sorted, so only this length of errors
- // are possible in the tree.
- node = &inversionNode{
- children: make([]*inversionNode, shards-firstIndex),
- }
- // Insert the new node into the tree at the first index relative
- // to the parent index that was given in this recursive call.
- n.children[firstIndex-parent] = node
- }
-
- // If there's more than one invalid index left in the list we should
- // keep searching recursively in order to find the node to add our
- // matrix.
- if len(invalidIndices) > 1 {
- // As above, search recursively on the child node by passing in
- // the invalid indices with the first index popped off the front.
- // Also the total number of shards and parent index are passed down
- // which is equal to the first index plus one.
- node.insertInvertedMatrix(invalidIndices[1:], matrix, shards, firstIndex+1)
- } else {
- // If there aren't any more invalid indices to search, we've found our
- // node. Cache the inverted matrix in this node.
- node.matrix = matrix
- }
-}
diff --git a/vendor/github.com/klauspost/reedsolomon/leopard.go b/vendor/github.com/klauspost/reedsolomon/leopard.go
deleted file mode 100644
index e9d122a..0000000
--- a/vendor/github.com/klauspost/reedsolomon/leopard.go
+++ /dev/null
@@ -1,1234 +0,0 @@
-package reedsolomon
-
-// This is a O(n*log n) implementation of Reed-Solomon
-// codes, ported from the C++ library https://github.com/catid/leopard.
-//
-// The implementation is based on the paper
-//
-// S.-J. Lin, T. Y. Al-Naffouri, Y. S. Han, and W.-H. Chung,
-// "Novel Polynomial Basis with Fast Fourier Transform
-// and Its Application to Reed-Solomon Erasure Codes"
-// IEEE Trans. on Information Theory, pp. 6284-6299, November, 2016.
-
-import (
- "bytes"
- "io"
- "math/bits"
- "sync"
- "unsafe"
-
- "github.com/klauspost/cpuid/v2"
-)
-
-// leopardFF16 is like reedSolomon but for more than 256 total shards.
-type leopardFF16 struct {
- dataShards int // Number of data shards, should not be modified.
- parityShards int // Number of parity shards, should not be modified.
- totalShards int // Total number of shards. Calculated, and should not be modified.
-
- workPool sync.Pool
-
- o options
-}
-
-// newFF16 is like New, but for more than 256 total shards.
-func newFF16(dataShards, parityShards int, opt options) (*leopardFF16, error) {
- initConstants()
-
- if dataShards <= 0 || parityShards <= 0 {
- return nil, ErrInvShardNum
- }
-
- if dataShards+parityShards > 65536 {
- return nil, ErrMaxShardNum
- }
-
- r := &leopardFF16{
- dataShards: dataShards,
- parityShards: parityShards,
- totalShards: dataShards + parityShards,
- o: opt,
- }
- return r, nil
-}
-
-var _ = Extensions(&leopardFF16{})
-
-func (r *leopardFF16) ShardSizeMultiple() int {
- return 64
-}
-
-func (r *leopardFF16) DataShards() int {
- return r.dataShards
-}
-
-func (r *leopardFF16) ParityShards() int {
- return r.parityShards
-}
-
-func (r *leopardFF16) TotalShards() int {
- return r.parityShards
-}
-
-type ffe uint16
-
-const (
- bitwidth = 16
- order = 1 << bitwidth
- modulus = order - 1
- polynomial = 0x1002D
-)
-
-var (
- fftSkew *[modulus]ffe
- logWalsh *[order]ffe
-)
-
-// Logarithm Tables
-var (
- logLUT *[order]ffe
- expLUT *[order]ffe
-)
-
-// Stores the partial products of x * y at offset x + y * 65536
-// Repeated accesses from the same y value are faster
-var mul16LUTs *[order]mul16LUT
-
-type mul16LUT struct {
- // Contains Lo product as a single lookup.
- // Should be XORed with Hi lookup for result.
- Lo [256]ffe
- Hi [256]ffe
-}
-
-// Stores lookup for avx2
-var multiply256LUT *[order][8 * 16]byte
-
-func (r *leopardFF16) Encode(shards [][]byte) error {
- if len(shards) != r.totalShards {
- return ErrTooFewShards
- }
-
- if err := checkShards(shards, false); err != nil {
- return err
- }
- return r.encode(shards)
-}
-
-func (r *leopardFF16) encode(shards [][]byte) error {
- shardSize := shardSize(shards)
- if shardSize%64 != 0 {
- return ErrShardSize
- }
-
- m := ceilPow2(r.parityShards)
- var work [][]byte
- if w, ok := r.workPool.Get().([][]byte); ok {
- work = w
- }
- if cap(work) >= m*2 {
- work = work[:m*2]
- } else {
- work = make([][]byte, m*2)
- }
- for i := range work {
- if cap(work[i]) < shardSize {
- work[i] = make([]byte, shardSize)
- } else {
- work[i] = work[i][:shardSize]
- }
- }
- defer r.workPool.Put(work)
-
- mtrunc := m
- if r.dataShards < mtrunc {
- mtrunc = r.dataShards
- }
-
- skewLUT := fftSkew[m-1:]
-
- sh := shards
- ifftDITEncoder(
- sh[:r.dataShards],
- mtrunc,
- work,
- nil, // No xor output
- m,
- skewLUT,
- &r.o,
- )
-
- lastCount := r.dataShards % m
- if m >= r.dataShards {
- goto skip_body
- }
-
- // For sets of m data pieces:
- for i := m; i+m <= r.dataShards; i += m {
- sh = sh[m:]
- skewLUT = skewLUT[m:]
-
- // work <- work xor IFFT(data + i, m, m + i)
-
- ifftDITEncoder(
- sh, // data source
- m,
- work[m:], // temporary workspace
- work, // xor destination
- m,
- skewLUT,
- &r.o,
- )
- }
-
- // Handle final partial set of m pieces:
- if lastCount != 0 {
- sh = sh[m:]
- skewLUT = skewLUT[m:]
-
- // work <- work xor IFFT(data + i, m, m + i)
-
- ifftDITEncoder(
- sh, // data source
- lastCount,
- work[m:], // temporary workspace
- work, // xor destination
- m,
- skewLUT,
- &r.o,
- )
- }
-
-skip_body:
- // work <- FFT(work, m, 0)
- fftDIT(work, r.parityShards, m, fftSkew[:], &r.o)
-
- for i, w := range work[:r.parityShards] {
- sh := shards[i+r.dataShards]
- if cap(sh) >= shardSize {
- sh = append(sh[:0], w...)
- } else {
- sh = w
- }
- shards[i+r.dataShards] = sh
- }
-
- return nil
-}
-
-func (r *leopardFF16) EncodeIdx(dataShard []byte, idx int, parity [][]byte) error {
- return ErrNotSupported
-}
-
-func (r *leopardFF16) Join(dst io.Writer, shards [][]byte, outSize int) error {
- // Do we have enough shards?
- if len(shards) < r.dataShards {
- return ErrTooFewShards
- }
- shards = shards[:r.dataShards]
-
- // Do we have enough data?
- size := 0
- for _, shard := range shards {
- if shard == nil {
- return ErrReconstructRequired
- }
- size += len(shard)
-
- // Do we have enough data already?
- if size >= outSize {
- break
- }
- }
- if size < outSize {
- return ErrShortData
- }
-
- // Copy data to dst
- write := outSize
- for _, shard := range shards {
- if write < len(shard) {
- _, err := dst.Write(shard[:write])
- return err
- }
- n, err := dst.Write(shard)
- if err != nil {
- return err
- }
- write -= n
- }
- return nil
-}
-
-func (r *leopardFF16) Update(shards [][]byte, newDatashards [][]byte) error {
- return ErrNotSupported
-}
-
-func (r *leopardFF16) Split(data []byte) ([][]byte, error) {
- if len(data) == 0 {
- return nil, ErrShortData
- }
- dataLen := len(data)
- // Calculate number of bytes per data shard.
- perShard := (len(data) + r.dataShards - 1) / r.dataShards
- perShard = ((perShard + 63) / 64) * 64
-
- if cap(data) > len(data) {
- data = data[:cap(data)]
- }
-
- // Only allocate memory if necessary
- var padding []byte
- if len(data) < (r.totalShards * perShard) {
- // calculate maximum number of full shards in `data` slice
- fullShards := len(data) / perShard
- padding = make([]byte, r.totalShards*perShard-perShard*fullShards)
- copy(padding, data[perShard*fullShards:])
- data = data[0 : perShard*fullShards]
- } else {
- for i := dataLen; i < dataLen+r.dataShards; i++ {
- data[i] = 0
- }
- }
-
- // Split into equal-length shards.
- dst := make([][]byte, r.totalShards)
- i := 0
- for ; i < len(dst) && len(data) >= perShard; i++ {
- dst[i] = data[:perShard:perShard]
- data = data[perShard:]
- }
-
- for j := 0; i+j < len(dst); j++ {
- dst[i+j] = padding[:perShard:perShard]
- padding = padding[perShard:]
- }
-
- return dst, nil
-}
-
-func (r *leopardFF16) ReconstructSome(shards [][]byte, required []bool) error {
- return r.ReconstructData(shards)
-}
-
-func (r *leopardFF16) Reconstruct(shards [][]byte) error {
- return r.reconstruct(shards, true)
-}
-
-func (r *leopardFF16) ReconstructData(shards [][]byte) error {
- return r.reconstruct(shards, false)
-}
-
-func (r *leopardFF16) Verify(shards [][]byte) (bool, error) {
- if len(shards) != r.totalShards {
- return false, ErrTooFewShards
- }
- if err := checkShards(shards, false); err != nil {
- return false, err
- }
-
- // Re-encode parity shards to temporary storage.
- shardSize := len(shards[0])
- outputs := make([][]byte, r.totalShards)
- copy(outputs, shards[:r.dataShards])
- for i := r.dataShards; i < r.totalShards; i++ {
- outputs[i] = make([]byte, shardSize)
- }
- if err := r.Encode(outputs); err != nil {
- return false, err
- }
-
- // Compare.
- for i := r.dataShards; i < r.totalShards; i++ {
- if !bytes.Equal(outputs[i], shards[i]) {
- return false, nil
- }
- }
- return true, nil
-}
-
-func (r *leopardFF16) reconstruct(shards [][]byte, recoverAll bool) error {
- if len(shards) != r.totalShards {
- return ErrTooFewShards
- }
-
- if err := checkShards(shards, true); err != nil {
- return err
- }
-
- // Quick check: are all of the shards present? If so, there's
- // nothing to do.
- numberPresent := 0
- dataPresent := 0
- for i := 0; i < r.totalShards; i++ {
- if len(shards[i]) != 0 {
- numberPresent++
- if i < r.dataShards {
- dataPresent++
- }
- }
- }
- if numberPresent == r.totalShards || !recoverAll && dataPresent == r.dataShards {
- // Cool. All of the shards have data. We don't
- // need to do anything.
- return nil
- }
-
- // Use only if we are missing less than 1/4 parity.
- useBits := r.totalShards-numberPresent <= r.parityShards/4
-
- // Check if we have enough to reconstruct.
- if numberPresent < r.dataShards {
- return ErrTooFewShards
- }
-
- shardSize := shardSize(shards)
- if shardSize%64 != 0 {
- return ErrShardSize
- }
-
- m := ceilPow2(r.parityShards)
- n := ceilPow2(m + r.dataShards)
-
- const LEO_ERROR_BITFIELD_OPT = true
-
- // Fill in error locations.
- var errorBits errorBitfield
- var errLocs [order]ffe
- for i := 0; i < r.parityShards; i++ {
- if len(shards[i+r.dataShards]) == 0 {
- errLocs[i] = 1
- if LEO_ERROR_BITFIELD_OPT && recoverAll {
- errorBits.set(i)
- }
- }
- }
- for i := r.parityShards; i < m; i++ {
- errLocs[i] = 1
- if LEO_ERROR_BITFIELD_OPT && recoverAll {
- errorBits.set(i)
- }
- }
- for i := 0; i < r.dataShards; i++ {
- if len(shards[i]) == 0 {
- errLocs[i+m] = 1
- if LEO_ERROR_BITFIELD_OPT {
- errorBits.set(i + m)
- }
- }
- }
-
- if LEO_ERROR_BITFIELD_OPT && useBits {
- errorBits.prepare()
- }
-
- // Evaluate error locator polynomial
- fwht(&errLocs, order, m+r.dataShards)
-
- for i := 0; i < order; i++ {
- errLocs[i] = ffe((uint(errLocs[i]) * uint(logWalsh[i])) % modulus)
- }
-
- fwht(&errLocs, order, order)
-
- var work [][]byte
- if w, ok := r.workPool.Get().([][]byte); ok {
- work = w
- }
- if cap(work) >= n {
- work = work[:n]
- } else {
- work = make([][]byte, n)
- }
- for i := range work {
- if cap(work[i]) < shardSize {
- work[i] = make([]byte, shardSize)
- } else {
- work[i] = work[i][:shardSize]
- }
- }
- defer r.workPool.Put(work)
-
- // work <- recovery data
-
- for i := 0; i < r.parityShards; i++ {
- if len(shards[i+r.dataShards]) != 0 {
- mulgf16(work[i], shards[i+r.dataShards], errLocs[i], &r.o)
- } else {
- memclr(work[i])
- }
- }
- for i := r.parityShards; i < m; i++ {
- memclr(work[i])
- }
-
- // work <- original data
-
- for i := 0; i < r.dataShards; i++ {
- if len(shards[i]) != 0 {
- mulgf16(work[m+i], shards[i], errLocs[m+i], &r.o)
- } else {
- memclr(work[m+i])
- }
- }
- for i := m + r.dataShards; i < n; i++ {
- memclr(work[i])
- }
-
- // work <- IFFT(work, n, 0)
-
- ifftDITDecoder(
- m+r.dataShards,
- work,
- n,
- fftSkew[:],
- &r.o,
- )
-
- // work <- FormalDerivative(work, n)
-
- for i := 1; i < n; i++ {
- width := ((i ^ (i - 1)) + 1) >> 1
- slicesXor(work[i-width:i], work[i:i+width], &r.o)
- }
-
- // work <- FFT(work, n, 0) truncated to m + dataShards
-
- outputCount := m + r.dataShards
-
- if LEO_ERROR_BITFIELD_OPT && useBits {
- errorBits.fftDIT(work, outputCount, n, fftSkew[:], &r.o)
- } else {
- fftDIT(work, outputCount, n, fftSkew[:], &r.o)
- }
-
- // Reveal erasures
- //
- // Original = -ErrLocator * FFT( Derivative( IFFT( ErrLocator * ReceivedData ) ) )
- // mul_mem(x, y, log_m, ) equals x[] = y[] * log_m
- //
- // mem layout: [Recovery Data (Power of Two = M)] [Original Data (K)] [Zero Padding out to N]
- end := r.dataShards
- if recoverAll {
- end = r.totalShards
- }
- for i := 0; i < end; i++ {
- if len(shards[i]) != 0 {
- continue
- }
- if cap(shards[i]) >= shardSize {
- shards[i] = shards[i][:shardSize]
- } else {
- shards[i] = make([]byte, shardSize)
- }
- if i >= r.dataShards {
- // Parity shard.
- mulgf16(shards[i], work[i-r.dataShards], modulus-errLocs[i-r.dataShards], &r.o)
- } else {
- // Data shard.
- mulgf16(shards[i], work[i+m], modulus-errLocs[i+m], &r.o)
- }
- }
- return nil
-}
-
-// Basic no-frills version for decoder
-func ifftDITDecoder(mtrunc int, work [][]byte, m int, skewLUT []ffe, o *options) {
- // Decimation in time: Unroll 2 layers at a time
- dist := 1
- dist4 := 4
- for dist4 <= m {
- // For each set of dist*4 elements:
- for r := 0; r < mtrunc; r += dist4 {
- iend := r + dist
- log_m01 := skewLUT[iend-1]
- log_m02 := skewLUT[iend+dist-1]
- log_m23 := skewLUT[iend+dist*2-1]
-
- // For each set of dist elements:
- for i := r; i < iend; i++ {
- ifftDIT4(work[i:], dist, log_m01, log_m23, log_m02, o)
- }
- }
- dist = dist4
- dist4 <<= 2
- }
-
- // If there is one layer left:
- if dist < m {
- // Assuming that dist = m / 2
- if dist*2 != m {
- panic("internal error")
- }
-
- log_m := skewLUT[dist-1]
-
- if log_m == modulus {
- slicesXor(work[dist:2*dist], work[:dist], o)
- } else {
- for i := 0; i < dist; i++ {
- ifftDIT2(
- work[i],
- work[i+dist],
- log_m,
- o,
- )
- }
- }
- }
-}
-
-// In-place FFT for encoder and decoder
-func fftDIT(work [][]byte, mtrunc, m int, skewLUT []ffe, o *options) {
- // Decimation in time: Unroll 2 layers at a time
- dist4 := m
- dist := m >> 2
- for dist != 0 {
- // For each set of dist*4 elements:
- for r := 0; r < mtrunc; r += dist4 {
- iend := r + dist
- log_m01 := skewLUT[iend-1]
- log_m02 := skewLUT[iend+dist-1]
- log_m23 := skewLUT[iend+dist*2-1]
-
- // For each set of dist elements:
- for i := r; i < iend; i++ {
- fftDIT4(
- work[i:],
- dist,
- log_m01,
- log_m23,
- log_m02,
- o,
- )
- }
- }
- dist4 = dist
- dist >>= 2
- }
-
- // If there is one layer left:
- if dist4 == 2 {
- for r := 0; r < mtrunc; r += 2 {
- log_m := skewLUT[r+1-1]
-
- if log_m == modulus {
- sliceXor(work[r], work[r+1], o)
- } else {
- fftDIT2(work[r], work[r+1], log_m, o)
- }
- }
- }
-}
-
-// 4-way butterfly
-func fftDIT4Ref(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
- // First layer:
- if log_m02 == modulus {
- sliceXor(work[0], work[dist*2], o)
- sliceXor(work[dist], work[dist*3], o)
- } else {
- fftDIT2(work[0], work[dist*2], log_m02, o)
- fftDIT2(work[dist], work[dist*3], log_m02, o)
- }
-
- // Second layer:
- if log_m01 == modulus {
- sliceXor(work[0], work[dist], o)
- } else {
- fftDIT2(work[0], work[dist], log_m01, o)
- }
-
- if log_m23 == modulus {
- sliceXor(work[dist*2], work[dist*3], o)
- } else {
- fftDIT2(work[dist*2], work[dist*3], log_m23, o)
- }
-}
-
-// Unrolled IFFT for encoder
-func ifftDITEncoder(data [][]byte, mtrunc int, work [][]byte, xorRes [][]byte, m int, skewLUT []ffe, o *options) {
- // I tried rolling the memcpy/memset into the first layer of the FFT and
- // found that it only yields a 4% performance improvement, which is not
- // worth the extra complexity.
- for i := 0; i < mtrunc; i++ {
- copy(work[i], data[i])
- }
- for i := mtrunc; i < m; i++ {
- memclr(work[i])
- }
-
- // I tried splitting up the first few layers into L3-cache sized blocks but
- // found that it only provides about 5% performance boost, which is not
- // worth the extra complexity.
-
- // Decimation in time: Unroll 2 layers at a time
- dist := 1
- dist4 := 4
- for dist4 <= m {
- // For each set of dist*4 elements:
- for r := 0; r < mtrunc; r += dist4 {
- iend := r + dist
- log_m01 := skewLUT[iend]
- log_m02 := skewLUT[iend+dist]
- log_m23 := skewLUT[iend+dist*2]
-
- // For each set of dist elements:
- for i := r; i < iend; i++ {
- ifftDIT4(
- work[i:],
- dist,
- log_m01,
- log_m23,
- log_m02,
- o,
- )
- }
- }
-
- dist = dist4
- dist4 <<= 2
- // I tried alternating sweeps left->right and right->left to reduce cache misses.
- // It provides about 1% performance boost when done for both FFT and IFFT, so it
- // does not seem to be worth the extra complexity.
- }
-
- // If there is one layer left:
- if dist < m {
- // Assuming that dist = m / 2
- if dist*2 != m {
- panic("internal error")
- }
-
- logm := skewLUT[dist]
-
- if logm == modulus {
- slicesXor(work[dist:dist*2], work[:dist], o)
- } else {
- for i := 0; i < dist; i++ {
- ifftDIT2(work[i], work[i+dist], logm, o)
- }
- }
- }
-
- // I tried unrolling this but it does not provide more than 5% performance
- // improvement for 16-bit finite fields, so it's not worth the complexity.
- if xorRes != nil {
- slicesXor(xorRes[:m], work[:m], o)
- }
-}
-
-func ifftDIT4Ref(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
- // First layer:
- if log_m01 == modulus {
- sliceXor(work[0], work[dist], o)
- } else {
- ifftDIT2(work[0], work[dist], log_m01, o)
- }
-
- if log_m23 == modulus {
- sliceXor(work[dist*2], work[dist*3], o)
- } else {
- ifftDIT2(work[dist*2], work[dist*3], log_m23, o)
- }
-
- // Second layer:
- if log_m02 == modulus {
- sliceXor(work[0], work[dist*2], o)
- sliceXor(work[dist], work[dist*3], o)
- } else {
- ifftDIT2(work[0], work[dist*2], log_m02, o)
- ifftDIT2(work[dist], work[dist*3], log_m02, o)
- }
-}
-
-// Reference version of muladd: x[] ^= y[] * log_m
-func refMulAdd(x, y []byte, log_m ffe) {
- lut := &mul16LUTs[log_m]
-
- for len(x) >= 64 {
- // Assert sizes for no bounds checks in loop
- hiA := y[32:64]
- loA := y[:32]
- dst := x[:64] // Needed, but not checked...
- for i, lo := range loA {
- hi := hiA[i]
- prod := lut.Lo[lo] ^ lut.Hi[hi]
-
- dst[i] ^= byte(prod)
- dst[i+32] ^= byte(prod >> 8)
- }
- x = x[64:]
- y = y[64:]
- }
-}
-
-func memclr(s []byte) {
- for i := range s {
- s[i] = 0
- }
-}
-
-// slicesXor calls xor for every slice pair in v1, v2.
-func slicesXor(v1, v2 [][]byte, o *options) {
- for i, v := range v1 {
- sliceXor(v2[i], v, o)
- }
-}
-
-// Reference version of mul: x[] = y[] * log_m
-func refMul(x, y []byte, log_m ffe) {
- lut := &mul16LUTs[log_m]
-
- for off := 0; off < len(x); off += 64 {
- loA := y[off : off+32]
- hiA := y[off+32:]
- hiA = hiA[:len(loA)]
- for i, lo := range loA {
- hi := hiA[i]
- prod := lut.Lo[lo] ^ lut.Hi[hi]
-
- x[off+i] = byte(prod)
- x[off+i+32] = byte(prod >> 8)
- }
- }
-}
-
-// Returns a * Log(b)
-func mulLog(a, log_b ffe) ffe {
- /*
- Note that this operation is not a normal multiplication in a finite
- field because the right operand is already a logarithm. This is done
- because it moves K table lookups from the Decode() method into the
- initialization step that is less performance critical. The LogWalsh[]
- table below contains precalculated logarithms so it is easier to do
- all the other multiplies in that form as well.
- */
- if a == 0 {
- return 0
- }
- return expLUT[addMod(logLUT[a], log_b)]
-}
-
-// z = x + y (mod kModulus)
-func addMod(a, b ffe) ffe {
- sum := uint(a) + uint(b)
-
- // Partial reduction step, allowing for kModulus to be returned
- return ffe(sum + sum>>bitwidth)
-}
-
-// z = x - y (mod kModulus)
-func subMod(a, b ffe) ffe {
- dif := uint(a) - uint(b)
-
- // Partial reduction step, allowing for kModulus to be returned
- return ffe(dif + dif>>bitwidth)
-}
-
-// ceilPow2 returns power of two at or above n.
-func ceilPow2(n int) int {
- const w = int(unsafe.Sizeof(n) * 8)
- return 1 << (w - bits.LeadingZeros(uint(n-1)))
-}
-
-// Decimation in time (DIT) Fast Walsh-Hadamard Transform
-// Unrolls pairs of layers to perform cross-layer operations in registers
-// mtrunc: Number of elements that are non-zero at the front of data
-func fwht(data *[order]ffe, m, mtrunc int) {
- // Decimation in time: Unroll 2 layers at a time
- dist := 1
- dist4 := 4
- for dist4 <= m {
- // For each set of dist*4 elements:
- for r := 0; r < mtrunc; r += dist4 {
- // For each set of dist elements:
- // Use 16 bit indices to avoid bounds check on [65536]ffe.
- dist := uint16(dist)
- off := uint16(r)
- for i := uint16(0); i < dist; i++ {
- // fwht4(data[i:], dist) inlined...
- // Reading values appear faster than updating pointers.
- // Casting to uint is not faster.
- t0 := data[off]
- t1 := data[off+dist]
- t2 := data[off+dist*2]
- t3 := data[off+dist*3]
-
- t0, t1 = fwht2alt(t0, t1)
- t2, t3 = fwht2alt(t2, t3)
- t0, t2 = fwht2alt(t0, t2)
- t1, t3 = fwht2alt(t1, t3)
-
- data[off] = t0
- data[off+dist] = t1
- data[off+dist*2] = t2
- data[off+dist*3] = t3
- off++
- }
- }
- dist = dist4
- dist4 <<= 2
- }
-
- // If there is one layer left:
- if dist < m {
- dist := uint16(dist)
- for i := uint16(0); i < dist; i++ {
- fwht2(&data[i], &data[i+dist])
- }
- }
-}
-
-func fwht4(data []ffe, s int) {
- s2 := s << 1
-
- t0 := &data[0]
- t1 := &data[s]
- t2 := &data[s2]
- t3 := &data[s2+s]
-
- fwht2(t0, t1)
- fwht2(t2, t3)
- fwht2(t0, t2)
- fwht2(t1, t3)
-}
-
-// {a, b} = {a + b, a - b} (Mod Q)
-func fwht2(a, b *ffe) {
- sum := addMod(*a, *b)
- dif := subMod(*a, *b)
- *a = sum
- *b = dif
-}
-
-// fwht2alt is as fwht2, but returns result.
-func fwht2alt(a, b ffe) (ffe, ffe) {
- return addMod(a, b), subMod(a, b)
-}
-
-var initOnce sync.Once
-
-func initConstants() {
- initOnce.Do(func() {
- initLUTs()
- initFFTSkew()
- initMul16LUT()
- })
-}
-
-// Initialize logLUT, expLUT.
-func initLUTs() {
- cantorBasis := [bitwidth]ffe{
- 0x0001, 0xACCA, 0x3C0E, 0x163E,
- 0xC582, 0xED2E, 0x914C, 0x4012,
- 0x6C98, 0x10D8, 0x6A72, 0xB900,
- 0xFDB8, 0xFB34, 0xFF38, 0x991E,
- }
-
- expLUT = &[order]ffe{}
- logLUT = &[order]ffe{}
-
- // LFSR table generation:
- state := 1
- for i := ffe(0); i < modulus; i++ {
- expLUT[state] = i
- state <<= 1
- if state >= order {
- state ^= polynomial
- }
- }
- expLUT[0] = modulus
-
- // Conversion to Cantor basis:
-
- logLUT[0] = 0
- for i := 0; i < bitwidth; i++ {
- basis := cantorBasis[i]
- width := 1 << i
-
- for j := 0; j < width; j++ {
- logLUT[j+width] = logLUT[j] ^ basis
- }
- }
-
- for i := 0; i < order; i++ {
- logLUT[i] = expLUT[logLUT[i]]
- }
-
- for i := 0; i < order; i++ {
- expLUT[logLUT[i]] = ffe(i)
- }
-
- expLUT[modulus] = expLUT[0]
-}
-
-// Initialize fftSkew.
-func initFFTSkew() {
- var temp [bitwidth - 1]ffe
-
- // Generate FFT skew vector {1}:
-
- for i := 1; i < bitwidth; i++ {
- temp[i-1] = ffe(1 << i)
- }
-
- fftSkew = &[modulus]ffe{}
- logWalsh = &[order]ffe{}
-
- for m := 0; m < bitwidth-1; m++ {
- step := 1 << (m + 1)
-
- fftSkew[1<>4)+16)]
- lut.Hi[i] = tmp[((i&15)+32)] ^ tmp[((i>>4)+48)]
- }
- }
- if cpuid.CPU.Has(cpuid.SSSE3) || cpuid.CPU.Has(cpuid.AVX2) || cpuid.CPU.Has(cpuid.AVX512F) {
- multiply256LUT = &[order][16 * 8]byte{}
-
- for logM := range multiply256LUT[:] {
- // For each 4 bits of the finite field width in bits:
- shift := 0
- for i := 0; i < 4; i++ {
- // Construct 16 entry LUT for PSHUFB
- prodLo := multiply256LUT[logM][i*16 : i*16+16]
- prodHi := multiply256LUT[logM][4*16+i*16 : 4*16+i*16+16]
- for x := range prodLo[:] {
- prod := mulLog(ffe(x<> 8)
- }
- shift += 4
- }
- }
- }
-}
-
-const kWordMips = 5
-const kWords = order / 64
-const kBigMips = 6
-const kBigWords = (kWords + 63) / 64
-const kBiggestMips = 4
-
-// errorBitfield contains progressive errors to help indicate which
-// shards need reconstruction.
-type errorBitfield struct {
- Words [kWordMips][kWords]uint64
- BigWords [kBigMips][kBigWords]uint64
- BiggestWords [kBiggestMips]uint64
-}
-
-func (e *errorBitfield) set(i int) {
- e.Words[0][i/64] |= uint64(1) << (i & 63)
-}
-
-func (e *errorBitfield) isNeededFn(mipLevel int) func(bit int) bool {
- if mipLevel >= 16 {
- return func(bit int) bool {
- return true
- }
- }
- if mipLevel >= 12 {
- w := e.BiggestWords[mipLevel-12]
- return func(bit int) bool {
- bit /= 4096
- return 0 != (w & (uint64(1) << bit))
- }
- }
- if mipLevel >= 6 {
- w := e.BigWords[mipLevel-6][:]
- return func(bit int) bool {
- bit /= 64
- return 0 != (w[bit/64] & (uint64(1) << (bit & 63)))
- }
- }
- if mipLevel > 0 {
- w := e.Words[mipLevel-1][:]
- return func(bit int) bool {
- return 0 != (w[bit/64] & (uint64(1) << (bit & 63)))
- }
- }
- return nil
-}
-
-func (e *errorBitfield) isNeeded(mipLevel int, bit uint) bool {
- if mipLevel >= 16 {
- return true
- }
- if mipLevel >= 12 {
- bit /= 4096
- return 0 != (e.BiggestWords[mipLevel-12] & (uint64(1) << bit))
- }
- if mipLevel >= 6 {
- bit /= 64
- return 0 != (e.BigWords[mipLevel-6][bit/64] & (uint64(1) << (bit % 64)))
- }
- return 0 != (e.Words[mipLevel-1][bit/64] & (uint64(1) << (bit % 64)))
-}
-
-var kHiMasks = [5]uint64{
- 0xAAAAAAAAAAAAAAAA,
- 0xCCCCCCCCCCCCCCCC,
- 0xF0F0F0F0F0F0F0F0,
- 0xFF00FF00FF00FF00,
- 0xFFFF0000FFFF0000,
-}
-
-func (e *errorBitfield) prepare() {
- // First mip level is for final layer of FFT: pairs of data
- for i := 0; i < kWords; i++ {
- w_i := e.Words[0][i]
- hi2lo0 := w_i | ((w_i & kHiMasks[0]) >> 1)
- lo2hi0 := (w_i & (kHiMasks[0] >> 1)) << 1
- w_i = hi2lo0 | lo2hi0
- e.Words[0][i] = w_i
-
- bits := 2
- for j := 1; j < kWordMips; j++ {
- hi2lo_j := w_i | ((w_i & kHiMasks[j]) >> bits)
- lo2hi_j := (w_i & (kHiMasks[j] >> bits)) << bits
- w_i = hi2lo_j | lo2hi_j
- e.Words[j][i] = w_i
- bits <<= 1
- }
- }
-
- for i := 0; i < kBigWords; i++ {
- w_i := uint64(0)
- bit := uint64(1)
- src := e.Words[kWordMips-1][i*64 : i*64+64]
- for _, w := range src {
- w_i |= (w | (w >> 32) | (w << 32)) & bit
- bit <<= 1
- }
- e.BigWords[0][i] = w_i
-
- bits := 1
- for j := 1; j < kBigMips; j++ {
- hi2lo_j := w_i | ((w_i & kHiMasks[j-1]) >> bits)
- lo2hi_j := (w_i & (kHiMasks[j-1] >> bits)) << bits
- w_i = hi2lo_j | lo2hi_j
- e.BigWords[j][i] = w_i
- bits <<= 1
- }
- }
-
- w_i := uint64(0)
- bit := uint64(1)
- for _, w := range e.BigWords[kBigMips-1][:kBigWords] {
- w_i |= (w | (w >> 32) | (w << 32)) & bit
- bit <<= 1
- }
- e.BiggestWords[0] = w_i
-
- bits := uint64(1)
- for j := 1; j < kBiggestMips; j++ {
- hi2lo_j := w_i | ((w_i & kHiMasks[j-1]) >> bits)
- lo2hi_j := (w_i & (kHiMasks[j-1] >> bits)) << bits
- w_i = hi2lo_j | lo2hi_j
- e.BiggestWords[j] = w_i
- bits <<= 1
- }
-}
-
-func (e *errorBitfield) fftDIT(work [][]byte, mtrunc, m int, skewLUT []ffe, o *options) {
- // Decimation in time: Unroll 2 layers at a time
- mipLevel := bits.Len32(uint32(m)) - 1
-
- dist4 := m
- dist := m >> 2
- needed := e.isNeededFn(mipLevel)
- for dist != 0 {
- // For each set of dist*4 elements:
- for r := 0; r < mtrunc; r += dist4 {
- if !needed(r) {
- continue
- }
- iEnd := r + dist
- logM01 := skewLUT[iEnd-1]
- logM02 := skewLUT[iEnd+dist-1]
- logM23 := skewLUT[iEnd+dist*2-1]
-
- // For each set of dist elements:
- for i := r; i < iEnd; i++ {
- fftDIT4(
- work[i:],
- dist,
- logM01,
- logM23,
- logM02,
- o)
- }
- }
- dist4 = dist
- dist >>= 2
- mipLevel -= 2
- needed = e.isNeededFn(mipLevel)
- }
-
- // If there is one layer left:
- if dist4 == 2 {
- for r := 0; r < mtrunc; r += 2 {
- if !needed(r) {
- continue
- }
- logM := skewLUT[r+1-1]
-
- if logM == modulus {
- sliceXor(work[r], work[r+1], o)
- } else {
- fftDIT2(work[r], work[r+1], logM, o)
- }
- }
- }
-}
diff --git a/vendor/github.com/klauspost/reedsolomon/matrix.go b/vendor/github.com/klauspost/reedsolomon/matrix.go
deleted file mode 100644
index 22669c2..0000000
--- a/vendor/github.com/klauspost/reedsolomon/matrix.go
+++ /dev/null
@@ -1,282 +0,0 @@
-/**
- * Matrix Algebra over an 8-bit Galois Field
- *
- * Copyright 2015, Klaus Post
- * Copyright 2015, Backblaze, Inc.
- */
-
-package reedsolomon
-
-import (
- "errors"
- "fmt"
- "strconv"
- "strings"
-)
-
-// byte[row][col]
-type matrix [][]byte
-
-// newMatrix returns a matrix of zeros.
-func newMatrix(rows, cols int) (matrix, error) {
- if rows <= 0 {
- return nil, errInvalidRowSize
- }
- if cols <= 0 {
- return nil, errInvalidColSize
- }
-
- m := matrix(make([][]byte, rows))
- for i := range m {
- m[i] = make([]byte, cols)
- }
- return m, nil
-}
-
-// NewMatrixData initializes a matrix with the given row-major data.
-// Note that data is not copied from input.
-func newMatrixData(data [][]byte) (matrix, error) {
- m := matrix(data)
- err := m.Check()
- if err != nil {
- return nil, err
- }
- return m, nil
-}
-
-// IdentityMatrix returns an identity matrix of the given size.
-func identityMatrix(size int) (matrix, error) {
- m, err := newMatrix(size, size)
- if err != nil {
- return nil, err
- }
- for i := range m {
- m[i][i] = 1
- }
- return m, nil
-}
-
-// errInvalidRowSize will be returned if attempting to create a matrix with negative or zero row number.
-var errInvalidRowSize = errors.New("invalid row size")
-
-// errInvalidColSize will be returned if attempting to create a matrix with negative or zero column number.
-var errInvalidColSize = errors.New("invalid column size")
-
-// errColSizeMismatch is returned if the size of matrix columns mismatch.
-var errColSizeMismatch = errors.New("column size is not the same for all rows")
-
-func (m matrix) Check() error {
- rows := len(m)
- if rows <= 0 {
- return errInvalidRowSize
- }
- cols := len(m[0])
- if cols <= 0 {
- return errInvalidColSize
- }
-
- for _, col := range m {
- if len(col) != cols {
- return errColSizeMismatch
- }
- }
- return nil
-}
-
-// String returns a human-readable string of the matrix contents.
-//
-// Example: [[1, 2], [3, 4]]
-func (m matrix) String() string {
- rowOut := make([]string, 0, len(m))
- for _, row := range m {
- colOut := make([]string, 0, len(row))
- for _, col := range row {
- colOut = append(colOut, strconv.Itoa(int(col)))
- }
- rowOut = append(rowOut, "["+strings.Join(colOut, ", ")+"]")
- }
- return "[" + strings.Join(rowOut, ", ") + "]"
-}
-
-// Multiply multiplies this matrix (the one on the left) by another
-// matrix (the one on the right) and returns a new matrix with the result.
-func (m matrix) Multiply(right matrix) (matrix, error) {
- if len(m[0]) != len(right) {
- return nil, fmt.Errorf("columns on left (%d) is different than rows on right (%d)", len(m[0]), len(right))
- }
- result, _ := newMatrix(len(m), len(right[0]))
- for r, row := range result {
- for c := range row {
- var value byte
- for i := range m[0] {
- value ^= galMultiply(m[r][i], right[i][c])
- }
- result[r][c] = value
- }
- }
- return result, nil
-}
-
-// Augment returns the concatenation of this matrix and the matrix on the right.
-func (m matrix) Augment(right matrix) (matrix, error) {
- if len(m) != len(right) {
- return nil, errMatrixSize
- }
-
- result, _ := newMatrix(len(m), len(m[0])+len(right[0]))
- for r, row := range m {
- for c := range row {
- result[r][c] = m[r][c]
- }
- cols := len(m[0])
- for c := range right[0] {
- result[r][cols+c] = right[r][c]
- }
- }
- return result, nil
-}
-
-// errMatrixSize is returned if matrix dimensions are doesn't match.
-var errMatrixSize = errors.New("matrix sizes do not match")
-
-func (m matrix) SameSize(n matrix) error {
- if len(m) != len(n) {
- return errMatrixSize
- }
- for i := range m {
- if len(m[i]) != len(n[i]) {
- return errMatrixSize
- }
- }
- return nil
-}
-
-// SubMatrix returns a part of this matrix. Data is copied.
-func (m matrix) SubMatrix(rmin, cmin, rmax, cmax int) (matrix, error) {
- result, err := newMatrix(rmax-rmin, cmax-cmin)
- if err != nil {
- return nil, err
- }
- // OPTME: If used heavily, use copy function to copy slice
- for r := rmin; r < rmax; r++ {
- for c := cmin; c < cmax; c++ {
- result[r-rmin][c-cmin] = m[r][c]
- }
- }
- return result, nil
-}
-
-// SwapRows Exchanges two rows in the matrix.
-func (m matrix) SwapRows(r1, r2 int) error {
- if r1 < 0 || len(m) <= r1 || r2 < 0 || len(m) <= r2 {
- return errInvalidRowSize
- }
- m[r2], m[r1] = m[r1], m[r2]
- return nil
-}
-
-// IsSquare will return true if the matrix is square
-// and nil if the matrix is square
-func (m matrix) IsSquare() bool {
- return len(m) == len(m[0])
-}
-
-// errSingular is returned if the matrix is singular and cannot be inversed
-var errSingular = errors.New("matrix is singular")
-
-// errNotSquare is returned if attempting to inverse a non-square matrix.
-var errNotSquare = errors.New("only square matrices can be inverted")
-
-// Invert returns the inverse of this matrix.
-// Returns ErrSingular when the matrix is singular and doesn't have an inverse.
-// The matrix must be square, otherwise ErrNotSquare is returned.
-func (m matrix) Invert() (matrix, error) {
- if !m.IsSquare() {
- return nil, errNotSquare
- }
-
- size := len(m)
- work, _ := identityMatrix(size)
- work, _ = m.Augment(work)
-
- err := work.gaussianElimination()
- if err != nil {
- return nil, err
- }
-
- return work.SubMatrix(0, size, size, size*2)
-}
-
-func (m matrix) gaussianElimination() error {
- rows := len(m)
- columns := len(m[0])
- // Clear out the part below the main diagonal and scale the main
- // diagonal to be 1.
- for r := 0; r < rows; r++ {
- // If the element on the diagonal is 0, find a row below
- // that has a non-zero and swap them.
- if m[r][r] == 0 {
- for rowBelow := r + 1; rowBelow < rows; rowBelow++ {
- if m[rowBelow][r] != 0 {
- err := m.SwapRows(r, rowBelow)
- if err != nil {
- return err
- }
- break
- }
- }
- }
- // If we couldn't find one, the matrix is singular.
- if m[r][r] == 0 {
- return errSingular
- }
- // Scale to 1.
- if m[r][r] != 1 {
- scale := galDivide(1, m[r][r])
- for c := 0; c < columns; c++ {
- m[r][c] = galMultiply(m[r][c], scale)
- }
- }
- // Make everything below the 1 be a 0 by subtracting
- // a multiple of it. (Subtraction and addition are
- // both exclusive or in the Galois field.)
- for rowBelow := r + 1; rowBelow < rows; rowBelow++ {
- if m[rowBelow][r] != 0 {
- scale := m[rowBelow][r]
- for c := 0; c < columns; c++ {
- m[rowBelow][c] ^= galMultiply(scale, m[r][c])
- }
- }
- }
- }
-
- // Now clear the part above the main diagonal.
- for d := 0; d < rows; d++ {
- for rowAbove := 0; rowAbove < d; rowAbove++ {
- if m[rowAbove][d] != 0 {
- scale := m[rowAbove][d]
- for c := 0; c < columns; c++ {
- m[rowAbove][c] ^= galMultiply(scale, m[d][c])
- }
-
- }
- }
- }
- return nil
-}
-
-// Create a Vandermonde matrix, which is guaranteed to have the
-// property that any subset of rows that forms a square matrix
-// is invertible.
-func vandermonde(rows, cols int) (matrix, error) {
- result, err := newMatrix(rows, cols)
- if err != nil {
- return nil, err
- }
- for r, row := range result {
- for c := range row {
- result[r][c] = galExp(byte(r), c)
- }
- }
- return result, nil
-}
diff --git a/vendor/github.com/klauspost/reedsolomon/options.go b/vendor/github.com/klauspost/reedsolomon/options.go
deleted file mode 100644
index 83dd2cd..0000000
--- a/vendor/github.com/klauspost/reedsolomon/options.go
+++ /dev/null
@@ -1,234 +0,0 @@
-package reedsolomon
-
-import (
- "runtime"
-
- "github.com/klauspost/cpuid/v2"
-)
-
-// Option allows to override processing parameters.
-type Option func(*options)
-
-type options struct {
- maxGoroutines int
- minSplitSize int
- shardSize int
- perRound int
-
- useAVX512, useAVX2, useSSSE3, useSSE2 bool
- useJerasureMatrix bool
- usePAR1Matrix bool
- useCauchy bool
- fastOneParity bool
- inversionCache bool
- customMatrix [][]byte
- withLeopard *bool
-
- // stream options
- concReads bool
- concWrites bool
- streamBS int
-}
-
-var defaultOptions = options{
- maxGoroutines: 384,
- minSplitSize: -1,
- fastOneParity: false,
- inversionCache: true,
-
- // Detect CPU capabilities.
- useSSSE3: cpuid.CPU.Supports(cpuid.SSSE3),
- useSSE2: cpuid.CPU.Supports(cpuid.SSE2),
- useAVX2: cpuid.CPU.Supports(cpuid.AVX2),
- useAVX512: cpuid.CPU.Supports(cpuid.AVX512F, cpuid.AVX512BW),
-}
-
-func init() {
- if runtime.GOMAXPROCS(0) <= 1 {
- defaultOptions.maxGoroutines = 1
- }
-}
-
-// WithMaxGoroutines is the maximum number of goroutines number for encoding & decoding.
-// Jobs will be split into this many parts, unless each goroutine would have to process
-// less than minSplitSize bytes (set with WithMinSplitSize).
-// For the best speed, keep this well above the GOMAXPROCS number for more fine grained
-// scheduling.
-// If n <= 0, it is ignored.
-func WithMaxGoroutines(n int) Option {
- return func(o *options) {
- if n > 0 {
- o.maxGoroutines = n
- }
- }
-}
-
-// WithAutoGoroutines will adjust the number of goroutines for optimal speed with a
-// specific shard size.
-// Send in the shard size you expect to send. Other shard sizes will work, but may not
-// run at the optimal speed.
-// Overwrites WithMaxGoroutines.
-// If shardSize <= 0, it is ignored.
-func WithAutoGoroutines(shardSize int) Option {
- return func(o *options) {
- o.shardSize = shardSize
- }
-}
-
-// WithMinSplitSize is the minimum encoding size in bytes per goroutine.
-// By default this parameter is determined by CPU cache characteristics.
-// See WithMaxGoroutines on how jobs are split.
-// If n <= 0, it is ignored.
-func WithMinSplitSize(n int) Option {
- return func(o *options) {
- if n > 0 {
- o.minSplitSize = n
- }
- }
-}
-
-// WithConcurrentStreams will enable concurrent reads and writes on the streams.
-// Default: Disabled, meaning only one stream will be read/written at the time.
-// Ignored if not used on a stream input.
-func WithConcurrentStreams(enabled bool) Option {
- return func(o *options) {
- o.concReads, o.concWrites = enabled, enabled
- }
-}
-
-// WithConcurrentStreamReads will enable concurrent reads from the input streams.
-// Default: Disabled, meaning only one stream will be read at the time.
-// Ignored if not used on a stream input.
-func WithConcurrentStreamReads(enabled bool) Option {
- return func(o *options) {
- o.concReads = enabled
- }
-}
-
-// WithConcurrentStreamWrites will enable concurrent writes to the the output streams.
-// Default: Disabled, meaning only one stream will be written at the time.
-// Ignored if not used on a stream input.
-func WithConcurrentStreamWrites(enabled bool) Option {
- return func(o *options) {
- o.concWrites = enabled
- }
-}
-
-// WithInversionCache allows to control the inversion cache.
-// This will cache reconstruction matrices so they can be reused.
-// Enabled by default.
-func WithInversionCache(enabled bool) Option {
- return func(o *options) {
- o.inversionCache = enabled
- }
-}
-
-// WithStreamBlockSize allows to set a custom block size per round of reads/writes.
-// If not set, any shard size set with WithAutoGoroutines will be used.
-// If WithAutoGoroutines is also unset, 4MB will be used.
-// Ignored if not used on stream.
-func WithStreamBlockSize(n int) Option {
- return func(o *options) {
- o.streamBS = n
- }
-}
-
-// WithSSSE3 allows to enable/disable SSSE3 instructions.
-// If not set, SSSE3 will be turned on or off automatically based on CPU ID information.
-func WithSSSE3(enabled bool) Option {
- return func(o *options) {
- o.useSSSE3 = enabled
- }
-}
-
-// WithAVX2 allows to enable/disable AVX2 instructions.
-// If not set, AVX2 will be turned on or off automatically based on CPU ID information.
-func WithAVX2(enabled bool) Option {
- return func(o *options) {
- o.useAVX2 = enabled
- }
-}
-
-// WithSSE2 allows to enable/disable SSE2 instructions.
-// If not set, SSE2 will be turned on or off automatically based on CPU ID information.
-func WithSSE2(enabled bool) Option {
- return func(o *options) {
- o.useSSE2 = enabled
- }
-}
-
-// WithAVX512 allows to enable/disable AVX512 instructions.
-// If not set, AVX512 will be turned on or off automatically based on CPU ID information.
-func WithAVX512(enabled bool) Option {
- return func(o *options) {
- o.useAVX512 = enabled
- }
-}
-
-// WithJerasureMatrix causes the encoder to build the Reed-Solomon-Vandermonde
-// matrix in the same way as done by the Jerasure library.
-// The first row and column of the coding matrix only contains 1's in this method
-// so the first parity chunk is always equal to XOR of all data chunks.
-func WithJerasureMatrix() Option {
- return func(o *options) {
- o.useJerasureMatrix = true
- o.usePAR1Matrix = false
- o.useCauchy = false
- }
-}
-
-// WithPAR1Matrix causes the encoder to build the matrix how PARv1
-// does. Note that the method they use is buggy, and may lead to cases
-// where recovery is impossible, even if there are enough parity
-// shards.
-func WithPAR1Matrix() Option {
- return func(o *options) {
- o.useJerasureMatrix = false
- o.usePAR1Matrix = true
- o.useCauchy = false
- }
-}
-
-// WithCauchyMatrix will make the encoder build a Cauchy style matrix.
-// The output of this is not compatible with the standard output.
-// A Cauchy matrix is faster to generate. This does not affect data throughput,
-// but will result in slightly faster start-up time.
-func WithCauchyMatrix() Option {
- return func(o *options) {
- o.useJerasureMatrix = false
- o.usePAR1Matrix = false
- o.useCauchy = true
- }
-}
-
-// WithFastOneParityMatrix will switch the matrix to a simple xor
-// if there is only one parity shard.
-// The PAR1 matrix already has this property so it has little effect there.
-func WithFastOneParityMatrix() Option {
- return func(o *options) {
- o.fastOneParity = true
- }
-}
-
-// WithCustomMatrix causes the encoder to use the manually specified matrix.
-// customMatrix represents only the parity chunks.
-// customMatrix must have at least ParityShards rows and DataShards columns.
-// It can be used for interoperability with libraries which generate
-// the matrix differently or to implement more complex coding schemes like LRC
-// (locally reconstructible codes).
-func WithCustomMatrix(customMatrix [][]byte) Option {
- return func(o *options) {
- o.customMatrix = customMatrix
- }
-}
-
-// WithLeopardGF16 will always use leopard GF16 for encoding,
-// even when there is less than 256 shards.
-// This will likely improve reconstruction time for some setups.
-// This is not compatible with Leopard output for <= 256 shards.
-// Note that Leopard places certain restrictions on use see other documentation.
-func WithLeopardGF16(enabled bool) Option {
- return func(o *options) {
- o.withLeopard = &enabled
- }
-}
diff --git a/vendor/github.com/klauspost/reedsolomon/reedsolomon.go b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go
deleted file mode 100644
index a2e5886..0000000
--- a/vendor/github.com/klauspost/reedsolomon/reedsolomon.go
+++ /dev/null
@@ -1,1451 +0,0 @@
-/**
- * Reed-Solomon Coding over 8-bit values.
- *
- * Copyright 2015, Klaus Post
- * Copyright 2015, Backblaze, Inc.
- */
-
-// Package reedsolomon enables Erasure Coding in Go
-//
-// For usage and examples, see https://github.com/klauspost/reedsolomon
-package reedsolomon
-
-import (
- "bytes"
- "errors"
- "io"
- "runtime"
- "sync"
-
- "github.com/klauspost/cpuid/v2"
-)
-
-// Encoder is an interface to encode Reed-Salomon parity sets for your data.
-type Encoder interface {
- // Encode parity for a set of data shards.
- // Input is 'shards' containing data shards followed by parity shards.
- // The number of shards must match the number given to New().
- // Each shard is a byte array, and they must all be the same size.
- // The parity shards will always be overwritten and the data shards
- // will remain the same, so it is safe for you to read from the
- // data shards while this is running.
- Encode(shards [][]byte) error
-
- // EncodeIdx will add parity for a single data shard.
- // Parity shards should start out as 0. The caller must zero them.
- // Data shards must be delivered exactly once. There is no check for this.
- // The parity shards will always be updated and the data shards will remain the same.
- EncodeIdx(dataShard []byte, idx int, parity [][]byte) error
-
- // Verify returns true if the parity shards contain correct data.
- // The data is the same format as Encode. No data is modified, so
- // you are allowed to read from data while this is running.
- Verify(shards [][]byte) (bool, error)
-
- // Reconstruct will recreate the missing shards if possible.
- //
- // Given a list of shards, some of which contain data, fills in the
- // ones that don't have data.
- //
- // The length of the array must be equal to the total number of shards.
- // You indicate that a shard is missing by setting it to nil or zero-length.
- // If a shard is zero-length but has sufficient capacity, that memory will
- // be used, otherwise a new []byte will be allocated.
- //
- // If there are too few shards to reconstruct the missing
- // ones, ErrTooFewShards will be returned.
- //
- // The reconstructed shard set is complete, but integrity is not verified.
- // Use the Verify function to check if data set is ok.
- Reconstruct(shards [][]byte) error
-
- // ReconstructData will recreate any missing data shards, if possible.
- //
- // Given a list of shards, some of which contain data, fills in the
- // data shards that don't have data.
- //
- // The length of the array must be equal to Shards.
- // You indicate that a shard is missing by setting it to nil or zero-length.
- // If a shard is zero-length but has sufficient capacity, that memory will
- // be used, otherwise a new []byte will be allocated.
- //
- // If there are too few shards to reconstruct the missing
- // ones, ErrTooFewShards will be returned.
- //
- // As the reconstructed shard set may contain missing parity shards,
- // calling the Verify function is likely to fail.
- ReconstructData(shards [][]byte) error
-
- // ReconstructSome will recreate only requested data shards, if possible.
- //
- // Given a list of shards, some of which contain data, fills in the
- // data shards indicated by true values in the "required" parameter.
- // The length of "required" array must be equal to DataShards.
- //
- // The length of "shards" array must be equal to Shards.
- // You indicate that a shard is missing by setting it to nil or zero-length.
- // If a shard is zero-length but has sufficient capacity, that memory will
- // be used, otherwise a new []byte will be allocated.
- //
- // If there are too few shards to reconstruct the missing
- // ones, ErrTooFewShards will be returned.
- //
- // As the reconstructed shard set may contain missing parity shards,
- // calling the Verify function is likely to fail.
- ReconstructSome(shards [][]byte, required []bool) error
-
- // Update parity is use for change a few data shards and update it's parity.
- // Input 'newDatashards' containing data shards changed.
- // Input 'shards' containing old data shards (if data shard not changed, it can be nil) and old parity shards.
- // new parity shards will in shards[DataShards:]
- // Update is very useful if DataShards much larger than ParityShards and changed data shards is few. It will
- // faster than Encode and not need read all data shards to encode.
- Update(shards [][]byte, newDatashards [][]byte) error
-
- // Split a data slice into the number of shards given to the encoder,
- // and create empty parity shards.
- //
- // The data will be split into equally sized shards.
- // If the data size isn't dividable by the number of shards,
- // the last shard will contain extra zeros.
- //
- // There must be at least 1 byte otherwise ErrShortData will be
- // returned.
- //
- // The data will not be copied, except for the last shard, so you
- // should not modify the data of the input slice afterwards.
- Split(data []byte) ([][]byte, error)
-
- // Join the shards and write the data segment to dst.
- //
- // Only the data shards are considered.
- // You must supply the exact output size you want.
- // If there are to few shards given, ErrTooFewShards will be returned.
- // If the total data size is less than outSize, ErrShortData will be returned.
- Join(dst io.Writer, shards [][]byte, outSize int) error
-}
-
-// Extensions is an optional interface.
-// All returned instances will support this interface.
-type Extensions interface {
- // ShardSizeMultiple will return the size the shard sizes must be a multiple of.
- ShardSizeMultiple() int
-
- // DataShards will return the number of data shards.
- DataShards() int
-
- // ParityShards will return the number of parity shards.
- ParityShards() int
-
- // TotalShards will return the total number of shards.
- TotalShards() int
-}
-
-const (
- avx2CodeGenMinSize = 64
- avx2CodeGenMinShards = 3
- avx2CodeGenMaxGoroutines = 8
-
- intSize = 32 << (^uint(0) >> 63) // 32 or 64
- maxInt = 1<<(intSize-1) - 1
-)
-
-// reedSolomon contains a matrix for a specific
-// distribution of datashards and parity shards.
-// Construct if using New()
-type reedSolomon struct {
- dataShards int // Number of data shards, should not be modified.
- parityShards int // Number of parity shards, should not be modified.
- totalShards int // Total number of shards. Calculated, and should not be modified.
- m matrix
- tree *inversionTree
- parity [][]byte
- o options
- mPool sync.Pool
-}
-
-var _ = Extensions(&reedSolomon{})
-
-func (r *reedSolomon) ShardSizeMultiple() int {
- return 1
-}
-
-func (r *reedSolomon) DataShards() int {
- return r.dataShards
-}
-
-func (r *reedSolomon) ParityShards() int {
- return r.parityShards
-}
-
-func (r *reedSolomon) TotalShards() int {
- return r.parityShards
-}
-
-// ErrInvShardNum will be returned by New, if you attempt to create
-// an Encoder with less than one data shard or less than zero parity
-// shards.
-var ErrInvShardNum = errors.New("cannot create Encoder with less than one data shard or less than zero parity shards")
-
-// ErrMaxShardNum will be returned by New, if you attempt to create an
-// Encoder where data and parity shards are bigger than the order of
-// GF(2^8).
-var ErrMaxShardNum = errors.New("cannot create Encoder with more than 256 data+parity shards")
-
-// ErrNotSupported is returned when an operation is not supported.
-var ErrNotSupported = errors.New("operation not supported")
-
-// buildMatrix creates the matrix to use for encoding, given the
-// number of data shards and the number of total shards.
-//
-// The top square of the matrix is guaranteed to be an identity
-// matrix, which means that the data shards are unchanged after
-// encoding.
-func buildMatrix(dataShards, totalShards int) (matrix, error) {
- // Start with a Vandermonde matrix. This matrix would work,
- // in theory, but doesn't have the property that the data
- // shards are unchanged after encoding.
- vm, err := vandermonde(totalShards, dataShards)
- if err != nil {
- return nil, err
- }
-
- // Multiply by the inverse of the top square of the matrix.
- // This will make the top square be the identity matrix, but
- // preserve the property that any square subset of rows is
- // invertible.
- top, err := vm.SubMatrix(0, 0, dataShards, dataShards)
- if err != nil {
- return nil, err
- }
-
- topInv, err := top.Invert()
- if err != nil {
- return nil, err
- }
-
- return vm.Multiply(topInv)
-}
-
-// buildMatrixJerasure creates the same encoding matrix as Jerasure library
-//
-// The top square of the matrix is guaranteed to be an identity
-// matrix, which means that the data shards are unchanged after
-// encoding.
-func buildMatrixJerasure(dataShards, totalShards int) (matrix, error) {
- // Start with a Vandermonde matrix. This matrix would work,
- // in theory, but doesn't have the property that the data
- // shards are unchanged after encoding.
- vm, err := vandermonde(totalShards, dataShards)
- if err != nil {
- return nil, err
- }
-
- // Jerasure does this:
- // first row is always 100..00
- vm[0][0] = 1
- for i := 1; i < dataShards; i++ {
- vm[0][i] = 0
- }
- // last row is always 000..01
- for i := 0; i < dataShards-1; i++ {
- vm[totalShards-1][i] = 0
- }
- vm[totalShards-1][dataShards-1] = 1
-
- for i := 0; i < dataShards; i++ {
- // Find the row where i'th col is not 0
- r := i
- for ; r < totalShards && vm[r][i] == 0; r++ {
- }
- if r != i {
- // Swap it with i'th row if not already
- t := vm[r]
- vm[r] = vm[i]
- vm[i] = t
- }
- // Multiply by the inverted matrix (same as vm.Multiply(vm[0:dataShards].Invert()))
- if vm[i][i] != 1 {
- // Make vm[i][i] = 1 by dividing the column by vm[i][i]
- tmp := galDivide(1, vm[i][i])
- for j := 0; j < totalShards; j++ {
- vm[j][i] = galMultiply(vm[j][i], tmp)
- }
- }
- for j := 0; j < dataShards; j++ {
- // Make vm[i][j] = 0 where j != i by adding vm[i][j]*vm[.][i] to each column
- tmp := vm[i][j]
- if j != i && tmp != 0 {
- for r := 0; r < totalShards; r++ {
- vm[r][j] = galAdd(vm[r][j], galMultiply(tmp, vm[r][i]))
- }
- }
- }
- }
-
- // Make vm[dataShards] row all ones - divide each column j by vm[dataShards][j]
- for j := 0; j < dataShards; j++ {
- tmp := vm[dataShards][j]
- if tmp != 1 {
- tmp = galDivide(1, tmp)
- for i := dataShards; i < totalShards; i++ {
- vm[i][j] = galMultiply(vm[i][j], tmp)
- }
- }
- }
-
- // Make vm[dataShards...totalShards-1][0] column all ones - divide each row
- for i := dataShards + 1; i < totalShards; i++ {
- tmp := vm[i][0]
- if tmp != 1 {
- tmp = galDivide(1, tmp)
- for j := 0; j < dataShards; j++ {
- vm[i][j] = galMultiply(vm[i][j], tmp)
- }
- }
- }
-
- return vm, nil
-}
-
-// buildMatrixPAR1 creates the matrix to use for encoding according to
-// the PARv1 spec, given the number of data shards and the number of
-// total shards. Note that the method they use is buggy, and may lead
-// to cases where recovery is impossible, even if there are enough
-// parity shards.
-//
-// The top square of the matrix is guaranteed to be an identity
-// matrix, which means that the data shards are unchanged after
-// encoding.
-func buildMatrixPAR1(dataShards, totalShards int) (matrix, error) {
- result, err := newMatrix(totalShards, dataShards)
- if err != nil {
- return nil, err
- }
-
- for r, row := range result {
- // The top portion of the matrix is the identity
- // matrix, and the bottom is a transposed Vandermonde
- // matrix starting at 1 instead of 0.
- if r < dataShards {
- result[r][r] = 1
- } else {
- for c := range row {
- result[r][c] = galExp(byte(c+1), r-dataShards)
- }
- }
- }
- return result, nil
-}
-
-func buildMatrixCauchy(dataShards, totalShards int) (matrix, error) {
- result, err := newMatrix(totalShards, dataShards)
- if err != nil {
- return nil, err
- }
-
- for r, row := range result {
- // The top portion of the matrix is the identity
- // matrix, and the bottom is a transposed Cauchy matrix.
- if r < dataShards {
- result[r][r] = 1
- } else {
- for c := range row {
- result[r][c] = invTable[(byte(r ^ c))]
- }
- }
- }
- return result, nil
-}
-
-// buildXorMatrix can be used to build a matrix with pure XOR
-// operations if there is only one parity shard.
-func buildXorMatrix(dataShards, totalShards int) (matrix, error) {
- if dataShards+1 != totalShards {
- return nil, errors.New("internal error")
- }
- result, err := newMatrix(totalShards, dataShards)
- if err != nil {
- return nil, err
- }
-
- for r, row := range result {
- // The top portion of the matrix is the identity
- // matrix.
- if r < dataShards {
- result[r][r] = 1
- } else {
- // Set all values to 1 (XOR)
- for c := range row {
- result[r][c] = 1
- }
- }
- }
- return result, nil
-}
-
-// New creates a new encoder and initializes it to
-// the number of data shards and parity shards that
-// you want to use. You can reuse this encoder.
-// Note that the maximum number of total shards is 65536, with some
-// restrictions for a total larger than 256:
-//
-// - Shard sizes must be multiple of 64
-// - The methods Join/Split/Update/EncodeIdx are not supported
-//
-// If no options are supplied, default options are used.
-func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
- o := defaultOptions
- for _, opt := range opts {
- opt(&o)
- }
-
- if (dataShards+parityShards > 256 && o.withLeopard == nil) ||
- (o.withLeopard != nil && *o.withLeopard == true && parityShards > 0) {
- return newFF16(dataShards, parityShards, o)
- }
- if dataShards+parityShards > 256 {
- return nil, ErrMaxShardNum
- }
-
- r := reedSolomon{
- dataShards: dataShards,
- parityShards: parityShards,
- totalShards: dataShards + parityShards,
- o: o,
- }
-
- if dataShards <= 0 || parityShards < 0 {
- return nil, ErrInvShardNum
- }
-
- if parityShards == 0 {
- return &r, nil
- }
-
- var err error
- switch {
- case r.o.customMatrix != nil:
- if len(r.o.customMatrix) < parityShards {
- return nil, errors.New("coding matrix must contain at least parityShards rows")
- }
- r.m = make([][]byte, r.totalShards)
- for i := 0; i < dataShards; i++ {
- r.m[i] = make([]byte, dataShards)
- r.m[i][i] = 1
- }
- for k, row := range r.o.customMatrix {
- if len(row) < dataShards {
- return nil, errors.New("coding matrix must contain at least dataShards columns")
- }
- r.m[dataShards+k] = make([]byte, dataShards)
- copy(r.m[dataShards+k], row)
- }
- case r.o.fastOneParity && parityShards == 1:
- r.m, err = buildXorMatrix(dataShards, r.totalShards)
- case r.o.useCauchy:
- r.m, err = buildMatrixCauchy(dataShards, r.totalShards)
- case r.o.usePAR1Matrix:
- r.m, err = buildMatrixPAR1(dataShards, r.totalShards)
- case r.o.useJerasureMatrix:
- r.m, err = buildMatrixJerasure(dataShards, r.totalShards)
- default:
- r.m, err = buildMatrix(dataShards, r.totalShards)
- }
- if err != nil {
- return nil, err
- }
-
- // Calculate what we want per round
- r.o.perRound = cpuid.CPU.Cache.L2
-
- divide := parityShards + 1
- if avx2CodeGen && r.o.useAVX2 && (dataShards > maxAvx2Inputs || parityShards > maxAvx2Outputs) {
- // Base on L1 cache if we have many inputs.
- r.o.perRound = cpuid.CPU.Cache.L1D
- divide = 0
- if dataShards > maxAvx2Inputs {
- divide += maxAvx2Inputs
- } else {
- divide += dataShards
- }
- if parityShards > maxAvx2Inputs {
- divide += maxAvx2Outputs
- } else {
- divide += parityShards
- }
- }
-
- if r.o.perRound <= 0 {
- // Set to 128K if undetectable.
- r.o.perRound = 128 << 10
- }
-
- if cpuid.CPU.ThreadsPerCore > 1 && r.o.maxGoroutines > cpuid.CPU.PhysicalCores {
- // If multiple threads per core, make sure they don't contend for cache.
- r.o.perRound /= cpuid.CPU.ThreadsPerCore
- }
-
- // 1 input + parity must fit in cache, and we add one more to be safer.
- r.o.perRound = r.o.perRound / divide
- // Align to 64 bytes.
- r.o.perRound = ((r.o.perRound + 63) / 64) * 64
-
- if r.o.minSplitSize <= 0 {
- // Set minsplit as high as we can, but still have parity in L1.
- cacheSize := cpuid.CPU.Cache.L1D
- if cacheSize <= 0 {
- cacheSize = 32 << 10
- }
-
- r.o.minSplitSize = cacheSize / (parityShards + 1)
- // Min 1K
- if r.o.minSplitSize < 1024 {
- r.o.minSplitSize = 1024
- }
- }
-
- if r.o.shardSize > 0 {
- p := runtime.GOMAXPROCS(0)
- if p == 1 || r.o.shardSize <= r.o.minSplitSize*2 {
- // Not worth it.
- r.o.maxGoroutines = 1
- } else {
- g := r.o.shardSize / r.o.perRound
-
- // Overprovision by a factor of 2.
- if g < p*2 && r.o.perRound > r.o.minSplitSize*2 {
- g = p * 2
- r.o.perRound /= 2
- }
-
- // Have g be multiple of p
- g += p - 1
- g -= g % p
-
- r.o.maxGoroutines = g
- }
- }
-
- // Generated AVX2 does not need data to stay in L1 cache between runs.
- // We will be purely limited by RAM speed.
- if r.canAVX2C(avx2CodeGenMinSize, maxAvx2Inputs, maxAvx2Outputs) && r.o.maxGoroutines > avx2CodeGenMaxGoroutines {
- r.o.maxGoroutines = avx2CodeGenMaxGoroutines
- }
-
- // Inverted matrices are cached in a tree keyed by the indices
- // of the invalid rows of the data to reconstruct.
- // The inversion root node will have the identity matrix as
- // its inversion matrix because it implies there are no errors
- // with the original data.
- if r.o.inversionCache {
- r.tree = newInversionTree(dataShards, parityShards)
- }
-
- r.parity = make([][]byte, parityShards)
- for i := range r.parity {
- r.parity[i] = r.m[dataShards+i]
- }
-
- if avx2CodeGen && r.o.useAVX2 {
- sz := r.dataShards * r.parityShards * 2 * 32
- r.mPool.New = func() interface{} {
- return make([]byte, sz)
- }
- }
- return &r, err
-}
-
-// ErrTooFewShards is returned if too few shards where given to
-// Encode/Verify/Reconstruct/Update. It will also be returned from Reconstruct
-// if there were too few shards to reconstruct the missing data.
-var ErrTooFewShards = errors.New("too few shards given")
-
-// Encode parity for a set of data shards.
-// An array 'shards' containing data shards followed by parity shards.
-// The number of shards must match the number given to New.
-// Each shard is a byte array, and they must all be the same size.
-// The parity shards will always be overwritten and the data shards
-// will remain the same.
-func (r *reedSolomon) Encode(shards [][]byte) error {
- if len(shards) != r.totalShards {
- return ErrTooFewShards
- }
-
- err := checkShards(shards, false)
- if err != nil {
- return err
- }
-
- // Get the slice of output buffers.
- output := shards[r.dataShards:]
-
- // Do the coding.
- r.codeSomeShards(r.parity, shards[0:r.dataShards], output[:r.parityShards], len(shards[0]))
- return nil
-}
-
-// EncodeIdx will add parity for a single data shard.
-// Parity shards should start out zeroed. The caller must zero them before first call.
-// Data shards should only be delivered once. There is no check for this.
-// The parity shards will always be updated and the data shards will remain the unchanged.
-func (r *reedSolomon) EncodeIdx(dataShard []byte, idx int, parity [][]byte) error {
- if len(parity) != r.parityShards {
- return ErrTooFewShards
- }
- if len(parity) == 0 {
- return nil
- }
- if idx < 0 || idx >= r.dataShards {
- return ErrInvShardNum
- }
- err := checkShards(parity, false)
- if err != nil {
- return err
- }
- if len(parity[0]) != len(dataShard) {
- return ErrShardSize
- }
-
- // Process using no goroutines for now.
- start, end := 0, r.o.perRound
- if end > len(dataShard) {
- end = len(dataShard)
- }
-
- for start < len(dataShard) {
- in := dataShard[start:end]
- for iRow := 0; iRow < r.parityShards; iRow++ {
- galMulSliceXor(r.parity[iRow][idx], in, parity[iRow][start:end], &r.o)
- }
- start = end
- end += r.o.perRound
- if end > len(dataShard) {
- end = len(dataShard)
- }
- }
- return nil
-}
-
-// ErrInvalidInput is returned if invalid input parameter of Update.
-var ErrInvalidInput = errors.New("invalid input")
-
-func (r *reedSolomon) Update(shards [][]byte, newDatashards [][]byte) error {
- if len(shards) != r.totalShards {
- return ErrTooFewShards
- }
-
- if len(newDatashards) != r.dataShards {
- return ErrTooFewShards
- }
-
- err := checkShards(shards, true)
- if err != nil {
- return err
- }
-
- err = checkShards(newDatashards, true)
- if err != nil {
- return err
- }
-
- for i := range newDatashards {
- if newDatashards[i] != nil && shards[i] == nil {
- return ErrInvalidInput
- }
- }
- for _, p := range shards[r.dataShards:] {
- if p == nil {
- return ErrInvalidInput
- }
- }
-
- shardSize := shardSize(shards)
-
- // Get the slice of output buffers.
- output := shards[r.dataShards:]
-
- // Do the coding.
- r.updateParityShards(r.parity, shards[0:r.dataShards], newDatashards[0:r.dataShards], output, r.parityShards, shardSize)
- return nil
-}
-
-func (r *reedSolomon) updateParityShards(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) {
- if len(outputs) == 0 {
- return
- }
-
- if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
- r.updateParityShardsP(matrixRows, oldinputs, newinputs, outputs, outputCount, byteCount)
- return
- }
-
- for c := 0; c < r.dataShards; c++ {
- in := newinputs[c]
- if in == nil {
- continue
- }
- oldin := oldinputs[c]
- // oldinputs data will be changed
- sliceXor(in, oldin, &r.o)
- for iRow := 0; iRow < outputCount; iRow++ {
- galMulSliceXor(matrixRows[iRow][c], oldin, outputs[iRow], &r.o)
- }
- }
-}
-
-func (r *reedSolomon) updateParityShardsP(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) {
- var wg sync.WaitGroup
- do := byteCount / r.o.maxGoroutines
- if do < r.o.minSplitSize {
- do = r.o.minSplitSize
- }
- start := 0
- for start < byteCount {
- if start+do > byteCount {
- do = byteCount - start
- }
- wg.Add(1)
- go func(start, stop int) {
- for c := 0; c < r.dataShards; c++ {
- in := newinputs[c]
- if in == nil {
- continue
- }
- oldin := oldinputs[c]
- // oldinputs data will be change
- sliceXor(in[start:stop], oldin[start:stop], &r.o)
- for iRow := 0; iRow < outputCount; iRow++ {
- galMulSliceXor(matrixRows[iRow][c], oldin[start:stop], outputs[iRow][start:stop], &r.o)
- }
- }
- wg.Done()
- }(start, start+do)
- start += do
- }
- wg.Wait()
-}
-
-// Verify returns true if the parity shards contain the right data.
-// The data is the same format as Encode. No data is modified.
-func (r *reedSolomon) Verify(shards [][]byte) (bool, error) {
- if len(shards) != r.totalShards {
- return false, ErrTooFewShards
- }
- err := checkShards(shards, false)
- if err != nil {
- return false, err
- }
-
- // Slice of buffers being checked.
- toCheck := shards[r.dataShards:]
-
- // Do the checking.
- return r.checkSomeShards(r.parity, shards[:r.dataShards], toCheck[:r.parityShards], len(shards[0])), nil
-}
-
-func (r *reedSolomon) canAVX2C(byteCount int, inputs, outputs int) bool {
- return avx2CodeGen && r.o.useAVX2 &&
- byteCount >= avx2CodeGenMinSize && inputs+outputs >= avx2CodeGenMinShards &&
- inputs <= maxAvx2Inputs && outputs <= maxAvx2Outputs
-}
-
-// Multiplies a subset of rows from a coding matrix by a full set of
-// input totalShards to produce some output totalShards.
-// 'matrixRows' is The rows from the matrix to use.
-// 'inputs' An array of byte arrays, each of which is one input shard.
-// The number of inputs used is determined by the length of each matrix row.
-// outputs Byte arrays where the computed totalShards are stored.
-// The number of outputs computed, and the
-// number of matrix rows used, is determined by
-// outputCount, which is the number of outputs to compute.
-func (r *reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, byteCount int) {
- if len(outputs) == 0 {
- return
- }
- switch {
- case r.o.useAVX512 && r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize && len(inputs) >= 4 && len(outputs) >= 2:
- r.codeSomeShardsAvx512P(matrixRows, inputs, outputs, byteCount)
- return
- case r.o.useAVX512 && len(inputs) >= 4 && len(outputs) >= 2:
- r.codeSomeShardsAvx512(matrixRows, inputs, outputs, byteCount)
- return
- case byteCount > r.o.minSplitSize:
- r.codeSomeShardsP(matrixRows, inputs, outputs, byteCount)
- return
- }
-
- // Process using no goroutines
- start, end := 0, r.o.perRound
- if end > len(inputs[0]) {
- end = len(inputs[0])
- }
- if r.canAVX2C(byteCount, len(inputs), len(outputs)) {
- m := genAvx2Matrix(matrixRows, len(inputs), 0, len(outputs), r.mPool.Get().([]byte))
- start += galMulSlicesAvx2(m, inputs, outputs, 0, byteCount)
- r.mPool.Put(m)
- end = len(inputs[0])
- } else if len(inputs)+len(outputs) > avx2CodeGenMinShards && r.canAVX2C(byteCount, maxAvx2Inputs, maxAvx2Outputs) {
- end = len(inputs[0])
- inIdx := 0
- m := r.mPool.Get().([]byte)
- defer r.mPool.Put(m)
- ins := inputs
- for len(ins) > 0 {
- inPer := ins
- if len(inPer) > maxAvx2Inputs {
- inPer = inPer[:maxAvx2Inputs]
- }
- outs := outputs
- outIdx := 0
- for len(outs) > 0 {
- outPer := outs
- if len(outPer) > maxAvx2Outputs {
- outPer = outPer[:maxAvx2Outputs]
- }
- m = genAvx2Matrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), m)
- if inIdx == 0 {
- galMulSlicesAvx2(m, inPer, outPer, 0, byteCount)
- } else {
- galMulSlicesAvx2Xor(m, inPer, outPer, 0, byteCount)
- }
- start = byteCount & avxSizeMask
- outIdx += len(outPer)
- outs = outs[len(outPer):]
- }
- inIdx += len(inPer)
- ins = ins[len(inPer):]
- }
- if start >= end {
- return
- }
- }
- for start < len(inputs[0]) {
- for c := 0; c < len(inputs); c++ {
- in := inputs[c][start:end]
- for iRow := 0; iRow < len(outputs); iRow++ {
- if c == 0 {
- galMulSlice(matrixRows[iRow][c], in, outputs[iRow][start:end], &r.o)
- } else {
- galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][start:end], &r.o)
- }
- }
- }
- start = end
- end += r.o.perRound
- if end > len(inputs[0]) {
- end = len(inputs[0])
- }
- }
-}
-
-// Perform the same as codeSomeShards, but split the workload into
-// several goroutines.
-func (r *reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, byteCount int) {
- var wg sync.WaitGroup
- gor := r.o.maxGoroutines
-
- var avx2Matrix []byte
- useAvx2 := r.canAVX2C(byteCount, len(inputs), len(outputs))
- if useAvx2 {
- avx2Matrix = genAvx2Matrix(matrixRows, len(inputs), 0, len(outputs), r.mPool.Get().([]byte))
- defer r.mPool.Put(avx2Matrix)
- } else if byteCount < 10<<20 && len(inputs)+len(outputs) > avx2CodeGenMinShards &&
- r.canAVX2C(byteCount/4, maxAvx2Inputs, maxAvx2Outputs) {
- // It appears there is a switchover point at around 10MB where
- // Regular processing is faster...
- r.codeSomeShardsAVXP(matrixRows, inputs, outputs, byteCount)
- return
- }
-
- do := byteCount / gor
- if do < r.o.minSplitSize {
- do = r.o.minSplitSize
- }
-
- exec := func(start, stop int) {
- if useAvx2 && stop-start >= 64 {
- start += galMulSlicesAvx2(avx2Matrix, inputs, outputs, start, stop)
- }
-
- lstart, lstop := start, start+r.o.perRound
- if lstop > stop {
- lstop = stop
- }
- for lstart < stop {
- for c := 0; c < len(inputs); c++ {
- in := inputs[c][lstart:lstop]
- for iRow := 0; iRow < len(outputs); iRow++ {
- if c == 0 {
- galMulSlice(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
- } else {
- galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
- }
- }
- }
- lstart = lstop
- lstop += r.o.perRound
- if lstop > stop {
- lstop = stop
- }
- }
- wg.Done()
- }
- if gor <= 1 {
- wg.Add(1)
- exec(0, byteCount)
- return
- }
-
- // Make sizes divisible by 64
- do = (do + 63) & (^63)
- start := 0
- for start < byteCount {
- if start+do > byteCount {
- do = byteCount - start
- }
-
- wg.Add(1)
- go exec(start, start+do)
- start += do
- }
- wg.Wait()
-}
-
-// Perform the same as codeSomeShards, but split the workload into
-// several goroutines.
-func (r *reedSolomon) codeSomeShardsAVXP(matrixRows, inputs, outputs [][]byte, byteCount int) {
- var wg sync.WaitGroup
- gor := r.o.maxGoroutines
-
- type state struct {
- input [][]byte
- output [][]byte
- m []byte
- first bool
- }
- // Make a plan...
- plan := make([]state, 0, ((len(inputs)+maxAvx2Inputs-1)/maxAvx2Inputs)*((len(outputs)+maxAvx2Outputs-1)/maxAvx2Outputs))
-
- tmp := r.mPool.Get().([]byte)
- defer func(b []byte) {
- r.mPool.Put(b)
- }(tmp)
-
- // Flips between input first to output first.
- // We put the smallest data load in the inner loop.
- if len(inputs) > len(outputs) {
- inIdx := 0
- ins := inputs
- for len(ins) > 0 {
- inPer := ins
- if len(inPer) > maxAvx2Inputs {
- inPer = inPer[:maxAvx2Inputs]
- }
- outs := outputs
- outIdx := 0
- for len(outs) > 0 {
- outPer := outs
- if len(outPer) > maxAvx2Outputs {
- outPer = outPer[:maxAvx2Outputs]
- }
- // Generate local matrix
- m := genAvx2Matrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), tmp)
- tmp = tmp[len(m):]
- plan = append(plan, state{
- input: inPer,
- output: outPer,
- m: m,
- first: inIdx == 0,
- })
- outIdx += len(outPer)
- outs = outs[len(outPer):]
- }
- inIdx += len(inPer)
- ins = ins[len(inPer):]
- }
- } else {
- outs := outputs
- outIdx := 0
- for len(outs) > 0 {
- outPer := outs
- if len(outPer) > maxAvx2Outputs {
- outPer = outPer[:maxAvx2Outputs]
- }
-
- inIdx := 0
- ins := inputs
- for len(ins) > 0 {
- inPer := ins
- if len(inPer) > maxAvx2Inputs {
- inPer = inPer[:maxAvx2Inputs]
- }
- // Generate local matrix
- m := genAvx2Matrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), tmp)
- tmp = tmp[len(m):]
- //fmt.Println("bytes:", len(inPer)*r.o.perRound, "out:", len(outPer)*r.o.perRound)
- plan = append(plan, state{
- input: inPer,
- output: outPer,
- m: m,
- first: inIdx == 0,
- })
- inIdx += len(inPer)
- ins = ins[len(inPer):]
- }
- outIdx += len(outPer)
- outs = outs[len(outPer):]
- }
- }
-
- do := byteCount / gor
- if do < r.o.minSplitSize {
- do = r.o.minSplitSize
- }
-
- exec := func(start, stop int) {
- lstart, lstop := start, start+r.o.perRound
- if lstop > stop {
- lstop = stop
- }
- for lstart < stop {
- if lstop-lstart >= minAvx2Size {
- // Execute plan...
- for _, p := range plan {
- if p.first {
- galMulSlicesAvx2(p.m, p.input, p.output, lstart, lstop)
- } else {
- galMulSlicesAvx2Xor(p.m, p.input, p.output, lstart, lstop)
- }
- }
- lstart += (lstop - lstart) & avxSizeMask
- if lstart == lstop {
- lstop += r.o.perRound
- if lstop > stop {
- lstop = stop
- }
- continue
- }
- }
-
- for c := range inputs {
- in := inputs[c][lstart:lstop]
- for iRow := 0; iRow < len(outputs); iRow++ {
- if c == 0 {
- galMulSlice(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
- } else {
- galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
- }
- }
- }
- lstart = lstop
- lstop += r.o.perRound
- if lstop > stop {
- lstop = stop
- }
- }
- wg.Done()
- }
- if gor == 1 {
- wg.Add(1)
- exec(0, byteCount)
- return
- }
-
- // Make sizes divisible by 64
- do = (do + 63) & (^63)
- start := 0
- for start < byteCount {
- if start+do > byteCount {
- do = byteCount - start
- }
-
- wg.Add(1)
- go exec(start, start+do)
- start += do
- }
- wg.Wait()
-}
-
-// checkSomeShards is mostly the same as codeSomeShards,
-// except this will check values and return
-// as soon as a difference is found.
-func (r *reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, byteCount int) bool {
- if len(toCheck) == 0 {
- return true
- }
-
- outputs := make([][]byte, len(toCheck))
- for i := range outputs {
- outputs[i] = make([]byte, byteCount)
- }
- r.codeSomeShards(matrixRows, inputs, outputs, byteCount)
-
- for i, calc := range outputs {
- if !bytes.Equal(calc, toCheck[i]) {
- return false
- }
- }
- return true
-}
-
-// ErrShardNoData will be returned if there are no shards,
-// or if the length of all shards is zero.
-var ErrShardNoData = errors.New("no shard data")
-
-// ErrShardSize is returned if shard length isn't the same for all
-// shards.
-var ErrShardSize = errors.New("shard sizes do not match")
-
-// checkShards will check if shards are the same size
-// or 0, if allowed. An error is returned if this fails.
-// An error is also returned if all shards are size 0.
-func checkShards(shards [][]byte, nilok bool) error {
- size := shardSize(shards)
- if size == 0 {
- return ErrShardNoData
- }
- for _, shard := range shards {
- if len(shard) != size {
- if len(shard) != 0 || !nilok {
- return ErrShardSize
- }
- }
- }
- return nil
-}
-
-// shardSize return the size of a single shard.
-// The first non-zero size is returned,
-// or 0 if all shards are size 0.
-func shardSize(shards [][]byte) int {
- for _, shard := range shards {
- if len(shard) != 0 {
- return len(shard)
- }
- }
- return 0
-}
-
-// Reconstruct will recreate the missing shards, if possible.
-//
-// Given a list of shards, some of which contain data, fills in the
-// ones that don't have data.
-//
-// The length of the array must be equal to shards.
-// You indicate that a shard is missing by setting it to nil or zero-length.
-// If a shard is zero-length but has sufficient capacity, that memory will
-// be used, otherwise a new []byte will be allocated.
-//
-// If there are too few shards to reconstruct the missing
-// ones, ErrTooFewShards will be returned.
-//
-// The reconstructed shard set is complete, but integrity is not verified.
-// Use the Verify function to check if data set is ok.
-func (r *reedSolomon) Reconstruct(shards [][]byte) error {
- return r.reconstruct(shards, false, nil)
-}
-
-// ReconstructData will recreate any missing data shards, if possible.
-//
-// Given a list of shards, some of which contain data, fills in the
-// data shards that don't have data.
-//
-// The length of the array must be equal to shards.
-// You indicate that a shard is missing by setting it to nil or zero-length.
-// If a shard is zero-length but has sufficient capacity, that memory will
-// be used, otherwise a new []byte will be allocated.
-//
-// If there are too few shards to reconstruct the missing
-// ones, ErrTooFewShards will be returned.
-//
-// As the reconstructed shard set may contain missing parity shards,
-// calling the Verify function is likely to fail.
-func (r *reedSolomon) ReconstructData(shards [][]byte) error {
- return r.reconstruct(shards, true, nil)
-}
-
-// ReconstructSome will recreate only requested data shards, if possible.
-//
-// Given a list of shards, some of which contain data, fills in the
-// data shards indicated by true values in the "required" parameter.
-// The length of "required" array must be equal to dataShards.
-//
-// The length of "shards" array must be equal to shards.
-// You indicate that a shard is missing by setting it to nil or zero-length.
-// If a shard is zero-length but has sufficient capacity, that memory will
-// be used, otherwise a new []byte will be allocated.
-//
-// If there are too few shards to reconstruct the missing
-// ones, ErrTooFewShards will be returned.
-//
-// As the reconstructed shard set may contain missing parity shards,
-// calling the Verify function is likely to fail.
-func (r *reedSolomon) ReconstructSome(shards [][]byte, required []bool) error {
- return r.reconstruct(shards, true, required)
-}
-
-// reconstruct will recreate the missing data totalShards, and unless
-// dataOnly is true, also the missing parity totalShards
-//
-// The length of "shards" array must be equal to totalShards.
-// You indicate that a shard is missing by setting it to nil.
-//
-// If there are too few totalShards to reconstruct the missing
-// ones, ErrTooFewShards will be returned.
-func (r *reedSolomon) reconstruct(shards [][]byte, dataOnly bool, required []bool) error {
- if len(shards) != r.totalShards || required != nil && len(required) < r.dataShards {
- return ErrTooFewShards
- }
- // Check arguments.
- err := checkShards(shards, true)
- if err != nil {
- return err
- }
-
- shardSize := shardSize(shards)
-
- // Quick check: are all of the shards present? If so, there's
- // nothing to do.
- numberPresent := 0
- dataPresent := 0
- missingRequired := 0
- for i := 0; i < r.totalShards; i++ {
- if len(shards[i]) != 0 {
- numberPresent++
- if i < r.dataShards {
- dataPresent++
- }
- } else if required != nil && required[i] {
- missingRequired++
- }
- }
- if numberPresent == r.totalShards || dataOnly && dataPresent == r.dataShards ||
- required != nil && missingRequired == 0 {
- // Cool. All of the shards have data. We don't
- // need to do anything.
- return nil
- }
-
- // More complete sanity check
- if numberPresent < r.dataShards {
- return ErrTooFewShards
- }
-
- // Pull out an array holding just the shards that
- // correspond to the rows of the submatrix. These shards
- // will be the input to the decoding process that re-creates
- // the missing data shards.
- //
- // Also, create an array of indices of the valid rows we do have
- // and the invalid rows we don't have up until we have enough valid rows.
- subShards := make([][]byte, r.dataShards)
- validIndices := make([]int, r.dataShards)
- invalidIndices := make([]int, 0)
- subMatrixRow := 0
- for matrixRow := 0; matrixRow < r.totalShards && subMatrixRow < r.dataShards; matrixRow++ {
- if len(shards[matrixRow]) != 0 {
- subShards[subMatrixRow] = shards[matrixRow]
- validIndices[subMatrixRow] = matrixRow
- subMatrixRow++
- } else {
- invalidIndices = append(invalidIndices, matrixRow)
- }
- }
-
- // Attempt to get the cached inverted matrix out of the tree
- // based on the indices of the invalid rows.
- dataDecodeMatrix := r.tree.GetInvertedMatrix(invalidIndices)
-
- // If the inverted matrix isn't cached in the tree yet we must
- // construct it ourselves and insert it into the tree for the
- // future. In this way the inversion tree is lazily loaded.
- if dataDecodeMatrix == nil {
- // Pull out the rows of the matrix that correspond to the
- // shards that we have and build a square matrix. This
- // matrix could be used to generate the shards that we have
- // from the original data.
- subMatrix, _ := newMatrix(r.dataShards, r.dataShards)
- for subMatrixRow, validIndex := range validIndices {
- for c := 0; c < r.dataShards; c++ {
- subMatrix[subMatrixRow][c] = r.m[validIndex][c]
- }
- }
- // Invert the matrix, so we can go from the encoded shards
- // back to the original data. Then pull out the row that
- // generates the shard that we want to decode. Note that
- // since this matrix maps back to the original data, it can
- // be used to create a data shard, but not a parity shard.
- dataDecodeMatrix, err = subMatrix.Invert()
- if err != nil {
- return err
- }
-
- // Cache the inverted matrix in the tree for future use keyed on the
- // indices of the invalid rows.
- err = r.tree.InsertInvertedMatrix(invalidIndices, dataDecodeMatrix, r.totalShards)
- if err != nil {
- return err
- }
- }
-
- // Re-create any data shards that were missing.
- //
- // The input to the coding is all of the shards we actually
- // have, and the output is the missing data shards. The computation
- // is done using the special decode matrix we just built.
- outputs := make([][]byte, r.parityShards)
- matrixRows := make([][]byte, r.parityShards)
- outputCount := 0
-
- for iShard := 0; iShard < r.dataShards; iShard++ {
- if len(shards[iShard]) == 0 && (required == nil || required[iShard]) {
- if cap(shards[iShard]) >= shardSize {
- shards[iShard] = shards[iShard][0:shardSize]
- } else {
- shards[iShard] = make([]byte, shardSize)
- }
- outputs[outputCount] = shards[iShard]
- matrixRows[outputCount] = dataDecodeMatrix[iShard]
- outputCount++
- }
- }
- r.codeSomeShards(matrixRows, subShards, outputs[:outputCount], shardSize)
-
- if dataOnly {
- // Exit out early if we are only interested in the data shards
- return nil
- }
-
- // Now that we have all of the data shards intact, we can
- // compute any of the parity that is missing.
- //
- // The input to the coding is ALL of the data shards, including
- // any that we just calculated. The output is whichever of the
- // data shards were missing.
- outputCount = 0
- for iShard := r.dataShards; iShard < r.totalShards; iShard++ {
- if len(shards[iShard]) == 0 && (required == nil || required[iShard]) {
- if cap(shards[iShard]) >= shardSize {
- shards[iShard] = shards[iShard][0:shardSize]
- } else {
- shards[iShard] = make([]byte, shardSize)
- }
- outputs[outputCount] = shards[iShard]
- matrixRows[outputCount] = r.parity[iShard-r.dataShards]
- outputCount++
- }
- }
- r.codeSomeShards(matrixRows, shards[:r.dataShards], outputs[:outputCount], shardSize)
- return nil
-}
-
-// ErrShortData will be returned by Split(), if there isn't enough data
-// to fill the number of shards.
-var ErrShortData = errors.New("not enough data to fill the number of requested shards")
-
-// Split a data slice into the number of shards given to the encoder,
-// and create empty parity shards if necessary.
-//
-// The data will be split into equally sized shards.
-// If the data size isn't divisible by the number of shards,
-// the last shard will contain extra zeros.
-//
-// There must be at least 1 byte otherwise ErrShortData will be
-// returned.
-//
-// The data will not be copied, except for the last shard, so you
-// should not modify the data of the input slice afterwards.
-func (r *reedSolomon) Split(data []byte) ([][]byte, error) {
- if len(data) == 0 {
- return nil, ErrShortData
- }
- dataLen := len(data)
- // Calculate number of bytes per data shard.
- perShard := (len(data) + r.dataShards - 1) / r.dataShards
-
- if cap(data) > len(data) {
- data = data[:cap(data)]
- }
-
- // Only allocate memory if necessary
- var padding []byte
- if len(data) < (r.totalShards * perShard) {
- // calculate maximum number of full shards in `data` slice
- fullShards := len(data) / perShard
- padding = make([]byte, r.totalShards*perShard-perShard*fullShards)
- copy(padding, data[perShard*fullShards:])
- data = data[0 : perShard*fullShards]
- } else {
- for i := dataLen; i < dataLen+r.dataShards; i++ {
- data[i] = 0
- }
- }
-
- // Split into equal-length shards.
- dst := make([][]byte, r.totalShards)
- i := 0
- for ; i < len(dst) && len(data) >= perShard; i++ {
- dst[i] = data[:perShard:perShard]
- data = data[perShard:]
- }
-
- for j := 0; i+j < len(dst); j++ {
- dst[i+j] = padding[:perShard:perShard]
- padding = padding[perShard:]
- }
-
- return dst, nil
-}
-
-// ErrReconstructRequired is returned if too few data shards are intact and a
-// reconstruction is required before you can successfully join the shards.
-var ErrReconstructRequired = errors.New("reconstruction required as one or more required data shards are nil")
-
-// Join the shards and write the data segment to dst.
-//
-// Only the data shards are considered.
-// You must supply the exact output size you want.
-//
-// If there are to few shards given, ErrTooFewShards will be returned.
-// If the total data size is less than outSize, ErrShortData will be returned.
-// If one or more required data shards are nil, ErrReconstructRequired will be returned.
-func (r *reedSolomon) Join(dst io.Writer, shards [][]byte, outSize int) error {
- // Do we have enough shards?
- if len(shards) < r.dataShards {
- return ErrTooFewShards
- }
- shards = shards[:r.dataShards]
-
- // Do we have enough data?
- size := 0
- for _, shard := range shards {
- if shard == nil {
- return ErrReconstructRequired
- }
- size += len(shard)
-
- // Do we have enough data already?
- if size >= outSize {
- break
- }
- }
- if size < outSize {
- return ErrShortData
- }
-
- // Copy data to dst
- write := outSize
- for _, shard := range shards {
- if write < len(shard) {
- _, err := dst.Write(shard[:write])
- return err
- }
- n, err := dst.Write(shard)
- if err != nil {
- return err
- }
- write -= n
- }
- return nil
-}
diff --git a/vendor/github.com/klauspost/reedsolomon/streaming.go b/vendor/github.com/klauspost/reedsolomon/streaming.go
deleted file mode 100644
index e3aaf00..0000000
--- a/vendor/github.com/klauspost/reedsolomon/streaming.go
+++ /dev/null
@@ -1,607 +0,0 @@
-/**
- * Reed-Solomon Coding over 8-bit values.
- *
- * Copyright 2015, Klaus Post
- * Copyright 2015, Backblaze, Inc.
- */
-
-package reedsolomon
-
-import (
- "bytes"
- "errors"
- "fmt"
- "io"
- "sync"
-)
-
-// StreamEncoder is an interface to encode Reed-Salomon parity sets for your data.
-// It provides a fully streaming interface, and processes data in blocks of up to 4MB.
-//
-// For small shard sizes, 10MB and below, it is recommended to use the in-memory interface,
-// since the streaming interface has a start up overhead.
-//
-// For all operations, no readers and writers should not assume any order/size of
-// individual reads/writes.
-//
-// For usage examples, see "stream-encoder.go" and "streamdecoder.go" in the examples
-// folder.
-type StreamEncoder interface {
- // Encode parity shards for a set of data shards.
- //
- // Input is 'shards' containing readers for data shards followed by parity shards
- // io.Writer.
- //
- // The number of shards must match the number given to NewStream().
- //
- // Each reader must supply the same number of bytes.
- //
- // The parity shards will be written to the writer.
- // The number of bytes written will match the input size.
- //
- // If a data stream returns an error, a StreamReadError type error
- // will be returned. If a parity writer returns an error, a
- // StreamWriteError will be returned.
- Encode(data []io.Reader, parity []io.Writer) error
-
- // Verify returns true if the parity shards contain correct data.
- //
- // The number of shards must match the number total data+parity shards
- // given to NewStream().
- //
- // Each reader must supply the same number of bytes.
- // If a shard stream returns an error, a StreamReadError type error
- // will be returned.
- Verify(shards []io.Reader) (bool, error)
-
- // Reconstruct will recreate the missing shards if possible.
- //
- // Given a list of valid shards (to read) and invalid shards (to write)
- //
- // You indicate that a shard is missing by setting it to nil in the 'valid'
- // slice and at the same time setting a non-nil writer in "fill".
- // An index cannot contain both non-nil 'valid' and 'fill' entry.
- // If both are provided 'ErrReconstructMismatch' is returned.
- //
- // If there are too few shards to reconstruct the missing
- // ones, ErrTooFewShards will be returned.
- //
- // The reconstructed shard set is complete, but integrity is not verified.
- // Use the Verify function to check if data set is ok.
- Reconstruct(valid []io.Reader, fill []io.Writer) error
-
- // Split a an input stream into the number of shards given to the encoder.
- //
- // The data will be split into equally sized shards.
- // If the data size isn't dividable by the number of shards,
- // the last shard will contain extra zeros.
- //
- // You must supply the total size of your input.
- // 'ErrShortData' will be returned if it is unable to retrieve the
- // number of bytes indicated.
- Split(data io.Reader, dst []io.Writer, size int64) (err error)
-
- // Join the shards and write the data segment to dst.
- //
- // Only the data shards are considered.
- //
- // You must supply the exact output size you want.
- // If there are to few shards given, ErrTooFewShards will be returned.
- // If the total data size is less than outSize, ErrShortData will be returned.
- Join(dst io.Writer, shards []io.Reader, outSize int64) error
-}
-
-// StreamReadError is returned when a read error is encountered
-// that relates to a supplied stream.
-// This will allow you to find out which reader has failed.
-type StreamReadError struct {
- Err error // The error
- Stream int // The stream number on which the error occurred
-}
-
-// Error returns the error as a string
-func (s StreamReadError) Error() string {
- return fmt.Sprintf("error reading stream %d: %s", s.Stream, s.Err)
-}
-
-// String returns the error as a string
-func (s StreamReadError) String() string {
- return s.Error()
-}
-
-// StreamWriteError is returned when a write error is encountered
-// that relates to a supplied stream. This will allow you to
-// find out which reader has failed.
-type StreamWriteError struct {
- Err error // The error
- Stream int // The stream number on which the error occurred
-}
-
-// Error returns the error as a string
-func (s StreamWriteError) Error() string {
- return fmt.Sprintf("error writing stream %d: %s", s.Stream, s.Err)
-}
-
-// String returns the error as a string
-func (s StreamWriteError) String() string {
- return s.Error()
-}
-
-// rsStream contains a matrix for a specific
-// distribution of datashards and parity shards.
-// Construct if using NewStream()
-type rsStream struct {
- r *reedSolomon
- o options
-
- // Shard reader
- readShards func(dst [][]byte, in []io.Reader) error
- // Shard writer
- writeShards func(out []io.Writer, in [][]byte) error
-
- blockPool sync.Pool
-}
-
-// NewStream creates a new encoder and initializes it to
-// the number of data shards and parity shards that
-// you want to use. You can reuse this encoder.
-// Note that the maximum number of data shards is 256.
-func NewStream(dataShards, parityShards int, o ...Option) (StreamEncoder, error) {
- if dataShards+parityShards > 256 {
- return nil, ErrMaxShardNum
- }
-
- r := rsStream{o: defaultOptions}
- for _, opt := range o {
- opt(&r.o)
- }
- // Override block size if shard size is set.
- if r.o.streamBS == 0 && r.o.shardSize > 0 {
- r.o.streamBS = r.o.shardSize
- }
- if r.o.streamBS <= 0 {
- r.o.streamBS = 4 << 20
- }
- if r.o.shardSize == 0 && r.o.maxGoroutines == defaultOptions.maxGoroutines {
- o = append(o, WithAutoGoroutines(r.o.streamBS))
- }
-
- enc, err := New(dataShards, parityShards, o...)
- if err != nil {
- return nil, err
- }
- r.r = enc.(*reedSolomon)
-
- r.blockPool.New = func() interface{} {
- out := make([][]byte, dataShards+parityShards)
- for i := range out {
- out[i] = make([]byte, r.o.streamBS)
- }
- return out
- }
- r.readShards = readShards
- r.writeShards = writeShards
- if r.o.concReads {
- r.readShards = cReadShards
- }
- if r.o.concWrites {
- r.writeShards = cWriteShards
- }
-
- return &r, err
-}
-
-// NewStreamC creates a new encoder and initializes it to
-// the number of data shards and parity shards given.
-//
-// This functions as 'NewStream', but allows you to enable CONCURRENT reads and writes.
-func NewStreamC(dataShards, parityShards int, conReads, conWrites bool, o ...Option) (StreamEncoder, error) {
- return NewStream(dataShards, parityShards, append(o, WithConcurrentStreamReads(conReads), WithConcurrentStreamWrites(conWrites))...)
-}
-
-func (r *rsStream) createSlice() [][]byte {
- out := r.blockPool.Get().([][]byte)
- for i := range out {
- out[i] = out[i][:r.o.streamBS]
- }
- return out
-}
-
-// Encodes parity shards for a set of data shards.
-//
-// Input is 'shards' containing readers for data shards followed by parity shards
-// io.Writer.
-//
-// The number of shards must match the number given to NewStream().
-//
-// Each reader must supply the same number of bytes.
-//
-// The parity shards will be written to the writer.
-// The number of bytes written will match the input size.
-//
-// If a data stream returns an error, a StreamReadError type error
-// will be returned. If a parity writer returns an error, a
-// StreamWriteError will be returned.
-func (r *rsStream) Encode(data []io.Reader, parity []io.Writer) error {
- if len(data) != r.r.dataShards {
- return ErrTooFewShards
- }
-
- if len(parity) != r.r.parityShards {
- return ErrTooFewShards
- }
-
- all := r.createSlice()
- defer r.blockPool.Put(all)
- in := all[:r.r.dataShards]
- out := all[r.r.dataShards:]
- read := 0
-
- for {
- err := r.readShards(in, data)
- switch err {
- case nil:
- case io.EOF:
- if read == 0 {
- return ErrShardNoData
- }
- return nil
- default:
- return err
- }
- out = trimShards(out, shardSize(in))
- read += shardSize(in)
- err = r.r.Encode(all)
- if err != nil {
- return err
- }
- err = r.writeShards(parity, out)
- if err != nil {
- return err
- }
- }
-}
-
-// Trim the shards so they are all the same size
-func trimShards(in [][]byte, size int) [][]byte {
- for i := range in {
- if len(in[i]) != 0 {
- in[i] = in[i][0:size]
- }
- if len(in[i]) < size {
- in[i] = in[i][:0]
- }
- }
- return in
-}
-
-func readShards(dst [][]byte, in []io.Reader) error {
- if len(in) != len(dst) {
- panic("internal error: in and dst size do not match")
- }
- size := -1
- for i := range in {
- if in[i] == nil {
- dst[i] = dst[i][:0]
- continue
- }
- n, err := io.ReadFull(in[i], dst[i])
- // The error is EOF only if no bytes were read.
- // If an EOF happens after reading some but not all the bytes,
- // ReadFull returns ErrUnexpectedEOF.
- switch err {
- case io.ErrUnexpectedEOF, io.EOF:
- if size < 0 {
- size = n
- } else if n != size {
- // Shard sizes must match.
- return ErrShardSize
- }
- dst[i] = dst[i][0:n]
- case nil:
- continue
- default:
- return StreamReadError{Err: err, Stream: i}
- }
- }
- if size == 0 {
- return io.EOF
- }
- return nil
-}
-
-func writeShards(out []io.Writer, in [][]byte) error {
- if len(out) != len(in) {
- panic("internal error: in and out size do not match")
- }
- for i := range in {
- if out[i] == nil {
- continue
- }
- n, err := out[i].Write(in[i])
- if err != nil {
- return StreamWriteError{Err: err, Stream: i}
- }
- //
- if n != len(in[i]) {
- return StreamWriteError{Err: io.ErrShortWrite, Stream: i}
- }
- }
- return nil
-}
-
-type readResult struct {
- n int
- size int
- err error
-}
-
-// cReadShards reads shards concurrently
-func cReadShards(dst [][]byte, in []io.Reader) error {
- if len(in) != len(dst) {
- panic("internal error: in and dst size do not match")
- }
- var wg sync.WaitGroup
- wg.Add(len(in))
- res := make(chan readResult, len(in))
- for i := range in {
- if in[i] == nil {
- dst[i] = dst[i][:0]
- wg.Done()
- continue
- }
- go func(i int) {
- defer wg.Done()
- n, err := io.ReadFull(in[i], dst[i])
- // The error is EOF only if no bytes were read.
- // If an EOF happens after reading some but not all the bytes,
- // ReadFull returns ErrUnexpectedEOF.
- res <- readResult{size: n, err: err, n: i}
-
- }(i)
- }
- wg.Wait()
- close(res)
- size := -1
- for r := range res {
- switch r.err {
- case io.ErrUnexpectedEOF, io.EOF:
- if size < 0 {
- size = r.size
- } else if r.size != size {
- // Shard sizes must match.
- return ErrShardSize
- }
- dst[r.n] = dst[r.n][0:r.size]
- case nil:
- default:
- return StreamReadError{Err: r.err, Stream: r.n}
- }
- }
- if size == 0 {
- return io.EOF
- }
- return nil
-}
-
-// cWriteShards writes shards concurrently
-func cWriteShards(out []io.Writer, in [][]byte) error {
- if len(out) != len(in) {
- panic("internal error: in and out size do not match")
- }
- var errs = make(chan error, len(out))
- var wg sync.WaitGroup
- wg.Add(len(out))
- for i := range in {
- go func(i int) {
- defer wg.Done()
- if out[i] == nil {
- errs <- nil
- return
- }
- n, err := out[i].Write(in[i])
- if err != nil {
- errs <- StreamWriteError{Err: err, Stream: i}
- return
- }
- if n != len(in[i]) {
- errs <- StreamWriteError{Err: io.ErrShortWrite, Stream: i}
- }
- }(i)
- }
- wg.Wait()
- close(errs)
- for err := range errs {
- if err != nil {
- return err
- }
- }
-
- return nil
-}
-
-// Verify returns true if the parity shards contain correct data.
-//
-// The number of shards must match the number total data+parity shards
-// given to NewStream().
-//
-// Each reader must supply the same number of bytes.
-// If a shard stream returns an error, a StreamReadError type error
-// will be returned.
-func (r *rsStream) Verify(shards []io.Reader) (bool, error) {
- if len(shards) != r.r.totalShards {
- return false, ErrTooFewShards
- }
-
- read := 0
- all := r.createSlice()
- defer r.blockPool.Put(all)
- for {
- err := r.readShards(all, shards)
- if err == io.EOF {
- if read == 0 {
- return false, ErrShardNoData
- }
- return true, nil
- }
- if err != nil {
- return false, err
- }
- read += shardSize(all)
- ok, err := r.r.Verify(all)
- if !ok || err != nil {
- return ok, err
- }
- }
-}
-
-// ErrReconstructMismatch is returned by the StreamEncoder, if you supply
-// "valid" and "fill" streams on the same index.
-// Therefore it is impossible to see if you consider the shard valid
-// or would like to have it reconstructed.
-var ErrReconstructMismatch = errors.New("valid shards and fill shards are mutually exclusive")
-
-// Reconstruct will recreate the missing shards if possible.
-//
-// Given a list of valid shards (to read) and invalid shards (to write)
-//
-// You indicate that a shard is missing by setting it to nil in the 'valid'
-// slice and at the same time setting a non-nil writer in "fill".
-// An index cannot contain both non-nil 'valid' and 'fill' entry.
-//
-// If there are too few shards to reconstruct the missing
-// ones, ErrTooFewShards will be returned.
-//
-// The reconstructed shard set is complete when explicitly asked for all missing shards.
-// However its integrity is not automatically verified.
-// Use the Verify function to check in case the data set is complete.
-func (r *rsStream) Reconstruct(valid []io.Reader, fill []io.Writer) error {
- if len(valid) != r.r.totalShards {
- return ErrTooFewShards
- }
- if len(fill) != r.r.totalShards {
- return ErrTooFewShards
- }
-
- all := r.createSlice()
- defer r.blockPool.Put(all)
- reconDataOnly := true
- for i := range valid {
- if valid[i] != nil && fill[i] != nil {
- return ErrReconstructMismatch
- }
- if i >= r.r.dataShards && fill[i] != nil {
- reconDataOnly = false
- }
- }
-
- read := 0
- for {
- err := r.readShards(all, valid)
- if err == io.EOF {
- if read == 0 {
- return ErrShardNoData
- }
- return nil
- }
- if err != nil {
- return err
- }
- read += shardSize(all)
- all = trimShards(all, shardSize(all))
-
- if reconDataOnly {
- err = r.r.ReconstructData(all) // just reconstruct missing data shards
- } else {
- err = r.r.Reconstruct(all) // reconstruct all missing shards
- }
- if err != nil {
- return err
- }
- err = r.writeShards(fill, all)
- if err != nil {
- return err
- }
- }
-}
-
-// Join the shards and write the data segment to dst.
-//
-// Only the data shards are considered.
-//
-// You must supply the exact output size you want.
-// If there are to few shards given, ErrTooFewShards will be returned.
-// If the total data size is less than outSize, ErrShortData will be returned.
-func (r *rsStream) Join(dst io.Writer, shards []io.Reader, outSize int64) error {
- // Do we have enough shards?
- if len(shards) < r.r.dataShards {
- return ErrTooFewShards
- }
-
- // Trim off parity shards if any
- shards = shards[:r.r.dataShards]
- for i := range shards {
- if shards[i] == nil {
- return StreamReadError{Err: ErrShardNoData, Stream: i}
- }
- }
- // Join all shards
- src := io.MultiReader(shards...)
-
- // Copy data to dst
- n, err := io.CopyN(dst, src, outSize)
- if err == io.EOF {
- return ErrShortData
- }
- if err != nil {
- return err
- }
- if n != outSize {
- return ErrShortData
- }
- return nil
-}
-
-// Split a an input stream into the number of shards given to the encoder.
-//
-// The data will be split into equally sized shards.
-// If the data size isn't dividable by the number of shards,
-// the last shard will contain extra zeros.
-//
-// You must supply the total size of your input.
-// 'ErrShortData' will be returned if it is unable to retrieve the
-// number of bytes indicated.
-func (r *rsStream) Split(data io.Reader, dst []io.Writer, size int64) error {
- if size == 0 {
- return ErrShortData
- }
- if len(dst) != r.r.dataShards {
- return ErrInvShardNum
- }
-
- for i := range dst {
- if dst[i] == nil {
- return StreamWriteError{Err: ErrShardNoData, Stream: i}
- }
- }
-
- // Calculate number of bytes per shard.
- perShard := (size + int64(r.r.dataShards) - 1) / int64(r.r.dataShards)
-
- // Pad data to r.Shards*perShard.
- padding := make([]byte, (int64(r.r.totalShards)*perShard)-size)
- data = io.MultiReader(data, bytes.NewBuffer(padding))
-
- // Split into equal-length shards and copy.
- for i := range dst {
- n, err := io.CopyN(dst[i], data, perShard)
- if err != io.EOF && err != nil {
- return err
- }
- if n != perShard {
- return ErrShortData
- }
- }
-
- return nil
-}
diff --git a/vendor/github.com/kuking/go-frodokem/.gitignore b/vendor/github.com/kuking/go-frodokem/.gitignore
deleted file mode 100644
index 3a02bf1..0000000
--- a/vendor/github.com/kuking/go-frodokem/.gitignore
+++ /dev/null
@@ -1,11 +0,0 @@
-*.exe
-*.exe~
-*.dll
-*.so
-*.dylib
-*.test
-*.out
-
-.idea
-bin/
-
diff --git a/vendor/github.com/kuking/go-frodokem/LICENSE b/vendor/github.com/kuking/go-frodokem/LICENSE
deleted file mode 100644
index cb89df3..0000000
--- a/vendor/github.com/kuking/go-frodokem/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2020 Ed Riccardi
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/vendor/github.com/kuking/go-frodokem/Makefile b/vendor/github.com/kuking/go-frodokem/Makefile
deleted file mode 100644
index 6374be6..0000000
--- a/vendor/github.com/kuking/go-frodokem/Makefile
+++ /dev/null
@@ -1,22 +0,0 @@
-all: clean build test bench coverage
-
-clean:
- go clean -testcache -cache
- rm -f bin/soak_test bin/demo
-
-build:
- go build
- go build -o bin/soak_test mains/soak/soak.go
- go build -o bin/demo mains/demo/demo.go
-
-test:
- go test
-
-bench:
- go test -run=Benchmark -bench=. -benchmem
-
-coverage:
- go test -cover -coverprofile=coverage.out
- go tool cover -func=coverage.out
-
-
diff --git a/vendor/github.com/kuking/go-frodokem/README.md b/vendor/github.com/kuking/go-frodokem/README.md
deleted file mode 100644
index d548558..0000000
--- a/vendor/github.com/kuking/go-frodokem/README.md
+++ /dev/null
@@ -1,45 +0,0 @@
-# FrodoKEM in Golang
-Golang implementation of FrodoKEM: a Practical quantum-secure key encapsulation from generic lattices (https://frodokem.org).
-This implementation passes all KAT tests from the reference specification for all defined key sizes (640 / 976 / 1344) and variants (AES / SHAKE).
-
-## API
-There is a demo app that uses every method in the API. i.e. methods for listing variants, for creating key pairs,
-encapsulating & dencapsulating kems, auxiliary methods reporting cipher-text length, key-length, variant name, etc.
-You can find it here: [demo.go](mains/demo/demo.go). The built binary will be placed in `bin/demo` (use `make build` to
-generate it).
-
-You can also read the documentation using `go doc -all` in this package, or look at [impl.go](impl.go) and
-[types.go](types.go).
-
-_Complete usage Snippet:_
-```go
-import frodo "github.com/kuking/go-frodokem"
-
-kem := frodo.Frodo640AES()
-pk, sk := kem.Keygen() // public-key, secret-key
-ct, ssEnc, _ := kem.Encapsulate(pk) // cipher-text, shared-secret
-ssDec, _ := kem.Dencapsulate(sk, ct) // recovered shared-secret
-// ssEnc == ssDec
-```
-
-For a full key agreement example, see [agreement_test.go](agreement_test.go).
-
-#### Note on Concurrency
-This library is stateless. A FrodoKEM struct (as returned by i.e. `frodo.Frodo640AES()`) can be used concurrently.
-Keys are immutable `[]byte` and they can be shared between concurrent goroutines.
-
-## Author
-Eduardo E.S. Riccardi, you can contact me via [linkedin](https://uk.linkedin.com/in/kukino), or you could find my email
-address [here](https://kukino.uk/ed@kukino.uk.pub).
-
-## Releases
-
-v1.0.2 - 12 November 2021 - Updated XZ dependency version (only used in tests)
-
-v1.0.1 - 25 June 2020 - Fixed a possible timing attack [#2](https://github.com/kuking/go-frodokem/issues/2)
-
-v1.0.0 - 10 June 2020 - Feature complete.
-
-## Pending
-- implement optimisations with SIMD instructions
-
diff --git a/vendor/github.com/kuking/go-frodokem/impl.go b/vendor/github.com/kuking/go-frodokem/impl.go
deleted file mode 100644
index 71395be..0000000
--- a/vendor/github.com/kuking/go-frodokem/impl.go
+++ /dev/null
@@ -1,535 +0,0 @@
-package go_frodokem
-
-import (
- "crypto/aes"
- "encoding/binary"
- "errors"
- "golang.org/x/crypto/sha3"
- "math"
-)
-
-// Returns the name of this particular FrodoKEM variant, i.e. Frodo640AES
-func (k *FrodoKEM) Name() string {
- return k.name
-}
-
-// Returns the shared secret (in bytes) this variant generates
-func (k *FrodoKEM) SharedSecretLen() int {
- return k.lenSS / 8
-}
-
-// Returns the public key length (in bytes) for this variant
-func (k *FrodoKEM) PublicKeyLen() int {
- return k.lenPkBytes
-}
-
-// Returns the secret key length (in bytes) for this variant
-func (k *FrodoKEM) SecretKeyLen() int {
- return k.lenSkBytes
-}
-
-// Returns the cipher-text length (in bytes) encapsulating the shared secret for this variant
-func (k *FrodoKEM) CipherTextLen() int {
- return k.lenCtBytes
-}
-
-// Generate a key-pair
-func (k *FrodoKEM) Keygen() (pk []uint8, sk []uint8) {
- sSeedSEz := make([]byte, k.lenS/8+k.lenSeedSE/8+k.lenZ/8)
- k.rng(sSeedSEz) // fmt.Println("randomness(", len(sSeedSEz), ")", strings.ToUpper(hex.EncodeToString(sSeedSEz)))
- s := sSeedSEz[0 : k.lenS/8]
- seedSE := sSeedSEz[k.lenS/8 : k.lenS/8+k.lenSeedSE/8] // fmt.Println("seedSE", hex.EncodeToString(seedSE))
- z := sSeedSEz[k.lenS/8+k.lenSeedSE/8 : k.lenS/8+k.lenSeedSE/8+k.lenZ/8]
- seedA := k.shake(z, k.lenSeedA/8) // fmt.Println("seedA(", len(seedA), ")", strings.ToUpper(hex.EncodeToString(seedA)))
- A := k.gen(seedA)
- r := unpackUint16(k.shake(append([]byte{0x5f}, seedSE...), 2*k.n*k.nBar*k.lenChi/8)) //fmt.Println("r(", len(r), ")", r)
- Stransposed := k.sampleMatrix(r[0:k.n*k.nBar], k.nBar, k.n) //fmt.Println("S^T", Stransposed)
- S := matrixTranspose(Stransposed)
- E := k.sampleMatrix(r[k.n*k.nBar:2*k.n*k.nBar], k.n, k.nBar)
- B := matrixAddWithMod(matrixMulWithMod(A, S, k.q), E, k.q)
- b := k.pack(B) // fmt.Println("b", hex.EncodeToString(b))
- pk = append(seedA, b...)
- pkh := k.shake(pk, k.lenPkh/8) // fmt.Println("pkh", strings.ToUpper(hex.EncodeToString(pkh)))
- stb := make([]uint8, len(Stransposed)*len(Stransposed[0])*2)
- stbI := 0
- for i := 0; i < len(Stransposed); i++ {
- for j := 0; j < len(Stransposed[i]); j++ {
- stb[stbI] = uint8(Stransposed[i][j] & 0xff)
- stbI++
- stb[stbI] = uint8(Stransposed[i][j] >> 8)
- stbI++
- }
- }
- sk = append(s, seedA...)
- sk = append(sk, b...)
- sk = append(sk, stb...)
- sk = append(sk, pkh...)
- return
-}
-
-// Generate a KEM returning the cipher-text and shared-secret
-func (k *FrodoKEM) Encapsulate(pk []uint8) (ct []uint8, ssEnc []uint8, err error) {
- if len(pk) != k.lenSeedA/8+k.d*k.n*k.nBar/8 {
- err = errors.New("incorrect public key length")
- return
- }
- seedA := pk[0 : k.lenSeedA/8]
- b := pk[k.lenSeedA/8:]
- mu := make([]uint8, k.lenMu/8)
- k.rng(mu)
- //fmt.Println("seedA", hex.EncodeToString(seedA))
- //fmt.Println("b", hex.EncodeToString(b))
- //fmt.Println("mu", hex.EncodeToString(mu))
- pkh := k.shake(pk, k.lenPkh/8) // fmt.Println("pkh", hex.EncodeToString(pkh))
- seedSE_k := k.shake(append(pkh, mu...), k.lenSeedSE/8+k.lenK/8)
- seedSE := seedSE_k[0 : k.lenSeedSE/8]
- _k := seedSE_k[k.lenSeedSE/8 : k.lenSeedSE/8+k.lenK/8]
- r := unpackUint16(k.shake(append([]byte{0x96}, seedSE...), (2*k.mBar*k.n*k.mBar*k.mBar)*k.lenChi/8))
- Sprime := k.sampleMatrix(r[0:k.mBar*k.n], k.mBar, k.n) // fmt.Println("S'", Sprime)
- Eprime := k.sampleMatrix(r[k.mBar*k.n:2*k.mBar*k.n], k.mBar, k.n) // fmt.Println("E'", Eprime)
- A := k.gen(seedA)
- Bprime := matrixAddWithMod(matrixMulWithMod2(Sprime, A, k.q), Eprime, k.q) // fmt.Println("b'", Bprime)
- c1 := k.pack(Bprime) // fmt.Println("c1", hex.EncodeToString(c1))
- Eprimeprime := k.sampleMatrix(r[2*k.mBar*k.n:2*k.mBar*k.n+k.mBar*k.nBar], k.mBar, k.nBar) // fmt.Println("E''", Eprimeprime)
- B := k.unpack(b, k.n, k.nBar)
- V := matrixAddWithMod(matrixMulWithMod2(Sprime, B, k.q), Eprimeprime, k.q)
- C := uMatrixAdd(V, k.encode(mu), k.q)
- c2 := k.pack(C) // fmt.Println("c2", hex.EncodeToString(c2))
- ct = append(c1, c2...)
- ssEnc = k.shake(append(ct, _k...), k.lenSS/8)
- return
-}
-
-// Returns the shared secret by using the provided cipher-text and secret-key
-func (k *FrodoKEM) Dencapsulate(sk []uint8, ct []uint8) (ssDec []uint8, err error) {
- if len(ct) != k.lenCtBytes {
- err = errors.New("incorrect cipher length")
- return
- }
- if len(sk) != k.lenSkBytes {
- err = errors.New("incorrect secret key length")
- return
- }
-
- c1, c2 := k.unwrapCt(ct)
- s, seedA, b, Stransposed, pkh := k.unwrapSk(sk)
- S := matrixTranspose(Stransposed)
- Bprime := k.unpack(c1, k.mBar, k.n)
- C := k.unpack(c2, k.mBar, k.nBar)
- BprimeS := matrixMulWithMod(Bprime, S, k.q)
- M := matrixSubWithMod(C, BprimeS, k.q)
- muPrime := k.decode(M) // fmt.Println("mu'", hex.EncodeToString(muPrime))
-
- seedSEprime_kprime := k.shake(append(pkh, muPrime...), k.lenSeedSE/8+k.lenK/8)
- seedSEprime := seedSEprime_kprime[0 : k.lenSeedSE/8] // fmt.Println("seedSE'", hex.EncodeToString(seedSEprime))
- kprime := seedSEprime_kprime[k.lenSeedSE/8:] // fmt.Println("k'", hex.EncodeToString(kprime))
-
- r := unpackUint16(k.shake(append([]byte{0x96}, seedSEprime...), (2*k.mBar*k.n+k.mBar*k.mBar)*k.lenChi/8)) // fmt.Println("r", r)
-
- Sprime := k.sampleMatrix(r[0:k.mBar*k.n], k.mBar, k.n)
- Eprime := k.sampleMatrix(r[k.mBar*k.n:2*k.mBar*k.n], k.mBar, k.n)
- A := k.gen(seedA)
- Bprimeprime := matrixAddWithMod(matrixMulWithMod2(Sprime, A, k.q), Eprime, k.q)
-
- Eprimeprime := k.sampleMatrix(r[2*k.mBar*k.n:2*k.mBar*k.n+k.mBar*k.nBar], k.mBar, k.nBar)
- B := k.unpack(b, k.n, k.nBar)
- V := matrixAddWithMod(matrixMulWithMod2(Sprime, B, k.q), Eprimeprime, k.q)
- Cprime := uMatrixAdd(V, k.encode(muPrime), k.q)
-
- if constantUint16Equals(Bprime, Bprimeprime)+constantUint16Equals(C, Cprime) == 2 {
- ssDec = k.shake(append(ct, kprime...), k.lenSS/8)
- } else {
- ssDec = k.shake(append(ct, s...), k.lenSS/8)
- }
- return
-}
-
-// Overrides the default random number generator (crypto/rand)
-func (k *FrodoKEM) OverrideRng(newRng func([]byte)) {
- k.rng = newRng
-}
-
-func (k *FrodoKEM) unwrapCt(ct []uint8) (c1 []uint8, c2 []uint8) {
- ofs := 0
- size := k.mBar * k.n * k.d / 8
- c1 = ct[ofs:size] // fmt.Println("c1", hex.EncodeToString(c1))
- ofs += size
- size = k.mBar * k.mBar * k.d / 8
- c2 = ct[ofs : ofs+size] // fmt.Println("c2", hex.EncodeToString(c2))
- return
-}
-
-func (k *FrodoKEM) unwrapSk(sk []uint8) (s []uint8, seedA []uint8, b []uint8, Stransposed [][]int16, pkh []uint8) {
- ofs := 0
- size := k.lenS / 8
- s = sk[ofs:size] // fmt.Println("s", hex.EncodeToString(s))
- ofs += size
- size = k.lenSeedA / 8
- seedA = sk[ofs : ofs+size] // fmt.Println("seedA", hex.EncodeToString(seedA))
- ofs += size
- size = k.d * k.n * k.nBar / 8
- b = sk[ofs : ofs+size] // fmt.Println("b", hex.EncodeToString(b))
-
- ofs += size
- size = k.n * k.nBar * 2
- Sbytes := sk[ofs : ofs+size]
-
- idx := 0
- Stransposed = make([][]int16, k.nBar)
- for i := 0; i < k.nBar; i++ {
- Stransposed[i] = make([]int16, k.n)
- for j := 0; j < k.n; j++ {
- Stransposed[i][j] = int16(Sbytes[idx])
- idx++
- Stransposed[i][j] |= int16(Sbytes[idx]) << 8
- idx++
- }
- }
- // fmt.Println("S^T", Stransposed)
-
- ofs += size
- size = k.lenPkh / 8
- pkh = sk[ofs : ofs+size] // fmt.Println("pkh", hex.EncodeToString(pkh))
-
- return
-}
-
-func (k *FrodoKEM) sample(r uint16) (e int16) {
- t := int(r >> 1)
- e = 0
- for z := 0; z < len(k.tChi)-1; z++ {
- if t > int(k.tChi[z]) {
- e += 1
- }
- }
- r0 := r % 2
- if r0 == 1 {
- e = -e
- }
- return
-}
-
-func (k *FrodoKEM) sampleMatrix(r []uint16, n1 int, n2 int) (E [][]int16) {
- E = make([][]int16, n1)
- for i := 0; i < n1; i++ {
- E[i] = make([]int16, n2)
- for j := 0; j < n2; j++ {
- E[i][j] = k.sample(r[i*n2+j])
- }
- }
- return E
-}
-
-// FrodoKEM specification, Algorithm 3: Frodo.Pack
-func (k *FrodoKEM) pack(C [][]uint16) (r []byte) {
- rows := len(C)
- cols := len(C[0])
- r = make([]byte, k.d*rows*cols/8)
- var ri = 0
- var packed uint8
- var bits uint8
- for i := 0; i < rows; i++ {
- for j := 0; j < cols; j++ {
- val := C[i][j]
- for b := 0; b < k.d; b++ {
- packed <<= 1
- packed |= uint16BitN(val, k.d-b-1)
- if bits++; bits == 8 {
- r[ri] = packed
- ri++
- packed = 0
- bits = 0
- }
- }
- }
- }
- if bits != 0 {
- r[ri] = packed
- }
- return r
-}
-
-// FrodoKEM specification, Algorithm 4: Frodo.Unpack
-func (k *FrodoKEM) unpack(b []uint8, n1 int, n2 int) (C [][]uint16) {
- bIdx := 0
- BBit := 7
- C = make([][]uint16, n1)
- for i := 0; i < n1; i++ {
- C[i] = make([]uint16, n2)
- for j := 0; j < n2; j++ {
- var val uint16
- for l := 0; l < k.d; l++ {
- val <<= 1
- val |= uint16(uint8BitN(b[bIdx], BBit))
- if BBit--; BBit < 0 {
- BBit = 7
- bIdx++
- }
- }
- C[i][j] = val
- }
- }
- return
-}
-
-// FrodoKEM specification, Algorithm 1
-func (k *FrodoKEM) encode(b []uint8) (K [][]uint16) {
- multiplier := int(k.q)
- if multiplier == 0 {
- multiplier = 65536
- }
- if k.b > 0 {
- multiplier /= 2 << (k.b - 1)
- }
- bIdx := 0
- BBit := 0
- K = make([][]uint16, k.mBar)
- for i := 0; i < k.mBar; i++ {
- K[i] = make([]uint16, k.nBar)
- for j := 0; j < k.nBar; j++ {
- var val uint16
- for l := 0; l < k.b; l++ {
- val |= uint16(uint8BitN(b[bIdx], BBit)) << l
- if BBit++; BBit > 7 {
- BBit = 0
- bIdx++
- }
- }
- K[i][j] = val * uint16(multiplier)
- }
- }
- return
-}
-
-// FrodoKEM specification, Algorithm 2
-func (k *FrodoKEM) decode(K [][]uint16) (b []uint8) {
- b = make([]uint8, k.b*k.mBar*k.nBar/8)
- fixedQ := float64(k.q)
- if k.q == 0 {
- fixedQ = float64(65535)
- }
- twoPowerB := int32(2 << (k.b - 1))
- twoPowerBf := float64(int(2 << (k.b - 1)))
- bIdx := 0
- BBit := 0
- for i := 0; i < k.mBar; i++ {
- for j := 0; j < k.nBar; j++ {
- tmp := uint8(int32(math.Round(float64(K[i][j])*twoPowerBf/fixedQ)) % twoPowerB) //FIXME: please do this better
- for l := 0; l < k.b; l++ {
- if uint8BitN(tmp, l) == 1 {
- b[bIdx] = uint8setBitN(b[bIdx], BBit)
- }
- BBit++
- if BBit == 8 {
- bIdx++
- BBit = 0
- }
- }
- }
- }
- return
-}
-
-func (k *FrodoKEM) genSHAKE128(seedA []byte) (A [][]uint16) {
- var c = make([]byte, 2*k.n)
- var tmp = make([]byte, 2+len(seedA))
- copy(tmp[2:], seedA)
- A = make([][]uint16, k.n)
- for i := 0; i < k.n; i++ {
- A[i] = make([]uint16, k.n)
- binary.LittleEndian.PutUint16(tmp[0:], uint16(i))
- sha3.ShakeSum128(c, tmp)
- for j := 0; j < k.n; j++ {
- A[i][j] = binary.LittleEndian.Uint16(c[j*2 : (j+1)*2])
- if k.q != 0 {
- A[i][j] %= k.q
- }
- }
- }
- return
-}
-
-func (k *FrodoKEM) genAES128(seedA []byte) (A [][]uint16) {
- A = make([][]uint16, k.n)
- cipher, err := aes.NewCipher(seedA)
- if err != nil {
- panic(err)
- }
- var b = [16]byte{}
- var c = [16]byte{}
- for i := 0; i < k.n; i++ {
- A[i] = make([]uint16, k.n)
- for j := 0; j < k.n; j += 8 {
- binary.LittleEndian.PutUint16(b[0:2], uint16(i))
- binary.LittleEndian.PutUint16(b[2:4], uint16(j))
- cipher.Encrypt(c[:], b[:])
- for l := 0; l < 8; l++ {
- A[i][j+l] = binary.LittleEndian.Uint16(c[l*2 : (l+1)*2])
- if k.q != 0 {
- A[i][j+l] %= k.q
- }
- }
-
- }
- }
- return
-}
-
-// constant time [][]uint16 equals, 1=true, 0=false
-func constantUint16Equals(a [][]uint16, b [][]uint16) (ret int) {
- ret = 1
- if len(a) != len(b) {
- panic("Can not compare matrices of different size")
- }
- for i := 0; i < len(a); i++ {
- if len(a[i]) != len(b[i]) {
- panic("Can not compare matrices of different size")
- }
- for j := 0; j < len(a[i]); j++ {
- if a[i][j] != b[i][j] {
- ret = 0
- }
- }
- }
- return
-}
-
-func matrixAddWithMod(X [][]uint16, Y [][]int16, q uint16) (R [][]uint16) {
- nrowsx := len(X)
- ncolsx := len(X[0])
- nrowsy := len(Y)
- ncolsy := len(Y[0])
- if nrowsx != nrowsy || ncolsx != ncolsy {
- panic("can't add these matrices")
- }
- R = make([][]uint16, nrowsx)
- for i := 0; i < nrowsx; i++ {
- R[i] = make([]uint16, ncolsx)
- for j := 0; j < ncolsx; j++ {
- R[i][j] = uint16(int16(X[i][j]) + Y[i][j])
- if q != 0 {
- R[i][j] %= q
- }
- }
- }
- return
-}
-
-func uMatrixAdd(X [][]uint16, Y [][]uint16, q uint16) (R [][]uint16) {
- nrowsx := len(X)
- ncolsx := len(X[0])
- nrowsy := len(Y)
- ncolsy := len(Y[0])
- if nrowsx != nrowsy || ncolsx != ncolsy {
- panic("can't add these matrices")
- }
- R = make([][]uint16, nrowsx)
- for i := 0; i < nrowsx; i++ {
- R[i] = make([]uint16, ncolsx)
- for j := 0; j < ncolsx; j++ {
- R[i][j] = X[i][j] + Y[i][j]
- if q != 0 {
- R[i][j] %= q
- }
- }
- }
- return
-}
-
-func matrixSubWithMod(X [][]uint16, Y [][]uint16, q uint16) (R [][]uint16) {
- nrowsx := len(X)
- ncolsx := len(X[0])
- nrowsy := len(Y)
- ncolsy := len(Y[0])
- if nrowsx != nrowsy || ncolsx != ncolsy {
- panic("can't sub these matrices")
- }
- R = make([][]uint16, nrowsx)
- for i := 0; i < nrowsx; i++ {
- R[i] = make([]uint16, ncolsx)
- for j := 0; j < ncolsx; j++ {
- R[i][j] = X[i][j] - Y[i][j]
- if q != 0 {
- R[i][j] %= q
- }
- }
- }
- return
-}
-
-func matrixMulWithMod(X [][]uint16, Y [][]int16, q uint16) (R [][]uint16) {
- nrowsx := len(X)
- ncolsx := len(X[0])
- //nrowsy := len(y)
- ncolsy := len(Y[0])
- R = make([][]uint16, nrowsx)
- for i := 0; i < nrowsx; i++ {
- R[i] = make([]uint16, ncolsy)
- for j := 0; j < ncolsy; j++ {
- var res uint16
- for k := 0; k < ncolsx; k++ {
- res += uint16(int16(X[i][k]) * Y[k][j])
- }
- if q != 0 {
- res %= q
- }
- R[i][j] = res
- }
- }
- return
-}
-
-func matrixMulWithMod2(X [][]int16, Y [][]uint16, q uint16) (R [][]uint16) {
- nrowsx := len(X)
- ncolsx := len(X[0])
- //nrowsy := len(y)
- ncolsy := len(Y[0])
- R = make([][]uint16, nrowsx)
- for i := 0; i < nrowsx; i++ {
- R[i] = make([]uint16, ncolsy)
- for j := 0; j < ncolsy; j++ {
- var res uint16
- for k := 0; k < ncolsx; k++ {
- res += uint16(X[i][k] * int16(Y[k][j]))
- }
- if q != 0 {
- res %= q
- }
- R[i][j] = res
- }
- }
- return
-}
-
-func matrixTranspose(O [][]int16) (T [][]int16) {
- T = make([][]int16, len(O[0]))
- for x := 0; x < len(T); x++ {
- T[x] = make([]int16, len(O))
- for y := 0; y < len(O); y++ {
- T[x][y] = O[y][x]
- }
- }
- return
-}
-
-func unpackUint16(bytes []byte) (r []uint16) {
- r = make([]uint16, len(bytes)/2)
- j := 0
- for i := 0; i+1 < len(bytes); i += 2 {
- r[j] = binary.LittleEndian.Uint16(bytes[i : i+2])
- j++
- }
- return r
-}
-
-func uint8setBitN(val uint8, i int) uint8 {
- return val | (1 << i)
-}
-
-func uint16BitN(val uint16, i int) uint8 {
- return uint8((val >> i) & 1)
-}
-
-func uint8BitN(val uint8, i int) uint8 {
- return (val >> i) & 1
-}
diff --git a/vendor/github.com/kuking/go-frodokem/types.go b/vendor/github.com/kuking/go-frodokem/types.go
deleted file mode 100644
index 045db1c..0000000
--- a/vendor/github.com/kuking/go-frodokem/types.go
+++ /dev/null
@@ -1,202 +0,0 @@
-package go_frodokem
-
-import (
- "crypto/rand"
- "golang.org/x/crypto/sha3"
-)
-
-var variants = []FrodoKEM{
- Frodo640AES(), Frodo640SHAKE(),
- Frodo976AES(), Frodo976SHAKE(),
- Frodo1344AES(), Frodo1344SHAKE(),
-}
-
-// Returns all the FrodoKEM variants supported as an array
-func Variants() []FrodoKEM {
- return variants
-}
-
-type FrodoKEM struct {
- // error_distribution
- name string
- errDistribution []uint16
- tChi []uint16
- d int
- q uint16
- n int
- nBar int
- mBar int
- b int
- lenSeedA int
- lenZ int
- lenMu int
- lenSeedSE int
- lenS int
- lenK int
- lenPkh int
- lenSS int
- lenChi int
- lenSkBytes int
- lenPkBytes int
- lenCtBytes int
- shake func(msg []byte, digestLength int) []byte
- gen func([]byte) [][]uint16
- rng func([]byte)
-}
-
-// Returns a new FrodoKEM 640 AES variant (Generates 128 bits of secret)
-func Frodo640AES() (f FrodoKEM) {
- f = FrodoKEM{
- name: "Frodo640AES",
- errDistribution: []uint16{9288, 8720, 7216, 5264, 3384, 1918, 958, 422, 164, 56, 17, 4, 1},
- d: 15,
- q: 32768,
- n: 640,
- nBar: 8,
- mBar: 8,
- b: 2,
- lenSeedA: 128,
- lenZ: 128,
- lenMu: 128,
- lenSeedSE: 128,
- lenS: 128,
- lenK: 128,
- lenPkh: 128,
- lenSS: 128,
- lenChi: 16,
- lenSkBytes: 19888,
- lenPkBytes: 9616,
- lenCtBytes: 9720,
- shake: shake128,
- rng: cryptoRand,
- }
- f.tChi = cdfZeroCentredSymmetric(f.errDistribution)
- f.gen = f.genAES128
- return
-}
-
-// Returns a new FrodoKEM 640 SHAKE variant (Generates 128 bits of secret)
-func Frodo640SHAKE() (f FrodoKEM) {
- f = Frodo640AES()
- f.name = "Frodo640Shake"
- f.shake = shake128
- f.gen = f.genSHAKE128
- return
-}
-
-// Returns a new FrodoKEM 976 AES variant (Generates 192 bits of secret)
-func Frodo976AES() (f FrodoKEM) {
- f = FrodoKEM{
- name: "Frodo976AES",
- errDistribution: []uint16{11278, 10277, 7774, 4882, 2545, 1101, 396, 118, 29, 6, 1},
- d: 16,
- q: 0, // means no mod in 16 bits uint
- n: 976,
- nBar: 8,
- mBar: 8,
- b: 3,
- lenSeedA: 128,
- lenZ: 128,
- lenMu: 192,
- lenSeedSE: 192,
- lenS: 192,
- lenK: 192,
- lenPkh: 192,
- lenSS: 192,
- lenChi: 16,
- lenSkBytes: 31296,
- lenPkBytes: 15632,
- lenCtBytes: 15744,
- shake: shake256,
- rng: cryptoRand,
- }
- f.tChi = cdfZeroCentredSymmetric(f.errDistribution)
- f.gen = f.genAES128
- return
-}
-
-// Returns a new FrodoKEM 976 SHAKE variant (Generates 192 bits of secret)
-func Frodo976SHAKE() (f FrodoKEM) {
- f = Frodo976AES()
- f.name = "Frodo976Shake"
- f.gen = f.genSHAKE128
- return
-}
-
-// Returns a new FrodoKEM 1344 AES variant (Generates 256 bits of secret)
-func Frodo1344AES() (f FrodoKEM) {
- f = FrodoKEM{
- name: "Frodo1344AES",
- errDistribution: []uint16{18286, 14320, 6876, 2023, 364, 40, 2},
- d: 16,
- q: 0,
- n: 1344,
- nBar: 8,
- mBar: 8,
- b: 4,
- lenSeedA: 128,
- lenZ: 128,
- lenMu: 256,
- lenSeedSE: 256,
- lenS: 256,
- lenK: 256,
- lenPkh: 256,
- lenSS: 256,
- lenChi: 16,
- lenSkBytes: 43088,
- lenPkBytes: 21520,
- lenCtBytes: 21632,
- shake: shake256,
- rng: cryptoRand,
- }
- f.tChi = cdfZeroCentredSymmetric(f.errDistribution)
- f.gen = f.genAES128
- return
-}
-
-// Returns a new FrodoKEM 1344 SHAKE variant (Generates 256 bits of secret)
-func Frodo1344SHAKE() (f FrodoKEM) {
- f = Frodo1344AES()
- f.name = "Frodo1344Shake"
- f.gen = f.genSHAKE128
- return
-}
-
-func sumUint16s(arr []uint16) (r uint16) {
- r = 0
- for _, v := range arr {
- r += v
- }
- return
-}
-
-func cdfZeroCentredSymmetric(chi []uint16) (tChi []uint16) {
- tChi = make([]uint16, len(chi))
- tChi[0] = (chi[0] / 2) - 1
- for z := 1; z < len(chi); z++ {
- tChi[z] = tChi[0] + sumUint16s(chi[1:z+1])
- }
- return
-}
-
-func cryptoRand(target []byte) {
- n, err := rand.Read(target)
- if err != nil {
- panic(err)
- }
- if len(target) != n {
- panic("could not generate enough randomness")
- }
-}
-
-func shake128(msg []byte, size int) (hash []byte) {
- hash = make([]byte, size)
- sha3.ShakeSum128(hash, msg)
- return
-}
-
-func shake256(msg []byte, size int) (hash []byte) {
- hash = make([]byte, size)
- sha3.ShakeSum256(hash, msg)
- return
-}
diff --git a/vendor/github.com/mattn/go-isatty/LICENSE b/vendor/github.com/mattn/go-isatty/LICENSE
deleted file mode 100644
index 65dc692..0000000
--- a/vendor/github.com/mattn/go-isatty/LICENSE
+++ /dev/null
@@ -1,9 +0,0 @@
-Copyright (c) Yasuhiro MATSUMOTO
-
-MIT License (Expat)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/vendor/github.com/mattn/go-isatty/README.md b/vendor/github.com/mattn/go-isatty/README.md
deleted file mode 100644
index 3841835..0000000
--- a/vendor/github.com/mattn/go-isatty/README.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# go-isatty
-
-[![Godoc Reference](https://godoc.org/github.com/mattn/go-isatty?status.svg)](http://godoc.org/github.com/mattn/go-isatty)
-[![Codecov](https://codecov.io/gh/mattn/go-isatty/branch/master/graph/badge.svg)](https://codecov.io/gh/mattn/go-isatty)
-[![Coverage Status](https://coveralls.io/repos/github/mattn/go-isatty/badge.svg?branch=master)](https://coveralls.io/github/mattn/go-isatty?branch=master)
-[![Go Report Card](https://goreportcard.com/badge/mattn/go-isatty)](https://goreportcard.com/report/mattn/go-isatty)
-
-isatty for golang
-
-## Usage
-
-```go
-package main
-
-import (
- "fmt"
- "github.com/mattn/go-isatty"
- "os"
-)
-
-func main() {
- if isatty.IsTerminal(os.Stdout.Fd()) {
- fmt.Println("Is Terminal")
- } else if isatty.IsCygwinTerminal(os.Stdout.Fd()) {
- fmt.Println("Is Cygwin/MSYS2 Terminal")
- } else {
- fmt.Println("Is Not Terminal")
- }
-}
-```
-
-## Installation
-
-```
-$ go get github.com/mattn/go-isatty
-```
-
-## License
-
-MIT
-
-## Author
-
-Yasuhiro Matsumoto (a.k.a mattn)
-
-## Thanks
-
-* k-takata: base idea for IsCygwinTerminal
-
- https://github.com/k-takata/go-iscygpty
diff --git a/vendor/github.com/mattn/go-isatty/doc.go b/vendor/github.com/mattn/go-isatty/doc.go
deleted file mode 100644
index 17d4f90..0000000
--- a/vendor/github.com/mattn/go-isatty/doc.go
+++ /dev/null
@@ -1,2 +0,0 @@
-// Package isatty implements interface to isatty
-package isatty
diff --git a/vendor/github.com/mattn/go-isatty/go.test.sh b/vendor/github.com/mattn/go-isatty/go.test.sh
deleted file mode 100644
index 012162b..0000000
--- a/vendor/github.com/mattn/go-isatty/go.test.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-echo "" > coverage.txt
-
-for d in $(go list ./... | grep -v vendor); do
- go test -race -coverprofile=profile.out -covermode=atomic "$d"
- if [ -f profile.out ]; then
- cat profile.out >> coverage.txt
- rm profile.out
- fi
-done
diff --git a/vendor/github.com/mattn/go-isatty/isatty_bsd.go b/vendor/github.com/mattn/go-isatty/isatty_bsd.go
deleted file mode 100644
index 39bbcf0..0000000
--- a/vendor/github.com/mattn/go-isatty/isatty_bsd.go
+++ /dev/null
@@ -1,19 +0,0 @@
-//go:build (darwin || freebsd || openbsd || netbsd || dragonfly) && !appengine
-// +build darwin freebsd openbsd netbsd dragonfly
-// +build !appengine
-
-package isatty
-
-import "golang.org/x/sys/unix"
-
-// IsTerminal return true if the file descriptor is terminal.
-func IsTerminal(fd uintptr) bool {
- _, err := unix.IoctlGetTermios(int(fd), unix.TIOCGETA)
- return err == nil
-}
-
-// IsCygwinTerminal return true if the file descriptor is a cygwin or msys2
-// terminal. This is also always false on this environment.
-func IsCygwinTerminal(fd uintptr) bool {
- return false
-}
diff --git a/vendor/github.com/mattn/go-isatty/isatty_others.go b/vendor/github.com/mattn/go-isatty/isatty_others.go
deleted file mode 100644
index 3150322..0000000
--- a/vendor/github.com/mattn/go-isatty/isatty_others.go
+++ /dev/null
@@ -1,16 +0,0 @@
-//go:build appengine || js || nacl || wasm
-// +build appengine js nacl wasm
-
-package isatty
-
-// IsTerminal returns true if the file descriptor is terminal which
-// is always false on js and appengine classic which is a sandboxed PaaS.
-func IsTerminal(fd uintptr) bool {
- return false
-}
-
-// IsCygwinTerminal() return true if the file descriptor is a cygwin or msys2
-// terminal. This is also always false on this environment.
-func IsCygwinTerminal(fd uintptr) bool {
- return false
-}
diff --git a/vendor/github.com/mattn/go-isatty/isatty_plan9.go b/vendor/github.com/mattn/go-isatty/isatty_plan9.go
deleted file mode 100644
index bae7f9b..0000000
--- a/vendor/github.com/mattn/go-isatty/isatty_plan9.go
+++ /dev/null
@@ -1,23 +0,0 @@
-//go:build plan9
-// +build plan9
-
-package isatty
-
-import (
- "syscall"
-)
-
-// IsTerminal returns true if the given file descriptor is a terminal.
-func IsTerminal(fd uintptr) bool {
- path, err := syscall.Fd2path(int(fd))
- if err != nil {
- return false
- }
- return path == "/dev/cons" || path == "/mnt/term/dev/cons"
-}
-
-// IsCygwinTerminal return true if the file descriptor is a cygwin or msys2
-// terminal. This is also always false on this environment.
-func IsCygwinTerminal(fd uintptr) bool {
- return false
-}
diff --git a/vendor/github.com/mattn/go-isatty/isatty_solaris.go b/vendor/github.com/mattn/go-isatty/isatty_solaris.go
deleted file mode 100644
index 0c3acf2..0000000
--- a/vendor/github.com/mattn/go-isatty/isatty_solaris.go
+++ /dev/null
@@ -1,21 +0,0 @@
-//go:build solaris && !appengine
-// +build solaris,!appengine
-
-package isatty
-
-import (
- "golang.org/x/sys/unix"
-)
-
-// IsTerminal returns true if the given file descriptor is a terminal.
-// see: https://src.illumos.org/source/xref/illumos-gate/usr/src/lib/libc/port/gen/isatty.c
-func IsTerminal(fd uintptr) bool {
- _, err := unix.IoctlGetTermio(int(fd), unix.TCGETA)
- return err == nil
-}
-
-// IsCygwinTerminal return true if the file descriptor is a cygwin or msys2
-// terminal. This is also always false on this environment.
-func IsCygwinTerminal(fd uintptr) bool {
- return false
-}
diff --git a/vendor/github.com/mattn/go-isatty/isatty_tcgets.go b/vendor/github.com/mattn/go-isatty/isatty_tcgets.go
deleted file mode 100644
index 6778765..0000000
--- a/vendor/github.com/mattn/go-isatty/isatty_tcgets.go
+++ /dev/null
@@ -1,19 +0,0 @@
-//go:build (linux || aix || zos) && !appengine
-// +build linux aix zos
-// +build !appengine
-
-package isatty
-
-import "golang.org/x/sys/unix"
-
-// IsTerminal return true if the file descriptor is terminal.
-func IsTerminal(fd uintptr) bool {
- _, err := unix.IoctlGetTermios(int(fd), unix.TCGETS)
- return err == nil
-}
-
-// IsCygwinTerminal return true if the file descriptor is a cygwin or msys2
-// terminal. This is also always false on this environment.
-func IsCygwinTerminal(fd uintptr) bool {
- return false
-}
diff --git a/vendor/github.com/mattn/go-isatty/isatty_windows.go b/vendor/github.com/mattn/go-isatty/isatty_windows.go
deleted file mode 100644
index 8e3c991..0000000
--- a/vendor/github.com/mattn/go-isatty/isatty_windows.go
+++ /dev/null
@@ -1,125 +0,0 @@
-//go:build windows && !appengine
-// +build windows,!appengine
-
-package isatty
-
-import (
- "errors"
- "strings"
- "syscall"
- "unicode/utf16"
- "unsafe"
-)
-
-const (
- objectNameInfo uintptr = 1
- fileNameInfo = 2
- fileTypePipe = 3
-)
-
-var (
- kernel32 = syscall.NewLazyDLL("kernel32.dll")
- ntdll = syscall.NewLazyDLL("ntdll.dll")
- procGetConsoleMode = kernel32.NewProc("GetConsoleMode")
- procGetFileInformationByHandleEx = kernel32.NewProc("GetFileInformationByHandleEx")
- procGetFileType = kernel32.NewProc("GetFileType")
- procNtQueryObject = ntdll.NewProc("NtQueryObject")
-)
-
-func init() {
- // Check if GetFileInformationByHandleEx is available.
- if procGetFileInformationByHandleEx.Find() != nil {
- procGetFileInformationByHandleEx = nil
- }
-}
-
-// IsTerminal return true if the file descriptor is terminal.
-func IsTerminal(fd uintptr) bool {
- var st uint32
- r, _, e := syscall.Syscall(procGetConsoleMode.Addr(), 2, fd, uintptr(unsafe.Pointer(&st)), 0)
- return r != 0 && e == 0
-}
-
-// Check pipe name is used for cygwin/msys2 pty.
-// Cygwin/MSYS2 PTY has a name like:
-// \{cygwin,msys}-XXXXXXXXXXXXXXXX-ptyN-{from,to}-master
-func isCygwinPipeName(name string) bool {
- token := strings.Split(name, "-")
- if len(token) < 5 {
- return false
- }
-
- if token[0] != `\msys` &&
- token[0] != `\cygwin` &&
- token[0] != `\Device\NamedPipe\msys` &&
- token[0] != `\Device\NamedPipe\cygwin` {
- return false
- }
-
- if token[1] == "" {
- return false
- }
-
- if !strings.HasPrefix(token[2], "pty") {
- return false
- }
-
- if token[3] != `from` && token[3] != `to` {
- return false
- }
-
- if token[4] != "master" {
- return false
- }
-
- return true
-}
-
-// getFileNameByHandle use the undocomented ntdll NtQueryObject to get file full name from file handler
-// since GetFileInformationByHandleEx is not available under windows Vista and still some old fashion
-// guys are using Windows XP, this is a workaround for those guys, it will also work on system from
-// Windows vista to 10
-// see https://stackoverflow.com/a/18792477 for details
-func getFileNameByHandle(fd uintptr) (string, error) {
- if procNtQueryObject == nil {
- return "", errors.New("ntdll.dll: NtQueryObject not supported")
- }
-
- var buf [4 + syscall.MAX_PATH]uint16
- var result int
- r, _, e := syscall.Syscall6(procNtQueryObject.Addr(), 5,
- fd, objectNameInfo, uintptr(unsafe.Pointer(&buf)), uintptr(2*len(buf)), uintptr(unsafe.Pointer(&result)), 0)
- if r != 0 {
- return "", e
- }
- return string(utf16.Decode(buf[4 : 4+buf[0]/2])), nil
-}
-
-// IsCygwinTerminal() return true if the file descriptor is a cygwin or msys2
-// terminal.
-func IsCygwinTerminal(fd uintptr) bool {
- if procGetFileInformationByHandleEx == nil {
- name, err := getFileNameByHandle(fd)
- if err != nil {
- return false
- }
- return isCygwinPipeName(name)
- }
-
- // Cygwin/msys's pty is a pipe.
- ft, _, e := syscall.Syscall(procGetFileType.Addr(), 1, fd, 0, 0)
- if ft != fileTypePipe || e != 0 {
- return false
- }
-
- var buf [2 + syscall.MAX_PATH]uint16
- r, _, e := syscall.Syscall6(procGetFileInformationByHandleEx.Addr(),
- 4, fd, fileNameInfo, uintptr(unsafe.Pointer(&buf)),
- uintptr(len(buf)*2), 0, 0)
- if r == 0 || e != 0 {
- return false
- }
-
- l := *(*uint32)(unsafe.Pointer(&buf))
- return isCygwinPipeName(string(utf16.Decode(buf[2 : 2+l/2])))
-}
diff --git a/vendor/github.com/pkg/errors/.gitignore b/vendor/github.com/pkg/errors/.gitignore
deleted file mode 100644
index daf913b..0000000
--- a/vendor/github.com/pkg/errors/.gitignore
+++ /dev/null
@@ -1,24 +0,0 @@
-# Compiled Object files, Static and Dynamic libs (Shared Objects)
-*.o
-*.a
-*.so
-
-# Folders
-_obj
-_test
-
-# Architecture specific extensions/prefixes
-*.[568vq]
-[568vq].out
-
-*.cgo1.go
-*.cgo2.c
-_cgo_defun.c
-_cgo_gotypes.go
-_cgo_export.*
-
-_testmain.go
-
-*.exe
-*.test
-*.prof
diff --git a/vendor/github.com/pkg/errors/.travis.yml b/vendor/github.com/pkg/errors/.travis.yml
deleted file mode 100644
index 9159de0..0000000
--- a/vendor/github.com/pkg/errors/.travis.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-language: go
-go_import_path: github.com/pkg/errors
-go:
- - 1.11.x
- - 1.12.x
- - 1.13.x
- - tip
-
-script:
- - make check
diff --git a/vendor/github.com/pkg/errors/LICENSE b/vendor/github.com/pkg/errors/LICENSE
deleted file mode 100644
index 835ba3e..0000000
--- a/vendor/github.com/pkg/errors/LICENSE
+++ /dev/null
@@ -1,23 +0,0 @@
-Copyright (c) 2015, Dave Cheney
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/pkg/errors/Makefile b/vendor/github.com/pkg/errors/Makefile
deleted file mode 100644
index ce9d7cd..0000000
--- a/vendor/github.com/pkg/errors/Makefile
+++ /dev/null
@@ -1,44 +0,0 @@
-PKGS := github.com/pkg/errors
-SRCDIRS := $(shell go list -f '{{.Dir}}' $(PKGS))
-GO := go
-
-check: test vet gofmt misspell unconvert staticcheck ineffassign unparam
-
-test:
- $(GO) test $(PKGS)
-
-vet: | test
- $(GO) vet $(PKGS)
-
-staticcheck:
- $(GO) get honnef.co/go/tools/cmd/staticcheck
- staticcheck -checks all $(PKGS)
-
-misspell:
- $(GO) get github.com/client9/misspell/cmd/misspell
- misspell \
- -locale GB \
- -error \
- *.md *.go
-
-unconvert:
- $(GO) get github.com/mdempsky/unconvert
- unconvert -v $(PKGS)
-
-ineffassign:
- $(GO) get github.com/gordonklaus/ineffassign
- find $(SRCDIRS) -name '*.go' | xargs ineffassign
-
-pedantic: check errcheck
-
-unparam:
- $(GO) get mvdan.cc/unparam
- unparam ./...
-
-errcheck:
- $(GO) get github.com/kisielk/errcheck
- errcheck $(PKGS)
-
-gofmt:
- @echo Checking code is gofmted
- @test -z "$(shell gofmt -s -l -d -e $(SRCDIRS) | tee /dev/stderr)"
diff --git a/vendor/github.com/pkg/errors/README.md b/vendor/github.com/pkg/errors/README.md
deleted file mode 100644
index 54dfdcb..0000000
--- a/vendor/github.com/pkg/errors/README.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# errors [![Travis-CI](https://travis-ci.org/pkg/errors.svg)](https://travis-ci.org/pkg/errors) [![AppVeyor](https://ci.appveyor.com/api/projects/status/b98mptawhudj53ep/branch/master?svg=true)](https://ci.appveyor.com/project/davecheney/errors/branch/master) [![GoDoc](https://godoc.org/github.com/pkg/errors?status.svg)](http://godoc.org/github.com/pkg/errors) [![Report card](https://goreportcard.com/badge/github.com/pkg/errors)](https://goreportcard.com/report/github.com/pkg/errors) [![Sourcegraph](https://sourcegraph.com/github.com/pkg/errors/-/badge.svg)](https://sourcegraph.com/github.com/pkg/errors?badge)
-
-Package errors provides simple error handling primitives.
-
-`go get github.com/pkg/errors`
-
-The traditional error handling idiom in Go is roughly akin to
-```go
-if err != nil {
- return err
-}
-```
-which applied recursively up the call stack results in error reports without context or debugging information. The errors package allows programmers to add context to the failure path in their code in a way that does not destroy the original value of the error.
-
-## Adding context to an error
-
-The errors.Wrap function returns a new error that adds context to the original error. For example
-```go
-_, err := ioutil.ReadAll(r)
-if err != nil {
- return errors.Wrap(err, "read failed")
-}
-```
-## Retrieving the cause of an error
-
-Using `errors.Wrap` constructs a stack of errors, adding context to the preceding error. Depending on the nature of the error it may be necessary to reverse the operation of errors.Wrap to retrieve the original error for inspection. Any error value which implements this interface can be inspected by `errors.Cause`.
-```go
-type causer interface {
- Cause() error
-}
-```
-`errors.Cause` will recursively retrieve the topmost error which does not implement `causer`, which is assumed to be the original cause. For example:
-```go
-switch err := errors.Cause(err).(type) {
-case *MyError:
- // handle specifically
-default:
- // unknown error
-}
-```
-
-[Read the package documentation for more information](https://godoc.org/github.com/pkg/errors).
-
-## Roadmap
-
-With the upcoming [Go2 error proposals](https://go.googlesource.com/proposal/+/master/design/go2draft.md) this package is moving into maintenance mode. The roadmap for a 1.0 release is as follows:
-
-- 0.9. Remove pre Go 1.9 and Go 1.10 support, address outstanding pull requests (if possible)
-- 1.0. Final release.
-
-## Contributing
-
-Because of the Go2 errors changes, this package is not accepting proposals for new functionality. With that said, we welcome pull requests, bug fixes and issue reports.
-
-Before sending a PR, please discuss your change by raising an issue.
-
-## License
-
-BSD-2-Clause
diff --git a/vendor/github.com/pkg/errors/appveyor.yml b/vendor/github.com/pkg/errors/appveyor.yml
deleted file mode 100644
index a932ead..0000000
--- a/vendor/github.com/pkg/errors/appveyor.yml
+++ /dev/null
@@ -1,32 +0,0 @@
-version: build-{build}.{branch}
-
-clone_folder: C:\gopath\src\github.com\pkg\errors
-shallow_clone: true # for startup speed
-
-environment:
- GOPATH: C:\gopath
-
-platform:
- - x64
-
-# http://www.appveyor.com/docs/installed-software
-install:
- # some helpful output for debugging builds
- - go version
- - go env
- # pre-installed MinGW at C:\MinGW is 32bit only
- # but MSYS2 at C:\msys64 has mingw64
- - set PATH=C:\msys64\mingw64\bin;%PATH%
- - gcc --version
- - g++ --version
-
-build_script:
- - go install -v ./...
-
-test_script:
- - set PATH=C:\gopath\bin;%PATH%
- - go test -v ./...
-
-#artifacts:
-# - path: '%GOPATH%\bin\*.exe'
-deploy: off
diff --git a/vendor/github.com/pkg/errors/errors.go b/vendor/github.com/pkg/errors/errors.go
deleted file mode 100644
index 161aea2..0000000
--- a/vendor/github.com/pkg/errors/errors.go
+++ /dev/null
@@ -1,288 +0,0 @@
-// Package errors provides simple error handling primitives.
-//
-// The traditional error handling idiom in Go is roughly akin to
-//
-// if err != nil {
-// return err
-// }
-//
-// which when applied recursively up the call stack results in error reports
-// without context or debugging information. The errors package allows
-// programmers to add context to the failure path in their code in a way
-// that does not destroy the original value of the error.
-//
-// Adding context to an error
-//
-// The errors.Wrap function returns a new error that adds context to the
-// original error by recording a stack trace at the point Wrap is called,
-// together with the supplied message. For example
-//
-// _, err := ioutil.ReadAll(r)
-// if err != nil {
-// return errors.Wrap(err, "read failed")
-// }
-//
-// If additional control is required, the errors.WithStack and
-// errors.WithMessage functions destructure errors.Wrap into its component
-// operations: annotating an error with a stack trace and with a message,
-// respectively.
-//
-// Retrieving the cause of an error
-//
-// Using errors.Wrap constructs a stack of errors, adding context to the
-// preceding error. Depending on the nature of the error it may be necessary
-// to reverse the operation of errors.Wrap to retrieve the original error
-// for inspection. Any error value which implements this interface
-//
-// type causer interface {
-// Cause() error
-// }
-//
-// can be inspected by errors.Cause. errors.Cause will recursively retrieve
-// the topmost error that does not implement causer, which is assumed to be
-// the original cause. For example:
-//
-// switch err := errors.Cause(err).(type) {
-// case *MyError:
-// // handle specifically
-// default:
-// // unknown error
-// }
-//
-// Although the causer interface is not exported by this package, it is
-// considered a part of its stable public interface.
-//
-// Formatted printing of errors
-//
-// All error values returned from this package implement fmt.Formatter and can
-// be formatted by the fmt package. The following verbs are supported:
-//
-// %s print the error. If the error has a Cause it will be
-// printed recursively.
-// %v see %s
-// %+v extended format. Each Frame of the error's StackTrace will
-// be printed in detail.
-//
-// Retrieving the stack trace of an error or wrapper
-//
-// New, Errorf, Wrap, and Wrapf record a stack trace at the point they are
-// invoked. This information can be retrieved with the following interface:
-//
-// type stackTracer interface {
-// StackTrace() errors.StackTrace
-// }
-//
-// The returned errors.StackTrace type is defined as
-//
-// type StackTrace []Frame
-//
-// The Frame type represents a call site in the stack trace. Frame supports
-// the fmt.Formatter interface that can be used for printing information about
-// the stack trace of this error. For example:
-//
-// if err, ok := err.(stackTracer); ok {
-// for _, f := range err.StackTrace() {
-// fmt.Printf("%+s:%d\n", f, f)
-// }
-// }
-//
-// Although the stackTracer interface is not exported by this package, it is
-// considered a part of its stable public interface.
-//
-// See the documentation for Frame.Format for more details.
-package errors
-
-import (
- "fmt"
- "io"
-)
-
-// New returns an error with the supplied message.
-// New also records the stack trace at the point it was called.
-func New(message string) error {
- return &fundamental{
- msg: message,
- stack: callers(),
- }
-}
-
-// Errorf formats according to a format specifier and returns the string
-// as a value that satisfies error.
-// Errorf also records the stack trace at the point it was called.
-func Errorf(format string, args ...interface{}) error {
- return &fundamental{
- msg: fmt.Sprintf(format, args...),
- stack: callers(),
- }
-}
-
-// fundamental is an error that has a message and a stack, but no caller.
-type fundamental struct {
- msg string
- *stack
-}
-
-func (f *fundamental) Error() string { return f.msg }
-
-func (f *fundamental) Format(s fmt.State, verb rune) {
- switch verb {
- case 'v':
- if s.Flag('+') {
- io.WriteString(s, f.msg)
- f.stack.Format(s, verb)
- return
- }
- fallthrough
- case 's':
- io.WriteString(s, f.msg)
- case 'q':
- fmt.Fprintf(s, "%q", f.msg)
- }
-}
-
-// WithStack annotates err with a stack trace at the point WithStack was called.
-// If err is nil, WithStack returns nil.
-func WithStack(err error) error {
- if err == nil {
- return nil
- }
- return &withStack{
- err,
- callers(),
- }
-}
-
-type withStack struct {
- error
- *stack
-}
-
-func (w *withStack) Cause() error { return w.error }
-
-// Unwrap provides compatibility for Go 1.13 error chains.
-func (w *withStack) Unwrap() error { return w.error }
-
-func (w *withStack) Format(s fmt.State, verb rune) {
- switch verb {
- case 'v':
- if s.Flag('+') {
- fmt.Fprintf(s, "%+v", w.Cause())
- w.stack.Format(s, verb)
- return
- }
- fallthrough
- case 's':
- io.WriteString(s, w.Error())
- case 'q':
- fmt.Fprintf(s, "%q", w.Error())
- }
-}
-
-// Wrap returns an error annotating err with a stack trace
-// at the point Wrap is called, and the supplied message.
-// If err is nil, Wrap returns nil.
-func Wrap(err error, message string) error {
- if err == nil {
- return nil
- }
- err = &withMessage{
- cause: err,
- msg: message,
- }
- return &withStack{
- err,
- callers(),
- }
-}
-
-// Wrapf returns an error annotating err with a stack trace
-// at the point Wrapf is called, and the format specifier.
-// If err is nil, Wrapf returns nil.
-func Wrapf(err error, format string, args ...interface{}) error {
- if err == nil {
- return nil
- }
- err = &withMessage{
- cause: err,
- msg: fmt.Sprintf(format, args...),
- }
- return &withStack{
- err,
- callers(),
- }
-}
-
-// WithMessage annotates err with a new message.
-// If err is nil, WithMessage returns nil.
-func WithMessage(err error, message string) error {
- if err == nil {
- return nil
- }
- return &withMessage{
- cause: err,
- msg: message,
- }
-}
-
-// WithMessagef annotates err with the format specifier.
-// If err is nil, WithMessagef returns nil.
-func WithMessagef(err error, format string, args ...interface{}) error {
- if err == nil {
- return nil
- }
- return &withMessage{
- cause: err,
- msg: fmt.Sprintf(format, args...),
- }
-}
-
-type withMessage struct {
- cause error
- msg string
-}
-
-func (w *withMessage) Error() string { return w.msg + ": " + w.cause.Error() }
-func (w *withMessage) Cause() error { return w.cause }
-
-// Unwrap provides compatibility for Go 1.13 error chains.
-func (w *withMessage) Unwrap() error { return w.cause }
-
-func (w *withMessage) Format(s fmt.State, verb rune) {
- switch verb {
- case 'v':
- if s.Flag('+') {
- fmt.Fprintf(s, "%+v\n", w.Cause())
- io.WriteString(s, w.msg)
- return
- }
- fallthrough
- case 's', 'q':
- io.WriteString(s, w.Error())
- }
-}
-
-// Cause returns the underlying cause of the error, if possible.
-// An error value has a cause if it implements the following
-// interface:
-//
-// type causer interface {
-// Cause() error
-// }
-//
-// If the error does not implement Cause, the original error will
-// be returned. If the error is nil, nil will be returned without further
-// investigation.
-func Cause(err error) error {
- type causer interface {
- Cause() error
- }
-
- for err != nil {
- cause, ok := err.(causer)
- if !ok {
- break
- }
- err = cause.Cause()
- }
- return err
-}
diff --git a/vendor/github.com/pkg/errors/go113.go b/vendor/github.com/pkg/errors/go113.go
deleted file mode 100644
index be0d10d..0000000
--- a/vendor/github.com/pkg/errors/go113.go
+++ /dev/null
@@ -1,38 +0,0 @@
-// +build go1.13
-
-package errors
-
-import (
- stderrors "errors"
-)
-
-// Is reports whether any error in err's chain matches target.
-//
-// The chain consists of err itself followed by the sequence of errors obtained by
-// repeatedly calling Unwrap.
-//
-// An error is considered to match a target if it is equal to that target or if
-// it implements a method Is(error) bool such that Is(target) returns true.
-func Is(err, target error) bool { return stderrors.Is(err, target) }
-
-// As finds the first error in err's chain that matches target, and if so, sets
-// target to that error value and returns true.
-//
-// The chain consists of err itself followed by the sequence of errors obtained by
-// repeatedly calling Unwrap.
-//
-// An error matches target if the error's concrete value is assignable to the value
-// pointed to by target, or if the error has a method As(interface{}) bool such that
-// As(target) returns true. In the latter case, the As method is responsible for
-// setting target.
-//
-// As will panic if target is not a non-nil pointer to either a type that implements
-// error, or to any interface type. As returns false if err is nil.
-func As(err error, target interface{}) bool { return stderrors.As(err, target) }
-
-// Unwrap returns the result of calling the Unwrap method on err, if err's
-// type contains an Unwrap method returning error.
-// Otherwise, Unwrap returns nil.
-func Unwrap(err error) error {
- return stderrors.Unwrap(err)
-}
diff --git a/vendor/github.com/pkg/errors/stack.go b/vendor/github.com/pkg/errors/stack.go
deleted file mode 100644
index 779a834..0000000
--- a/vendor/github.com/pkg/errors/stack.go
+++ /dev/null
@@ -1,177 +0,0 @@
-package errors
-
-import (
- "fmt"
- "io"
- "path"
- "runtime"
- "strconv"
- "strings"
-)
-
-// Frame represents a program counter inside a stack frame.
-// For historical reasons if Frame is interpreted as a uintptr
-// its value represents the program counter + 1.
-type Frame uintptr
-
-// pc returns the program counter for this frame;
-// multiple frames may have the same PC value.
-func (f Frame) pc() uintptr { return uintptr(f) - 1 }
-
-// file returns the full path to the file that contains the
-// function for this Frame's pc.
-func (f Frame) file() string {
- fn := runtime.FuncForPC(f.pc())
- if fn == nil {
- return "unknown"
- }
- file, _ := fn.FileLine(f.pc())
- return file
-}
-
-// line returns the line number of source code of the
-// function for this Frame's pc.
-func (f Frame) line() int {
- fn := runtime.FuncForPC(f.pc())
- if fn == nil {
- return 0
- }
- _, line := fn.FileLine(f.pc())
- return line
-}
-
-// name returns the name of this function, if known.
-func (f Frame) name() string {
- fn := runtime.FuncForPC(f.pc())
- if fn == nil {
- return "unknown"
- }
- return fn.Name()
-}
-
-// Format formats the frame according to the fmt.Formatter interface.
-//
-// %s source file
-// %d source line
-// %n function name
-// %v equivalent to %s:%d
-//
-// Format accepts flags that alter the printing of some verbs, as follows:
-//
-// %+s function name and path of source file relative to the compile time
-// GOPATH separated by \n\t (\n\t)
-// %+v equivalent to %+s:%d
-func (f Frame) Format(s fmt.State, verb rune) {
- switch verb {
- case 's':
- switch {
- case s.Flag('+'):
- io.WriteString(s, f.name())
- io.WriteString(s, "\n\t")
- io.WriteString(s, f.file())
- default:
- io.WriteString(s, path.Base(f.file()))
- }
- case 'd':
- io.WriteString(s, strconv.Itoa(f.line()))
- case 'n':
- io.WriteString(s, funcname(f.name()))
- case 'v':
- f.Format(s, 's')
- io.WriteString(s, ":")
- f.Format(s, 'd')
- }
-}
-
-// MarshalText formats a stacktrace Frame as a text string. The output is the
-// same as that of fmt.Sprintf("%+v", f), but without newlines or tabs.
-func (f Frame) MarshalText() ([]byte, error) {
- name := f.name()
- if name == "unknown" {
- return []byte(name), nil
- }
- return []byte(fmt.Sprintf("%s %s:%d", name, f.file(), f.line())), nil
-}
-
-// StackTrace is stack of Frames from innermost (newest) to outermost (oldest).
-type StackTrace []Frame
-
-// Format formats the stack of Frames according to the fmt.Formatter interface.
-//
-// %s lists source files for each Frame in the stack
-// %v lists the source file and line number for each Frame in the stack
-//
-// Format accepts flags that alter the printing of some verbs, as follows:
-//
-// %+v Prints filename, function, and line number for each Frame in the stack.
-func (st StackTrace) Format(s fmt.State, verb rune) {
- switch verb {
- case 'v':
- switch {
- case s.Flag('+'):
- for _, f := range st {
- io.WriteString(s, "\n")
- f.Format(s, verb)
- }
- case s.Flag('#'):
- fmt.Fprintf(s, "%#v", []Frame(st))
- default:
- st.formatSlice(s, verb)
- }
- case 's':
- st.formatSlice(s, verb)
- }
-}
-
-// formatSlice will format this StackTrace into the given buffer as a slice of
-// Frame, only valid when called with '%s' or '%v'.
-func (st StackTrace) formatSlice(s fmt.State, verb rune) {
- io.WriteString(s, "[")
- for i, f := range st {
- if i > 0 {
- io.WriteString(s, " ")
- }
- f.Format(s, verb)
- }
- io.WriteString(s, "]")
-}
-
-// stack represents a stack of program counters.
-type stack []uintptr
-
-func (s *stack) Format(st fmt.State, verb rune) {
- switch verb {
- case 'v':
- switch {
- case st.Flag('+'):
- for _, pc := range *s {
- f := Frame(pc)
- fmt.Fprintf(st, "\n%+v", f)
- }
- }
- }
-}
-
-func (s *stack) StackTrace() StackTrace {
- f := make([]Frame, len(*s))
- for i := 0; i < len(f); i++ {
- f[i] = Frame((*s)[i])
- }
- return f
-}
-
-func callers() *stack {
- const depth = 32
- var pcs [depth]uintptr
- n := runtime.Callers(3, pcs[:])
- var st stack = pcs[0:n]
- return &st
-}
-
-// funcname removes the path prefix component of a function's name reported by func.Name().
-func funcname(name string) string {
- i := strings.LastIndex(name, "/")
- name = name[i+1:]
- i = strings.Index(name, ".")
- return name[i+1:]
-}
diff --git a/vendor/github.com/templexxx/cpufeat/.gitignore b/vendor/github.com/templexxx/cpufeat/.gitignore
deleted file mode 100644
index 08baa1a..0000000
--- a/vendor/github.com/templexxx/cpufeat/.gitignore
+++ /dev/null
@@ -1,15 +0,0 @@
-# Binaries for programs and plugins
-*.exe
-*.dll
-*.so
-*.dylib
-
-# Test binary, build with `go test -c`
-*.test
-
-# Output of the go coverage tool, specifically when used with LiteIDE
-*.out
-
-# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
-.glide/
-.idea
diff --git a/vendor/github.com/templexxx/cpufeat/LICENSE b/vendor/github.com/templexxx/cpufeat/LICENSE
deleted file mode 100644
index ea5ea89..0000000
--- a/vendor/github.com/templexxx/cpufeat/LICENSE
+++ /dev/null
@@ -1,27 +0,0 @@
-Copyright (c) 2009 The Go Authors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
- * Neither the name of Google Inc. nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/vendor/github.com/templexxx/cpufeat/README.md b/vendor/github.com/templexxx/cpufeat/README.md
deleted file mode 100644
index 16afe53..0000000
--- a/vendor/github.com/templexxx/cpufeat/README.md
+++ /dev/null
@@ -1 +0,0 @@
-see: https://github.com/templexxx/cpu
diff --git a/vendor/github.com/templexxx/cpufeat/cpu.go b/vendor/github.com/templexxx/cpufeat/cpu.go
deleted file mode 100644
index dd5a949..0000000
--- a/vendor/github.com/templexxx/cpufeat/cpu.go
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package cpu implements processor feature detection
-// used by the Go standard libary.
-package cpufeat
-
-var X86 x86
-
-// The booleans in x86 contain the correspondingly named cpuid feature bit.
-// HasAVX and HasAVX2 are only set if the OS does support XMM and YMM registers
-// in addition to the cpuid feature bit being set.
-// The struct is padded to avoid false sharing.
-type x86 struct {
- _ [CacheLineSize]byte
- HasAES bool
- HasAVX bool
- HasAVX2 bool
- HasAVX512 bool
- HasBMI1 bool
- HasBMI2 bool
- HasERMS bool
- HasOSXSAVE bool
- HasPCLMULQDQ bool
- HasPOPCNT bool
- HasSSE2 bool
- HasSSE3 bool
- HasSSSE3 bool
- HasSSE41 bool
- HasSSE42 bool
- _ [CacheLineSize]byte
-}
diff --git a/vendor/github.com/templexxx/cpufeat/cpu_arm.go b/vendor/github.com/templexxx/cpufeat/cpu_arm.go
deleted file mode 100644
index b3eb5a9..0000000
--- a/vendor/github.com/templexxx/cpufeat/cpu_arm.go
+++ /dev/null
@@ -1,7 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpufeat
-
-const CacheLineSize = 32
diff --git a/vendor/github.com/templexxx/cpufeat/cpu_arm64.go b/vendor/github.com/templexxx/cpufeat/cpu_arm64.go
deleted file mode 100644
index b3eb5a9..0000000
--- a/vendor/github.com/templexxx/cpufeat/cpu_arm64.go
+++ /dev/null
@@ -1,7 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpufeat
-
-const CacheLineSize = 32
diff --git a/vendor/github.com/templexxx/cpufeat/cpu_mips.go b/vendor/github.com/templexxx/cpufeat/cpu_mips.go
deleted file mode 100644
index b3eb5a9..0000000
--- a/vendor/github.com/templexxx/cpufeat/cpu_mips.go
+++ /dev/null
@@ -1,7 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpufeat
-
-const CacheLineSize = 32
diff --git a/vendor/github.com/templexxx/cpufeat/cpu_mips64.go b/vendor/github.com/templexxx/cpufeat/cpu_mips64.go
deleted file mode 100644
index b3eb5a9..0000000
--- a/vendor/github.com/templexxx/cpufeat/cpu_mips64.go
+++ /dev/null
@@ -1,7 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpufeat
-
-const CacheLineSize = 32
diff --git a/vendor/github.com/templexxx/cpufeat/cpu_mips64le.go b/vendor/github.com/templexxx/cpufeat/cpu_mips64le.go
deleted file mode 100644
index b3eb5a9..0000000
--- a/vendor/github.com/templexxx/cpufeat/cpu_mips64le.go
+++ /dev/null
@@ -1,7 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpufeat
-
-const CacheLineSize = 32
diff --git a/vendor/github.com/templexxx/cpufeat/cpu_mipsle.go b/vendor/github.com/templexxx/cpufeat/cpu_mipsle.go
deleted file mode 100644
index b3eb5a9..0000000
--- a/vendor/github.com/templexxx/cpufeat/cpu_mipsle.go
+++ /dev/null
@@ -1,7 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpufeat
-
-const CacheLineSize = 32
diff --git a/vendor/github.com/templexxx/cpufeat/cpu_ppc64.go b/vendor/github.com/templexxx/cpufeat/cpu_ppc64.go
deleted file mode 100644
index 1e738e3..0000000
--- a/vendor/github.com/templexxx/cpufeat/cpu_ppc64.go
+++ /dev/null
@@ -1,7 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpufeat
-
-const CacheLineSize = 128
diff --git a/vendor/github.com/templexxx/cpufeat/cpu_ppc64le.go b/vendor/github.com/templexxx/cpufeat/cpu_ppc64le.go
deleted file mode 100644
index 1e738e3..0000000
--- a/vendor/github.com/templexxx/cpufeat/cpu_ppc64le.go
+++ /dev/null
@@ -1,7 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpufeat
-
-const CacheLineSize = 128
diff --git a/vendor/github.com/templexxx/cpufeat/cpu_s390x.go b/vendor/github.com/templexxx/cpufeat/cpu_s390x.go
deleted file mode 100644
index 17be8fe..0000000
--- a/vendor/github.com/templexxx/cpufeat/cpu_s390x.go
+++ /dev/null
@@ -1,7 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpufeat
-
-const CacheLineSize = 256
diff --git a/vendor/github.com/templexxx/cpufeat/cpu_x86.go b/vendor/github.com/templexxx/cpufeat/cpu_x86.go
deleted file mode 100644
index 19d6d22..0000000
--- a/vendor/github.com/templexxx/cpufeat/cpu_x86.go
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build 386 amd64 amd64p32
-
-package cpufeat
-
-const CacheLineSize = 64
-
-// cpuid is implemented in cpu_x86.s.
-func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
-
-// xgetbv with ecx = 0 is implemented in cpu_x86.s.
-func xgetbv() (eax, edx uint32)
-
-func init() {
- maxId, _, _, _ := cpuid(0, 0)
-
- if maxId < 1 {
- return
- }
-
- _, _, ecx1, edx1 := cpuid(1, 0)
- X86.HasSSE2 = isSet(26, edx1)
-
- X86.HasSSE3 = isSet(0, ecx1)
- X86.HasPCLMULQDQ = isSet(1, ecx1)
- X86.HasSSSE3 = isSet(9, ecx1)
- X86.HasSSE41 = isSet(19, ecx1)
- X86.HasSSE42 = isSet(20, ecx1)
- X86.HasPOPCNT = isSet(23, ecx1)
- X86.HasAES = isSet(25, ecx1)
- X86.HasOSXSAVE = isSet(27, ecx1)
-
- osSupportsAVX := false
- osSupportAVX512 := false
- // For XGETBV, OSXSAVE bit is required and sufficient.
- if X86.HasOSXSAVE {
- eax, _ := xgetbv()
- // Check if XMM and YMM registers have OS support.
- osSupportsAVX = isSet(1, eax) && isSet(2, eax)
- // Check is ZMM registers have OS support.
- osSupportAVX512 = (eax>>5)&7 == 7 && (eax>>1)&3 == 3
- }
-
- X86.HasAVX = isSet(28, ecx1) && osSupportsAVX
-
- if maxId < 7 {
- return
- }
-
- _, ebx7, _, _ := cpuid(7, 0)
- X86.HasBMI1 = isSet(3, ebx7)
- X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX
- X86.HasBMI2 = isSet(8, ebx7)
- X86.HasERMS = isSet(9, ebx7)
- X86.HasAVX512 = hasAVX512(ebx7) && osSupportAVX512
-}
-
-func isSet(bitpos uint, value uint32) bool {
- return value&(1< 0 {
- wordBytes := w * wordSize
- fastXORWords(dst[:wordBytes], a[:wordBytes], b[:wordBytes])
- }
- for i := n - n%wordSize; i < n; i++ {
- dst[i] = a[i] ^ b[i]
- }
-}
-
-func safeXORBytes(dst, a, b []byte, n int) {
- ex := n % 8
- for i := 0; i < ex; i++ {
- dst[i] = a[i] ^ b[i]
- }
-
- for i := ex; i < n; i += 8 {
- _dst := dst[i : i+8]
- _a := a[i : i+8]
- _b := b[i : i+8]
- _dst[0] = _a[0] ^ _b[0]
- _dst[1] = _a[1] ^ _b[1]
- _dst[2] = _a[2] ^ _b[2]
- _dst[3] = _a[3] ^ _b[3]
-
- _dst[4] = _a[4] ^ _b[4]
- _dst[5] = _a[5] ^ _b[5]
- _dst[6] = _a[6] ^ _b[6]
- _dst[7] = _a[7] ^ _b[7]
- }
-}
-
-// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
-// The arguments are assumed to be of equal length.
-func fastXORWords(dst, a, b []byte) {
- dw := *(*[]uintptr)(unsafe.Pointer(&dst))
- aw := *(*[]uintptr)(unsafe.Pointer(&a))
- bw := *(*[]uintptr)(unsafe.Pointer(&b))
- n := len(b) / wordSize
- ex := n % 8
- for i := 0; i < ex; i++ {
- dw[i] = aw[i] ^ bw[i]
- }
-
- for i := ex; i < n; i += 8 {
- _dw := dw[i : i+8]
- _aw := aw[i : i+8]
- _bw := bw[i : i+8]
- _dw[0] = _aw[0] ^ _bw[0]
- _dw[1] = _aw[1] ^ _bw[1]
- _dw[2] = _aw[2] ^ _bw[2]
- _dw[3] = _aw[3] ^ _bw[3]
- _dw[4] = _aw[4] ^ _bw[4]
- _dw[5] = _aw[5] ^ _bw[5]
- _dw[6] = _aw[6] ^ _bw[6]
- _dw[7] = _aw[7] ^ _bw[7]
- }
-}
diff --git a/vendor/github.com/templexxx/xor/sse2_amd64.s b/vendor/github.com/templexxx/xor/sse2_amd64.s
deleted file mode 100644
index d7f702b..0000000
--- a/vendor/github.com/templexxx/xor/sse2_amd64.s
+++ /dev/null
@@ -1,574 +0,0 @@
-#include "textflag.h"
-
-// addr of mem
-#define DST BX
-#define SRC SI
-#define SRC0 TMP4
-#define SRC1 TMP5
-
-// loop args
-// num of vect
-#define VECT CX
-#define LEN DX
-// pos of matrix
-#define POS R8
-
-// tmp store
-// num of vect or ...
-#define TMP1 R9
-// pos of matrix or ...
-#define TMP2 R10
-// store addr of data/parity or ...
-#define TMP3 R11
-#define TMP4 R12
-#define TMP5 R13
-#define TMP6 R14
-
-// func bytesSrc0(dst, src0, src1 []byte)
-TEXT ·xorSrc0(SB), NOSPLIT, $0
- MOVQ len+32(FP), LEN
- CMPQ LEN, $0
- JE ret
- MOVQ dst+0(FP), DST
- MOVQ src0+24(FP), SRC0
- MOVQ src1+48(FP), SRC1
- TESTQ $15, LEN
- JNZ not_aligned
-
-aligned:
- MOVQ $0, POS
-
-loop16b:
- MOVOU (SRC0)(POS*1), X0
- XORPD (SRC1)(POS*1), X0
- MOVOU X0, (DST)(POS*1)
- ADDQ $16, POS
- CMPQ LEN, POS
- JNE loop16b
- RET
-
-loop_1b:
- MOVB -1(SRC0)(LEN*1), TMP1
- MOVB -1(SRC1)(LEN*1), TMP2
- XORB TMP1, TMP2
- MOVB TMP2, -1(DST)(LEN*1)
- SUBQ $1, LEN
- TESTQ $7, LEN
- JNZ loop_1b
- CMPQ LEN, $0
- JE ret
- TESTQ $15, LEN
- JZ aligned
-
-not_aligned:
- TESTQ $7, LEN
- JNE loop_1b
- MOVQ LEN, TMP1
- ANDQ $15, TMP1
-
-loop_8b:
- MOVQ -8(SRC0)(LEN*1), TMP2
- MOVQ -8(SRC1)(LEN*1), TMP3
- XORQ TMP2, TMP3
- MOVQ TMP3, -8(DST)(LEN*1)
- SUBQ $8, LEN
- SUBQ $8, TMP1
- JG loop_8b
-
- CMPQ LEN, $16
- JGE aligned
- RET
-
-ret:
- RET
-
-// func bytesSrc1(dst, src0, src1 []byte)
-TEXT ·xorSrc1(SB), NOSPLIT, $0
- MOVQ len+56(FP), LEN
- CMPQ LEN, $0
- JE ret
- MOVQ dst+0(FP), DST
- MOVQ src0+24(FP), SRC0
- MOVQ src1+48(FP), SRC1
- TESTQ $15, LEN
- JNZ not_aligned
-
-aligned:
- MOVQ $0, POS
-
-loop16b:
- MOVOU (SRC0)(POS*1), X0
- XORPD (SRC1)(POS*1), X0
- MOVOU X0, (DST)(POS*1)
- ADDQ $16, POS
- CMPQ LEN, POS
- JNE loop16b
- RET
-
-loop_1b:
- MOVB -1(SRC0)(LEN*1), TMP1
- MOVB -1(SRC1)(LEN*1), TMP2
- XORB TMP1, TMP2
- MOVB TMP2, -1(DST)(LEN*1)
- SUBQ $1, LEN
- TESTQ $7, LEN
- JNZ loop_1b
- CMPQ LEN, $0
- JE ret
- TESTQ $15, LEN
- JZ aligned
-
-not_aligned:
- TESTQ $7, LEN
- JNE loop_1b
- MOVQ LEN, TMP1
- ANDQ $15, TMP1
-
-loop_8b:
- MOVQ -8(SRC0)(LEN*1), TMP2
- MOVQ -8(SRC1)(LEN*1), TMP3
- XORQ TMP2, TMP3
- MOVQ TMP3, -8(DST)(LEN*1)
- SUBQ $8, LEN
- SUBQ $8, TMP1
- JG loop_8b
-
- CMPQ LEN, $16
- JGE aligned
- RET
-
-ret:
- RET
-
-// func bytesSSE2mini(dst, src0, src1 []byte, size int)
-TEXT ·bytesSSE2mini(SB), NOSPLIT, $0
- MOVQ len+72(FP), LEN
- CMPQ LEN, $0
- JE ret
- MOVQ dst+0(FP), DST
- MOVQ src0+24(FP), SRC0
- MOVQ src1+48(FP), SRC1
- TESTQ $15, LEN
- JNZ not_aligned
-
-aligned:
- MOVQ $0, POS
-
-loop16b:
- MOVOU (SRC0)(POS*1), X0
- XORPD (SRC1)(POS*1), X0
-
- // MOVOU (SRC1)(POS*1), X4
- // PXOR X4, X0
- MOVOU X0, (DST)(POS*1)
- ADDQ $16, POS
- CMPQ LEN, POS
- JNE loop16b
- RET
-
-loop_1b:
- MOVB -1(SRC0)(LEN*1), TMP1
- MOVB -1(SRC1)(LEN*1), TMP2
- XORB TMP1, TMP2
- MOVB TMP2, -1(DST)(LEN*1)
- SUBQ $1, LEN
- TESTQ $7, LEN
- JNZ loop_1b
- CMPQ LEN, $0
- JE ret
- TESTQ $15, LEN
- JZ aligned
-
-not_aligned:
- TESTQ $7, LEN
- JNE loop_1b
- MOVQ LEN, TMP1
- ANDQ $15, TMP1
-
-loop_8b:
- MOVQ -8(SRC0)(LEN*1), TMP2
- MOVQ -8(SRC1)(LEN*1), TMP3
- XORQ TMP2, TMP3
- MOVQ TMP3, -8(DST)(LEN*1)
- SUBQ $8, LEN
- SUBQ $8, TMP1
- JG loop_8b
-
- CMPQ LEN, $16
- JGE aligned
- RET
-
-ret:
- RET
-
-// func bytesSSE2small(dst, src0, src1 []byte, size int)
-TEXT ·bytesSSE2small(SB), NOSPLIT, $0
- MOVQ len+72(FP), LEN
- CMPQ LEN, $0
- JE ret
- MOVQ dst+0(FP), DST
- MOVQ src0+24(FP), SRC0
- MOVQ src1+48(FP), SRC1
- TESTQ $63, LEN
- JNZ not_aligned
-
-aligned:
- MOVQ $0, POS
-
-loop64b:
- MOVOU (SRC0)(POS*1), X0
- MOVOU 16(SRC0)(POS*1), X1
- MOVOU 32(SRC0)(POS*1), X2
- MOVOU 48(SRC0)(POS*1), X3
-
- MOVOU (SRC1)(POS*1), X4
- MOVOU 16(SRC1)(POS*1), X5
- MOVOU 32(SRC1)(POS*1), X6
- MOVOU 48(SRC1)(POS*1), X7
-
- PXOR X4, X0
- PXOR X5, X1
- PXOR X6, X2
- PXOR X7, X3
-
- MOVOU X0, (DST)(POS*1)
- MOVOU X1, 16(DST)(POS*1)
- MOVOU X2, 32(DST)(POS*1)
- MOVOU X3, 48(DST)(POS*1)
-
- ADDQ $64, POS
- CMPQ LEN, POS
- JNE loop64b
- RET
-
-loop_1b:
- MOVB -1(SRC0)(LEN*1), TMP1
- MOVB -1(SRC1)(LEN*1), TMP2
- XORB TMP1, TMP2
- MOVB TMP2, -1(DST)(LEN*1)
- SUBQ $1, LEN
- TESTQ $7, LEN
- JNZ loop_1b
- CMPQ LEN, $0
- JE ret
- TESTQ $63, LEN
- JZ aligned
-
-not_aligned:
- TESTQ $7, LEN
- JNE loop_1b
- MOVQ LEN, TMP1
- ANDQ $63, TMP1
-
-loop_8b:
- MOVQ -8(SRC0)(LEN*1), TMP2
- MOVQ -8(SRC1)(LEN*1), TMP3
- XORQ TMP2, TMP3
- MOVQ TMP3, -8(DST)(LEN*1)
- SUBQ $8, LEN
- SUBQ $8, TMP1
- JG loop_8b
-
- CMPQ LEN, $64
- JGE aligned
- RET
-
-ret:
- RET
-
-// func bytesSSE2big(dst, src0, src1 []byte, size int)
-TEXT ·bytesSSE2big(SB), NOSPLIT, $0
- MOVQ len+72(FP), LEN
- CMPQ LEN, $0
- JE ret
- MOVQ dst+0(FP), DST
- MOVQ src0+24(FP), SRC0
- MOVQ src1+48(FP), SRC1
- TESTQ $63, LEN
- JNZ not_aligned
-
-aligned:
- MOVQ $0, POS
-
-loop64b:
- MOVOU (SRC0)(POS*1), X0
- MOVOU 16(SRC0)(POS*1), X1
- MOVOU 32(SRC0)(POS*1), X2
- MOVOU 48(SRC0)(POS*1), X3
-
- MOVOU (SRC1)(POS*1), X4
- MOVOU 16(SRC1)(POS*1), X5
- MOVOU 32(SRC1)(POS*1), X6
- MOVOU 48(SRC1)(POS*1), X7
-
- PXOR X4, X0
- PXOR X5, X1
- PXOR X6, X2
- PXOR X7, X3
-
- LONG $0xe70f4266; WORD $0x0304 // MOVNTDQ
- LONG $0xe70f4266; WORD $0x034c; BYTE $0x10
- LONG $0xe70f4266; WORD $0x0354; BYTE $0x20
- LONG $0xe70f4266; WORD $0x035c; BYTE $0x30
-
- ADDQ $64, POS
- CMPQ LEN, POS
- JNE loop64b
- RET
-
-loop_1b:
- MOVB -1(SRC0)(LEN*1), TMP1
- MOVB -1(SRC1)(LEN*1), TMP2
- XORB TMP1, TMP2
- MOVB TMP2, -1(DST)(LEN*1)
- SUBQ $1, LEN
- TESTQ $7, LEN
- JNZ loop_1b
- CMPQ LEN, $0
- JE ret
- TESTQ $63, LEN
- JZ aligned
-
-not_aligned:
- TESTQ $7, LEN
- JNE loop_1b
- MOVQ LEN, TMP1
- ANDQ $63, TMP1
-
-loop_8b:
- MOVQ -8(SRC0)(LEN*1), TMP2
- MOVQ -8(SRC1)(LEN*1), TMP3
- XORQ TMP2, TMP3
- MOVQ TMP3, -8(DST)(LEN*1)
- SUBQ $8, LEN
- SUBQ $8, TMP1
- JG loop_8b
-
- CMPQ LEN, $64
- JGE aligned
- RET
-
-ret:
- RET
-
-// func matrixSSE2small(dst []byte, src [][]byte)
-TEXT ·matrixSSE2small(SB), NOSPLIT, $0
- MOVQ dst+0(FP), DST
- MOVQ src+24(FP), SRC
- MOVQ vec+32(FP), VECT
- MOVQ len+8(FP), LEN
- TESTQ $63, LEN
- JNZ not_aligned
-
-aligned:
- MOVQ $0, POS
-
-loop64b:
- MOVQ VECT, TMP1
- SUBQ $2, TMP1
- MOVQ $0, TMP2
- MOVQ (SRC)(TMP2*1), TMP3
- MOVQ TMP3, TMP4
- MOVOU (TMP3)(POS*1), X0
- MOVOU 16(TMP4)(POS*1), X1
- MOVOU 32(TMP3)(POS*1), X2
- MOVOU 48(TMP4)(POS*1), X3
-
-next_vect:
- ADDQ $24, TMP2
- MOVQ (SRC)(TMP2*1), TMP3
- MOVQ TMP3, TMP4
- MOVOU (TMP3)(POS*1), X4
- MOVOU 16(TMP4)(POS*1), X5
- MOVOU 32(TMP3)(POS*1), X6
- MOVOU 48(TMP4)(POS*1), X7
- PXOR X4, X0
- PXOR X5, X1
- PXOR X6, X2
- PXOR X7, X3
- SUBQ $1, TMP1
- JGE next_vect
-
- MOVOU X0, (DST)(POS*1)
- MOVOU X1, 16(DST)(POS*1)
- MOVOU X2, 32(DST)(POS*1)
- MOVOU X3, 48(DST)(POS*1)
-
- ADDQ $64, POS
- CMPQ LEN, POS
- JNE loop64b
- RET
-
-loop_1b:
- MOVQ VECT, TMP1
- MOVQ $0, TMP2
- MOVQ (SRC)(TMP2*1), TMP3
- SUBQ $2, TMP1
- MOVB -1(TMP3)(LEN*1), TMP5
-
-next_vect_1b:
- ADDQ $24, TMP2
- MOVQ (SRC)(TMP2*1), TMP3
- MOVB -1(TMP3)(LEN*1), TMP6
- XORB TMP6, TMP5
- SUBQ $1, TMP1
- JGE next_vect_1b
-
- MOVB TMP5, -1(DST)(LEN*1)
- SUBQ $1, LEN
- TESTQ $7, LEN
- JNZ loop_1b
-
- CMPQ LEN, $0
- JE ret
- TESTQ $63, LEN
- JZ aligned
-
-not_aligned:
- TESTQ $7, LEN
- JNE loop_1b
- MOVQ LEN, TMP4
- ANDQ $63, TMP4
-
-loop_8b:
- MOVQ VECT, TMP1
- MOVQ $0, TMP2
- MOVQ (SRC)(TMP2*1), TMP3
- SUBQ $2, TMP1
- MOVQ -8(TMP3)(LEN*1), TMP5
-
-next_vect_8b:
- ADDQ $24, TMP2
- MOVQ (SRC)(TMP2*1), TMP3
- MOVQ -8(TMP3)(LEN*1), TMP6
- XORQ TMP6, TMP5
- SUBQ $1, TMP1
- JGE next_vect_8b
-
- MOVQ TMP5, -8(DST)(LEN*1)
- SUBQ $8, LEN
- SUBQ $8, TMP4
- JG loop_8b
-
- CMPQ LEN, $64
- JGE aligned
- RET
-
-ret:
- RET
-
-// func matrixSSE2big(dst []byte, src [][]byte)
-TEXT ·matrixSSE2big(SB), NOSPLIT, $0
- MOVQ dst+0(FP), DST
- MOVQ src+24(FP), SRC
- MOVQ vec+32(FP), VECT
- MOVQ len+8(FP), LEN
- TESTQ $63, LEN
- JNZ not_aligned
-
-aligned:
- MOVQ $0, POS
-
-loop64b:
- MOVQ VECT, TMP1
- SUBQ $2, TMP1
- MOVQ $0, TMP2
- MOVQ (SRC)(TMP2*1), TMP3
- MOVQ TMP3, TMP4
- MOVOU (TMP3)(POS*1), X0
- MOVOU 16(TMP4)(POS*1), X1
- MOVOU 32(TMP3)(POS*1), X2
- MOVOU 48(TMP4)(POS*1), X3
-
-next_vect:
- ADDQ $24, TMP2
- MOVQ (SRC)(TMP2*1), TMP3
- MOVQ TMP3, TMP4
- MOVOU (TMP3)(POS*1), X4
- MOVOU 16(TMP4)(POS*1), X5
- MOVOU 32(TMP3)(POS*1), X6
- MOVOU 48(TMP4)(POS*1), X7
- PXOR X4, X0
- PXOR X5, X1
- PXOR X6, X2
- PXOR X7, X3
- SUBQ $1, TMP1
- JGE next_vect
-
- LONG $0xe70f4266; WORD $0x0304
- LONG $0xe70f4266; WORD $0x034c; BYTE $0x10
- LONG $0xe70f4266; WORD $0x0354; BYTE $0x20
- LONG $0xe70f4266; WORD $0x035c; BYTE $0x30
-
- ADDQ $64, POS
- CMPQ LEN, POS
- JNE loop64b
- RET
-
-loop_1b:
- MOVQ VECT, TMP1
- MOVQ $0, TMP2
- MOVQ (SRC)(TMP2*1), TMP3
- SUBQ $2, TMP1
- MOVB -1(TMP3)(LEN*1), TMP5
-
-next_vect_1b:
- ADDQ $24, TMP2
- MOVQ (SRC)(TMP2*1), TMP3
- MOVB -1(TMP3)(LEN*1), TMP6
- XORB TMP6, TMP5
- SUBQ $1, TMP1
- JGE next_vect_1b
-
- MOVB TMP5, -1(DST)(LEN*1)
- SUBQ $1, LEN
- TESTQ $7, LEN
- JNZ loop_1b
-
- CMPQ LEN, $0
- JE ret
- TESTQ $63, LEN
- JZ aligned
-
-not_aligned:
- TESTQ $7, LEN
- JNE loop_1b
- MOVQ LEN, TMP4
- ANDQ $63, TMP4
-
-loop_8b:
- MOVQ VECT, TMP1
- MOVQ $0, TMP2
- MOVQ (SRC)(TMP2*1), TMP3
- SUBQ $2, TMP1
- MOVQ -8(TMP3)(LEN*1), TMP5
-
-next_vect_8b:
- ADDQ $24, TMP2
- MOVQ (SRC)(TMP2*1), TMP3
- MOVQ -8(TMP3)(LEN*1), TMP6
- XORQ TMP6, TMP5
- SUBQ $1, TMP1
- JGE next_vect_8b
-
- MOVQ TMP5, -8(DST)(LEN*1)
- SUBQ $8, LEN
- SUBQ $8, TMP4
- JG loop_8b
-
- CMPQ LEN, $64
- JGE aligned
- RET
-
-ret:
- RET
-
-TEXT ·hasSSE2(SB), NOSPLIT, $0
- XORQ AX, AX
- INCL AX
- CPUID
- SHRQ $26, DX
- ANDQ $1, DX
- MOVB DX, ret+0(FP)
- RET
-
diff --git a/vendor/github.com/templexxx/xor/xor.go b/vendor/github.com/templexxx/xor/xor.go
deleted file mode 100644
index 2fa5616..0000000
--- a/vendor/github.com/templexxx/xor/xor.go
+++ /dev/null
@@ -1,49 +0,0 @@
-package xor
-
-// SIMD Extensions
-const (
- none = iota
- avx2
- // first introduced by Intel with the initial version of the Pentium 4 in 2001
- // so I think we can assume all amd64 has sse2
- sse2
-)
-
-var extension = none
-
-// Bytes : chose the shortest one as xor size
-// it's better to use it for big data ( > 64bytes )
-func Bytes(dst, src0, src1 []byte) {
- size := len(dst)
- if size > len(src0) {
- size = len(src0)
- }
- if size > len(src1) {
- size = len(src1)
- }
- xorBytes(dst, src0, src1, size)
-}
-
-// BytesSameLen : all slice's length must be equal
-// cut size branch, save time for small data
-func BytesSameLen(dst, src0, src1 []byte) {
- xorSrc1(dst, src0, src1)
-}
-
-// BytesSrc0 : src1 >= src0, dst >= src0
-// xor src0's len bytes
-func BytesSrc0(dst, src0, src1 []byte) {
- xorSrc0(dst, src0, src1)
-}
-
-// BytesSrc1 : src0 >= src1, dst >= src1
-// xor src1's len bytes
-func BytesSrc1(dst, src0, src1 []byte) {
- xorSrc1(dst, src0, src1)
-}
-
-// Matrix : all slice's length must be equal && != 0
-// len(src) must >= 2
-func Matrix(dst []byte, src [][]byte) {
- xorMatrix(dst, src)
-}
diff --git a/vendor/github.com/templexxx/xor/xor_amd64.go b/vendor/github.com/templexxx/xor/xor_amd64.go
deleted file mode 100644
index b449bc0..0000000
--- a/vendor/github.com/templexxx/xor/xor_amd64.go
+++ /dev/null
@@ -1,120 +0,0 @@
-package xor
-
-import "github.com/templexxx/cpufeat"
-
-func init() {
- getEXT()
-}
-
-func getEXT() {
- if cpufeat.X86.HasAVX2 {
- extension = avx2
- } else {
- extension = sse2
- }
- return
-}
-
-func xorBytes(dst, src0, src1 []byte, size int) {
- switch extension {
- case avx2:
- bytesAVX2(dst, src0, src1, size)
- default:
- bytesSSE2(dst, src0, src1, size)
- }
-}
-
-// non-temporal hint store
-const nontmp = 8 * 1024
-const avx2loopsize = 128
-
-func bytesAVX2(dst, src0, src1 []byte, size int) {
- if size < avx2loopsize {
- bytesAVX2mini(dst, src0, src1, size)
- } else if size >= avx2loopsize && size <= nontmp {
- bytesAVX2small(dst, src0, src1, size)
- } else {
- bytesAVX2big(dst, src0, src1, size)
- }
-}
-
-const sse2loopsize = 64
-
-func bytesSSE2(dst, src0, src1 []byte, size int) {
- if size < sse2loopsize {
- bytesSSE2mini(dst, src0, src1, size)
- } else if size >= sse2loopsize && size <= nontmp {
- bytesSSE2small(dst, src0, src1, size)
- } else {
- bytesSSE2big(dst, src0, src1, size)
- }
-}
-
-func xorMatrix(dst []byte, src [][]byte) {
- switch extension {
- case avx2:
- matrixAVX2(dst, src)
- default:
- matrixSSE2(dst, src)
- }
-}
-
-func matrixAVX2(dst []byte, src [][]byte) {
- size := len(dst)
- if size > nontmp {
- matrixAVX2big(dst, src)
- } else {
- matrixAVX2small(dst, src)
- }
-}
-
-func matrixSSE2(dst []byte, src [][]byte) {
- size := len(dst)
- if size > nontmp {
- matrixSSE2big(dst, src)
- } else {
- matrixSSE2small(dst, src)
- }
-}
-
-//go:noescape
-func xorSrc0(dst, src0, src1 []byte)
-
-//go:noescape
-func xorSrc1(dst, src0, src1 []byte)
-
-//go:noescape
-func bytesAVX2mini(dst, src0, src1 []byte, size int)
-
-//go:noescape
-func bytesAVX2big(dst, src0, src1 []byte, size int)
-
-//go:noescape
-func bytesAVX2small(dst, src0, src1 []byte, size int)
-
-//go:noescape
-func bytesSSE2mini(dst, src0, src1 []byte, size int)
-
-//go:noescape
-func bytesSSE2small(dst, src0, src1 []byte, size int)
-
-//go:noescape
-func bytesSSE2big(dst, src0, src1 []byte, size int)
-
-//go:noescape
-func matrixAVX2small(dst []byte, src [][]byte)
-
-//go:noescape
-func matrixAVX2big(dst []byte, src [][]byte)
-
-//go:noescape
-func matrixSSE2small(dst []byte, src [][]byte)
-
-//go:noescape
-func matrixSSE2big(dst []byte, src [][]byte)
-
-//go:noescape
-func hasAVX2() bool
-
-//go:noescape
-func hasSSE2() bool
diff --git a/vendor/github.com/templexxx/xor/xor_other.go b/vendor/github.com/templexxx/xor/xor_other.go
deleted file mode 100644
index 4aa2967..0000000
--- a/vendor/github.com/templexxx/xor/xor_other.go
+++ /dev/null
@@ -1,19 +0,0 @@
-// +build !amd64 noasm
-
-package xor
-
-func xorBytes(dst, src0, src1 []byte, size int) {
- bytesNoSIMD(dst, src0, src1, size)
-}
-
-func xorMatrix(dst []byte, src [][]byte) {
- matrixNoSIMD(dst, src)
-}
-
-func xorSrc0(dst, src0, src1 []byte) {
- bytesNoSIMD(dst, src0, src1, len(src0))
-}
-
-func xorSrc1(dst, src0, src1 []byte) {
- bytesNoSIMD(dst, src0, src1, len(src1))
-}
diff --git a/vendor/github.com/tjfoc/gmsm/LICENSE b/vendor/github.com/tjfoc/gmsm/LICENSE
deleted file mode 100644
index 8dada3e..0000000
--- a/vendor/github.com/tjfoc/gmsm/LICENSE
+++ /dev/null
@@ -1,201 +0,0 @@
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "{}"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright {yyyy} {name of copyright owner}
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
diff --git a/vendor/github.com/tjfoc/gmsm/sm4/sm4.go b/vendor/github.com/tjfoc/gmsm/sm4/sm4.go
deleted file mode 100644
index 9956af2..0000000
--- a/vendor/github.com/tjfoc/gmsm/sm4/sm4.go
+++ /dev/null
@@ -1,491 +0,0 @@
-/*
-Copyright Suzhou Tongji Fintech Research Institute 2017 All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-
-sm4 acceleration
-modified by Jack, 2017 Oct
-*/
-
-package sm4
-
-import (
- "bytes"
- "crypto/cipher"
- "errors"
- "strconv"
-)
-
-const BlockSize = 16
-var IV=make([]byte,BlockSize)
-type SM4Key []byte
-
-// Cipher is an instance of SM4 encryption.
-type Sm4Cipher struct {
- subkeys []uint32
- block1 []uint32
- block2 []byte
-}
-
-// sm4密钥参量
-var fk = [4]uint32{
- 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc,
-}
-
-// sm4密钥参量
-var ck = [32]uint32{
- 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
- 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
- 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
- 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
- 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
- 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
- 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
- 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279,
-}
-
-// sm4密钥参量
-var sbox = [256]uint8{
- 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
- 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
- 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
- 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
- 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
- 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
- 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
- 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
- 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
- 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
- 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
- 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
- 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
- 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
- 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
- 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
-}
-
-var sbox0 = [256]uint32{
- 0xd55b5b8e, 0x924242d0, 0xeaa7a74d, 0xfdfbfb06, 0xcf3333fc, 0xe2878765, 0x3df4f4c9, 0xb5dede6b, 0x1658584e, 0xb4dada6e, 0x14505044, 0xc10b0bca, 0x28a0a088, 0xf8efef17, 0x2cb0b09c, 0x05141411,
- 0x2bacac87, 0x669d9dfb, 0x986a6af2, 0x77d9d9ae, 0x2aa8a882, 0xbcfafa46, 0x04101014, 0xc00f0fcf, 0xa8aaaa02, 0x45111154, 0x134c4c5f, 0x269898be, 0x4825256d, 0x841a1a9e, 0x0618181e, 0x9b6666fd,
- 0x9e7272ec, 0x4309094a, 0x51414110, 0xf7d3d324, 0x934646d5, 0xecbfbf53, 0x9a6262f8, 0x7be9e992, 0x33ccccff, 0x55515104, 0x0b2c2c27, 0x420d0d4f, 0xeeb7b759, 0xcc3f3ff3, 0xaeb2b21c, 0x638989ea,
- 0xe7939374, 0xb1cece7f, 0x1c70706c, 0xaba6a60d, 0xca2727ed, 0x08202028, 0xeba3a348, 0x975656c1, 0x82020280, 0xdc7f7fa3, 0x965252c4, 0xf9ebeb12, 0x74d5d5a1, 0x8d3e3eb3, 0x3ffcfcc3, 0xa49a9a3e,
- 0x461d1d5b, 0x071c1c1b, 0xa59e9e3b, 0xfff3f30c, 0xf0cfcf3f, 0x72cdcdbf, 0x175c5c4b, 0xb8eaea52, 0x810e0e8f, 0x5865653d, 0x3cf0f0cc, 0x1964647d, 0xe59b9b7e, 0x87161691, 0x4e3d3d73, 0xaaa2a208,
- 0x69a1a1c8, 0x6aadadc7, 0x83060685, 0xb0caca7a, 0x70c5c5b5, 0x659191f4, 0xd96b6bb2, 0x892e2ea7, 0xfbe3e318, 0xe8afaf47, 0x0f3c3c33, 0x4a2d2d67, 0x71c1c1b0, 0x5759590e, 0x9f7676e9, 0x35d4d4e1,
- 0x1e787866, 0x249090b4, 0x0e383836, 0x5f797926, 0x628d8def, 0x59616138, 0xd2474795, 0xa08a8a2a, 0x259494b1, 0x228888aa, 0x7df1f18c, 0x3bececd7, 0x01040405, 0x218484a5, 0x79e1e198, 0x851e1e9b,
- 0xd7535384, 0x00000000, 0x4719195e, 0x565d5d0b, 0x9d7e7ee3, 0xd04f4f9f, 0x279c9cbb, 0x5349491a, 0x4d31317c, 0x36d8d8ee, 0x0208080a, 0xe49f9f7b, 0xa2828220, 0xc71313d4, 0xcb2323e8, 0x9c7a7ae6,
- 0xe9abab42, 0xbdfefe43, 0x882a2aa2, 0xd14b4b9a, 0x41010140, 0xc41f1fdb, 0x38e0e0d8, 0xb7d6d661, 0xa18e8e2f, 0xf4dfdf2b, 0xf1cbcb3a, 0xcd3b3bf6, 0xfae7e71d, 0x608585e5, 0x15545441, 0xa3868625,
- 0xe3838360, 0xacbaba16, 0x5c757529, 0xa6929234, 0x996e6ef7, 0x34d0d0e4, 0x1a686872, 0x54555501, 0xafb6b619, 0x914e4edf, 0x32c8c8fa, 0x30c0c0f0, 0xf6d7d721, 0x8e3232bc, 0xb3c6c675, 0xe08f8f6f,
- 0x1d747469, 0xf5dbdb2e, 0xe18b8b6a, 0x2eb8b896, 0x800a0a8a, 0x679999fe, 0xc92b2be2, 0x618181e0, 0xc30303c0, 0x29a4a48d, 0x238c8caf, 0xa9aeae07, 0x0d343439, 0x524d4d1f, 0x4f393976, 0x6ebdbdd3,
- 0xd6575781, 0xd86f6fb7, 0x37dcdceb, 0x44151551, 0xdd7b7ba6, 0xfef7f709, 0x8c3a3ab6, 0x2fbcbc93, 0x030c0c0f, 0xfcffff03, 0x6ba9a9c2, 0x73c9c9ba, 0x6cb5b5d9, 0x6db1b1dc, 0x5a6d6d37, 0x50454515,
- 0x8f3636b9, 0x1b6c6c77, 0xadbebe13, 0x904a4ada, 0xb9eeee57, 0xde7777a9, 0xbef2f24c, 0x7efdfd83, 0x11444455, 0xda6767bd, 0x5d71712c, 0x40050545, 0x1f7c7c63, 0x10404050, 0x5b696932, 0xdb6363b8,
- 0x0a282822, 0xc20707c5, 0x31c4c4f5, 0x8a2222a8, 0xa7969631, 0xce3737f9, 0x7aeded97, 0xbff6f649, 0x2db4b499, 0x75d1d1a4, 0xd3434390, 0x1248485a, 0xbae2e258, 0xe6979771, 0xb6d2d264, 0xb2c2c270,
- 0x8b2626ad, 0x68a5a5cd, 0x955e5ecb, 0x4b292962, 0x0c30303c, 0x945a5ace, 0x76ddddab, 0x7ff9f986, 0x649595f1, 0xbbe6e65d, 0xf2c7c735, 0x0924242d, 0xc61717d1, 0x6fb9b9d6, 0xc51b1bde, 0x86121294,
- 0x18606078, 0xf3c3c330, 0x7cf5f589, 0xefb3b35c, 0x3ae8e8d2, 0xdf7373ac, 0x4c353579, 0x208080a0, 0x78e5e59d, 0xedbbbb56, 0x5e7d7d23, 0x3ef8f8c6, 0xd45f5f8b, 0xc82f2fe7, 0x39e4e4dd, 0x49212168,
-}
-
-var sbox1 = [256]uint32{
- 0x5b5b8ed5, 0x4242d092, 0xa7a74dea, 0xfbfb06fd, 0x3333fccf, 0x878765e2, 0xf4f4c93d, 0xdede6bb5, 0x58584e16, 0xdada6eb4, 0x50504414, 0x0b0bcac1, 0xa0a08828, 0xefef17f8, 0xb0b09c2c, 0x14141105,
- 0xacac872b, 0x9d9dfb66, 0x6a6af298, 0xd9d9ae77, 0xa8a8822a, 0xfafa46bc, 0x10101404, 0x0f0fcfc0, 0xaaaa02a8, 0x11115445, 0x4c4c5f13, 0x9898be26, 0x25256d48, 0x1a1a9e84, 0x18181e06, 0x6666fd9b,
- 0x7272ec9e, 0x09094a43, 0x41411051, 0xd3d324f7, 0x4646d593, 0xbfbf53ec, 0x6262f89a, 0xe9e9927b, 0xccccff33, 0x51510455, 0x2c2c270b, 0x0d0d4f42, 0xb7b759ee, 0x3f3ff3cc, 0xb2b21cae, 0x8989ea63,
- 0x939374e7, 0xcece7fb1, 0x70706c1c, 0xa6a60dab, 0x2727edca, 0x20202808, 0xa3a348eb, 0x5656c197, 0x02028082, 0x7f7fa3dc, 0x5252c496, 0xebeb12f9, 0xd5d5a174, 0x3e3eb38d, 0xfcfcc33f, 0x9a9a3ea4,
- 0x1d1d5b46, 0x1c1c1b07, 0x9e9e3ba5, 0xf3f30cff, 0xcfcf3ff0, 0xcdcdbf72, 0x5c5c4b17, 0xeaea52b8, 0x0e0e8f81, 0x65653d58, 0xf0f0cc3c, 0x64647d19, 0x9b9b7ee5, 0x16169187, 0x3d3d734e, 0xa2a208aa,
- 0xa1a1c869, 0xadadc76a, 0x06068583, 0xcaca7ab0, 0xc5c5b570, 0x9191f465, 0x6b6bb2d9, 0x2e2ea789, 0xe3e318fb, 0xafaf47e8, 0x3c3c330f, 0x2d2d674a, 0xc1c1b071, 0x59590e57, 0x7676e99f, 0xd4d4e135,
- 0x7878661e, 0x9090b424, 0x3838360e, 0x7979265f, 0x8d8def62, 0x61613859, 0x474795d2, 0x8a8a2aa0, 0x9494b125, 0x8888aa22, 0xf1f18c7d, 0xececd73b, 0x04040501, 0x8484a521, 0xe1e19879, 0x1e1e9b85,
- 0x535384d7, 0x00000000, 0x19195e47, 0x5d5d0b56, 0x7e7ee39d, 0x4f4f9fd0, 0x9c9cbb27, 0x49491a53, 0x31317c4d, 0xd8d8ee36, 0x08080a02, 0x9f9f7be4, 0x828220a2, 0x1313d4c7, 0x2323e8cb, 0x7a7ae69c,
- 0xabab42e9, 0xfefe43bd, 0x2a2aa288, 0x4b4b9ad1, 0x01014041, 0x1f1fdbc4, 0xe0e0d838, 0xd6d661b7, 0x8e8e2fa1, 0xdfdf2bf4, 0xcbcb3af1, 0x3b3bf6cd, 0xe7e71dfa, 0x8585e560, 0x54544115, 0x868625a3,
- 0x838360e3, 0xbaba16ac, 0x7575295c, 0x929234a6, 0x6e6ef799, 0xd0d0e434, 0x6868721a, 0x55550154, 0xb6b619af, 0x4e4edf91, 0xc8c8fa32, 0xc0c0f030, 0xd7d721f6, 0x3232bc8e, 0xc6c675b3, 0x8f8f6fe0,
- 0x7474691d, 0xdbdb2ef5, 0x8b8b6ae1, 0xb8b8962e, 0x0a0a8a80, 0x9999fe67, 0x2b2be2c9, 0x8181e061, 0x0303c0c3, 0xa4a48d29, 0x8c8caf23, 0xaeae07a9, 0x3434390d, 0x4d4d1f52, 0x3939764f, 0xbdbdd36e,
- 0x575781d6, 0x6f6fb7d8, 0xdcdceb37, 0x15155144, 0x7b7ba6dd, 0xf7f709fe, 0x3a3ab68c, 0xbcbc932f, 0x0c0c0f03, 0xffff03fc, 0xa9a9c26b, 0xc9c9ba73, 0xb5b5d96c, 0xb1b1dc6d, 0x6d6d375a, 0x45451550,
- 0x3636b98f, 0x6c6c771b, 0xbebe13ad, 0x4a4ada90, 0xeeee57b9, 0x7777a9de, 0xf2f24cbe, 0xfdfd837e, 0x44445511, 0x6767bdda, 0x71712c5d, 0x05054540, 0x7c7c631f, 0x40405010, 0x6969325b, 0x6363b8db,
- 0x2828220a, 0x0707c5c2, 0xc4c4f531, 0x2222a88a, 0x969631a7, 0x3737f9ce, 0xeded977a, 0xf6f649bf, 0xb4b4992d, 0xd1d1a475, 0x434390d3, 0x48485a12, 0xe2e258ba, 0x979771e6, 0xd2d264b6, 0xc2c270b2,
- 0x2626ad8b, 0xa5a5cd68, 0x5e5ecb95, 0x2929624b, 0x30303c0c, 0x5a5ace94, 0xddddab76, 0xf9f9867f, 0x9595f164, 0xe6e65dbb, 0xc7c735f2, 0x24242d09, 0x1717d1c6, 0xb9b9d66f, 0x1b1bdec5, 0x12129486,
- 0x60607818, 0xc3c330f3, 0xf5f5897c, 0xb3b35cef, 0xe8e8d23a, 0x7373acdf, 0x3535794c, 0x8080a020, 0xe5e59d78, 0xbbbb56ed, 0x7d7d235e, 0xf8f8c63e, 0x5f5f8bd4, 0x2f2fe7c8, 0xe4e4dd39, 0x21216849,
-}
-
-var sbox2 = [256]uint32{
- 0x5b8ed55b, 0x42d09242, 0xa74deaa7, 0xfb06fdfb, 0x33fccf33, 0x8765e287, 0xf4c93df4, 0xde6bb5de, 0x584e1658, 0xda6eb4da, 0x50441450, 0x0bcac10b, 0xa08828a0, 0xef17f8ef, 0xb09c2cb0, 0x14110514,
- 0xac872bac, 0x9dfb669d, 0x6af2986a, 0xd9ae77d9, 0xa8822aa8, 0xfa46bcfa, 0x10140410, 0x0fcfc00f, 0xaa02a8aa, 0x11544511, 0x4c5f134c, 0x98be2698, 0x256d4825, 0x1a9e841a, 0x181e0618, 0x66fd9b66,
- 0x72ec9e72, 0x094a4309, 0x41105141, 0xd324f7d3, 0x46d59346, 0xbf53ecbf, 0x62f89a62, 0xe9927be9, 0xccff33cc, 0x51045551, 0x2c270b2c, 0x0d4f420d, 0xb759eeb7, 0x3ff3cc3f, 0xb21caeb2, 0x89ea6389,
- 0x9374e793, 0xce7fb1ce, 0x706c1c70, 0xa60daba6, 0x27edca27, 0x20280820, 0xa348eba3, 0x56c19756, 0x02808202, 0x7fa3dc7f, 0x52c49652, 0xeb12f9eb, 0xd5a174d5, 0x3eb38d3e, 0xfcc33ffc, 0x9a3ea49a,
- 0x1d5b461d, 0x1c1b071c, 0x9e3ba59e, 0xf30cfff3, 0xcf3ff0cf, 0xcdbf72cd, 0x5c4b175c, 0xea52b8ea, 0x0e8f810e, 0x653d5865, 0xf0cc3cf0, 0x647d1964, 0x9b7ee59b, 0x16918716, 0x3d734e3d, 0xa208aaa2,
- 0xa1c869a1, 0xadc76aad, 0x06858306, 0xca7ab0ca, 0xc5b570c5, 0x91f46591, 0x6bb2d96b, 0x2ea7892e, 0xe318fbe3, 0xaf47e8af, 0x3c330f3c, 0x2d674a2d, 0xc1b071c1, 0x590e5759, 0x76e99f76, 0xd4e135d4,
- 0x78661e78, 0x90b42490, 0x38360e38, 0x79265f79, 0x8def628d, 0x61385961, 0x4795d247, 0x8a2aa08a, 0x94b12594, 0x88aa2288, 0xf18c7df1, 0xecd73bec, 0x04050104, 0x84a52184, 0xe19879e1, 0x1e9b851e,
- 0x5384d753, 0x00000000, 0x195e4719, 0x5d0b565d, 0x7ee39d7e, 0x4f9fd04f, 0x9cbb279c, 0x491a5349, 0x317c4d31, 0xd8ee36d8, 0x080a0208, 0x9f7be49f, 0x8220a282, 0x13d4c713, 0x23e8cb23, 0x7ae69c7a,
- 0xab42e9ab, 0xfe43bdfe, 0x2aa2882a, 0x4b9ad14b, 0x01404101, 0x1fdbc41f, 0xe0d838e0, 0xd661b7d6, 0x8e2fa18e, 0xdf2bf4df, 0xcb3af1cb, 0x3bf6cd3b, 0xe71dfae7, 0x85e56085, 0x54411554, 0x8625a386,
- 0x8360e383, 0xba16acba, 0x75295c75, 0x9234a692, 0x6ef7996e, 0xd0e434d0, 0x68721a68, 0x55015455, 0xb619afb6, 0x4edf914e, 0xc8fa32c8, 0xc0f030c0, 0xd721f6d7, 0x32bc8e32, 0xc675b3c6, 0x8f6fe08f,
- 0x74691d74, 0xdb2ef5db, 0x8b6ae18b, 0xb8962eb8, 0x0a8a800a, 0x99fe6799, 0x2be2c92b, 0x81e06181, 0x03c0c303, 0xa48d29a4, 0x8caf238c, 0xae07a9ae, 0x34390d34, 0x4d1f524d, 0x39764f39, 0xbdd36ebd,
- 0x5781d657, 0x6fb7d86f, 0xdceb37dc, 0x15514415, 0x7ba6dd7b, 0xf709fef7, 0x3ab68c3a, 0xbc932fbc, 0x0c0f030c, 0xff03fcff, 0xa9c26ba9, 0xc9ba73c9, 0xb5d96cb5, 0xb1dc6db1, 0x6d375a6d, 0x45155045,
- 0x36b98f36, 0x6c771b6c, 0xbe13adbe, 0x4ada904a, 0xee57b9ee, 0x77a9de77, 0xf24cbef2, 0xfd837efd, 0x44551144, 0x67bdda67, 0x712c5d71, 0x05454005, 0x7c631f7c, 0x40501040, 0x69325b69, 0x63b8db63,
- 0x28220a28, 0x07c5c207, 0xc4f531c4, 0x22a88a22, 0x9631a796, 0x37f9ce37, 0xed977aed, 0xf649bff6, 0xb4992db4, 0xd1a475d1, 0x4390d343, 0x485a1248, 0xe258bae2, 0x9771e697, 0xd264b6d2, 0xc270b2c2,
- 0x26ad8b26, 0xa5cd68a5, 0x5ecb955e, 0x29624b29, 0x303c0c30, 0x5ace945a, 0xddab76dd, 0xf9867ff9, 0x95f16495, 0xe65dbbe6, 0xc735f2c7, 0x242d0924, 0x17d1c617, 0xb9d66fb9, 0x1bdec51b, 0x12948612,
- 0x60781860, 0xc330f3c3, 0xf5897cf5, 0xb35cefb3, 0xe8d23ae8, 0x73acdf73, 0x35794c35, 0x80a02080, 0xe59d78e5, 0xbb56edbb, 0x7d235e7d, 0xf8c63ef8, 0x5f8bd45f, 0x2fe7c82f, 0xe4dd39e4, 0x21684921,
-}
-
-var sbox3 = [256]uint32{
- 0x8ed55b5b, 0xd0924242, 0x4deaa7a7, 0x06fdfbfb, 0xfccf3333, 0x65e28787, 0xc93df4f4, 0x6bb5dede, 0x4e165858, 0x6eb4dada, 0x44145050, 0xcac10b0b, 0x8828a0a0, 0x17f8efef, 0x9c2cb0b0, 0x11051414,
- 0x872bacac, 0xfb669d9d, 0xf2986a6a, 0xae77d9d9, 0x822aa8a8, 0x46bcfafa, 0x14041010, 0xcfc00f0f, 0x02a8aaaa, 0x54451111, 0x5f134c4c, 0xbe269898, 0x6d482525, 0x9e841a1a, 0x1e061818, 0xfd9b6666,
- 0xec9e7272, 0x4a430909, 0x10514141, 0x24f7d3d3, 0xd5934646, 0x53ecbfbf, 0xf89a6262, 0x927be9e9, 0xff33cccc, 0x04555151, 0x270b2c2c, 0x4f420d0d, 0x59eeb7b7, 0xf3cc3f3f, 0x1caeb2b2, 0xea638989,
- 0x74e79393, 0x7fb1cece, 0x6c1c7070, 0x0daba6a6, 0xedca2727, 0x28082020, 0x48eba3a3, 0xc1975656, 0x80820202, 0xa3dc7f7f, 0xc4965252, 0x12f9ebeb, 0xa174d5d5, 0xb38d3e3e, 0xc33ffcfc, 0x3ea49a9a,
- 0x5b461d1d, 0x1b071c1c, 0x3ba59e9e, 0x0cfff3f3, 0x3ff0cfcf, 0xbf72cdcd, 0x4b175c5c, 0x52b8eaea, 0x8f810e0e, 0x3d586565, 0xcc3cf0f0, 0x7d196464, 0x7ee59b9b, 0x91871616, 0x734e3d3d, 0x08aaa2a2,
- 0xc869a1a1, 0xc76aadad, 0x85830606, 0x7ab0caca, 0xb570c5c5, 0xf4659191, 0xb2d96b6b, 0xa7892e2e, 0x18fbe3e3, 0x47e8afaf, 0x330f3c3c, 0x674a2d2d, 0xb071c1c1, 0x0e575959, 0xe99f7676, 0xe135d4d4,
- 0x661e7878, 0xb4249090, 0x360e3838, 0x265f7979, 0xef628d8d, 0x38596161, 0x95d24747, 0x2aa08a8a, 0xb1259494, 0xaa228888, 0x8c7df1f1, 0xd73becec, 0x05010404, 0xa5218484, 0x9879e1e1, 0x9b851e1e,
- 0x84d75353, 0x00000000, 0x5e471919, 0x0b565d5d, 0xe39d7e7e, 0x9fd04f4f, 0xbb279c9c, 0x1a534949, 0x7c4d3131, 0xee36d8d8, 0x0a020808, 0x7be49f9f, 0x20a28282, 0xd4c71313, 0xe8cb2323, 0xe69c7a7a,
- 0x42e9abab, 0x43bdfefe, 0xa2882a2a, 0x9ad14b4b, 0x40410101, 0xdbc41f1f, 0xd838e0e0, 0x61b7d6d6, 0x2fa18e8e, 0x2bf4dfdf, 0x3af1cbcb, 0xf6cd3b3b, 0x1dfae7e7, 0xe5608585, 0x41155454, 0x25a38686,
- 0x60e38383, 0x16acbaba, 0x295c7575, 0x34a69292, 0xf7996e6e, 0xe434d0d0, 0x721a6868, 0x01545555, 0x19afb6b6, 0xdf914e4e, 0xfa32c8c8, 0xf030c0c0, 0x21f6d7d7, 0xbc8e3232, 0x75b3c6c6, 0x6fe08f8f,
- 0x691d7474, 0x2ef5dbdb, 0x6ae18b8b, 0x962eb8b8, 0x8a800a0a, 0xfe679999, 0xe2c92b2b, 0xe0618181, 0xc0c30303, 0x8d29a4a4, 0xaf238c8c, 0x07a9aeae, 0x390d3434, 0x1f524d4d, 0x764f3939, 0xd36ebdbd,
- 0x81d65757, 0xb7d86f6f, 0xeb37dcdc, 0x51441515, 0xa6dd7b7b, 0x09fef7f7, 0xb68c3a3a, 0x932fbcbc, 0x0f030c0c, 0x03fcffff, 0xc26ba9a9, 0xba73c9c9, 0xd96cb5b5, 0xdc6db1b1, 0x375a6d6d, 0x15504545,
- 0xb98f3636, 0x771b6c6c, 0x13adbebe, 0xda904a4a, 0x57b9eeee, 0xa9de7777, 0x4cbef2f2, 0x837efdfd, 0x55114444, 0xbdda6767, 0x2c5d7171, 0x45400505, 0x631f7c7c, 0x50104040, 0x325b6969, 0xb8db6363,
- 0x220a2828, 0xc5c20707, 0xf531c4c4, 0xa88a2222, 0x31a79696, 0xf9ce3737, 0x977aeded, 0x49bff6f6, 0x992db4b4, 0xa475d1d1, 0x90d34343, 0x5a124848, 0x58bae2e2, 0x71e69797, 0x64b6d2d2, 0x70b2c2c2,
- 0xad8b2626, 0xcd68a5a5, 0xcb955e5e, 0x624b2929, 0x3c0c3030, 0xce945a5a, 0xab76dddd, 0x867ff9f9, 0xf1649595, 0x5dbbe6e6, 0x35f2c7c7, 0x2d092424, 0xd1c61717, 0xd66fb9b9, 0xdec51b1b, 0x94861212,
- 0x78186060, 0x30f3c3c3, 0x897cf5f5, 0x5cefb3b3, 0xd23ae8e8, 0xacdf7373, 0x794c3535, 0xa0208080, 0x9d78e5e5, 0x56edbbbb, 0x235e7d7d, 0xc63ef8f8, 0x8bd45f5f, 0xe7c82f2f, 0xdd39e4e4, 0x68492121,
-}
-
-func rl(x uint32, i uint8) uint32 { return (x << (i % 32)) | (x >> (32 - (i % 32))) }
-
-func l0(b uint32) uint32 { return b ^ rl(b, 13) ^ rl(b, 23) }
-
-func feistel0(x0, x1, x2, x3, rk uint32) uint32 { return x0 ^ l0(p(x1^x2^x3^rk)) }
-
-//非线性变换τ(.)
-func p(a uint32) uint32 {
- return (uint32(sbox[a>>24]) << 24) ^ (uint32(sbox[(a>>16)&0xff]) << 16) ^ (uint32(sbox[(a>>8)&0xff]) << 8) ^ uint32(sbox[(a)&0xff])
-}
-
-func permuteInitialBlock(b []uint32, block []byte) {
- for i := 0; i < 4; i++ {
- b[i] = (uint32(block[i*4]) << 24) | (uint32(block[i*4+1]) << 16) |
- (uint32(block[i*4+2]) << 8) | (uint32(block[i*4+3]))
- }
-}
-
-func permuteFinalBlock(b []byte, block []uint32) {
- for i := 0; i < 4; i++ {
- b[i*4] = uint8(block[i] >> 24)
- b[i*4+1] = uint8(block[i] >> 16)
- b[i*4+2] = uint8(block[i] >> 8)
- b[i*4+3] = uint8(block[i])
- }
-}
-
-//修改后的加密核心函数
-func cryptBlock(subkeys []uint32, b []uint32, r []byte, dst, src []byte, decrypt bool) {
- permuteInitialBlock(b, src)
-
- // bounds check elimination in major encryption loop
- // https://go101.org/article/bounds-check-elimination.html
- _ = b[3]
- if decrypt {
- for i := 0; i < 8; i++ {
- s := subkeys[31-4*i-3 : 31-4*i-3+4]
- x := b[1] ^ b[2] ^ b[3] ^ s[3]
- b[0] = b[0] ^ sbox0[x&0xff] ^ sbox1[(x>>8)&0xff] ^ sbox2[(x>>16)&0xff] ^ sbox3[(x>>24)&0xff]
- x = b[0] ^ b[2] ^ b[3] ^ s[2]
- b[1] = b[1] ^ sbox0[x&0xff] ^ sbox1[(x>>8)&0xff] ^ sbox2[(x>>16)&0xff] ^ sbox3[(x>>24)&0xff]
- x = b[0] ^ b[1] ^ b[3] ^ s[1]
- b[2] = b[2] ^ sbox0[x&0xff] ^ sbox1[(x>>8)&0xff] ^ sbox2[(x>>16)&0xff] ^ sbox3[(x>>24)&0xff]
- x = b[1] ^ b[2] ^ b[0] ^ s[0]
- b[3] = b[3] ^ sbox0[x&0xff] ^ sbox1[(x>>8)&0xff] ^ sbox2[(x>>16)&0xff] ^ sbox3[(x>>24)&0xff]
- }
- } else {
- for i := 0; i < 8; i++ {
- s := subkeys[4*i : 4*i+4]
- x := b[1] ^ b[2] ^ b[3] ^ s[0]
- b[0] = b[0] ^ sbox0[x&0xff] ^ sbox1[(x>>8)&0xff] ^ sbox2[(x>>16)&0xff] ^ sbox3[(x>>24)&0xff]
- x = b[0] ^ b[2] ^ b[3] ^ s[1]
- b[1] = b[1] ^ sbox0[x&0xff] ^ sbox1[(x>>8)&0xff] ^ sbox2[(x>>16)&0xff] ^ sbox3[(x>>24)&0xff]
- x = b[0] ^ b[1] ^ b[3] ^ s[2]
- b[2] = b[2] ^ sbox0[x&0xff] ^ sbox1[(x>>8)&0xff] ^ sbox2[(x>>16)&0xff] ^ sbox3[(x>>24)&0xff]
- x = b[1] ^ b[2] ^ b[0] ^ s[3]
- b[3] = b[3] ^ sbox0[x&0xff] ^ sbox1[(x>>8)&0xff] ^ sbox2[(x>>16)&0xff] ^ sbox3[(x>>24)&0xff]
- }
- }
- b[0], b[1], b[2], b[3] = b[3], b[2], b[1], b[0]
- permuteFinalBlock(r, b)
- copy(dst, r)
-}
-
-func generateSubKeys(key []byte) []uint32 {
- subkeys := make([]uint32, 32)
- b := make([]uint32, 4)
- permuteInitialBlock(b, key)
- b[0] ^= fk[0]
- b[1] ^= fk[1]
- b[2] ^= fk[2]
- b[3] ^= fk[3]
- for i := 0; i < 32; i++ {
- subkeys[i] = feistel0(b[0], b[1], b[2], b[3], ck[i])
- b[0], b[1], b[2], b[3] = b[1], b[2], b[3], subkeys[i]
- }
- return subkeys
-}
-
-// NewCipher creates and returns a new cipher.Block.
-func NewCipher(key []byte) (cipher.Block, error) {
- if len(key) != BlockSize {
- return nil, errors.New("SM4: invalid key size " + strconv.Itoa(len(key)))
- }
- c := new(Sm4Cipher)
- c.subkeys = generateSubKeys(key)
- c.block1 = make([]uint32, 4)
- c.block2 = make([]byte, 16)
- return c, nil
-}
-
-func (c *Sm4Cipher) BlockSize() int {
- return BlockSize
-}
-
-func (c *Sm4Cipher) Encrypt(dst, src []byte) {
- cryptBlock(c.subkeys, c.block1, c.block2, dst, src, false)
-}
-
-
-
-func (c *Sm4Cipher) Decrypt(dst, src []byte) {
- cryptBlock(c.subkeys, c.block1, c.block2, dst, src, true)
-}
-
-
-
-func xor(in, iv []byte) (out []byte) {
- if len(in) != len(iv) {
- return nil
- }
-
- out = make([]byte, len(in))
- for i := 0; i < len(in); i++ {
- out[i] = in[i] ^ iv[i]
- }
- return
-}
-
-func pkcs7Padding(src []byte) []byte {
- padding := BlockSize - len(src)%BlockSize
- padtext := bytes.Repeat([]byte{byte(padding)}, padding)
- return append(src, padtext...)
-}
-
-func pkcs7UnPadding(src []byte) ([]byte, error) {
- length := len(src)
- unpadding := int(src[length-1])
- if unpadding > BlockSize || unpadding == 0 {
- return nil, errors.New("Invalid pkcs7 padding (unpadding > BlockSize || unpadding == 0)")
- }
-
- pad := src[len(src)-unpadding:]
- for i := 0; i < unpadding; i++ {
- if pad[i] != byte(unpadding) {
- return nil, errors.New("Invalid pkcs7 padding (pad[i] != unpadding)")
- }
- }
-
- return src[:(length - unpadding)], nil
-}
-func SetIV(iv []byte)error{
- if len(iv)!=BlockSize{
- return errors.New("SM4: invalid iv size")
- }
- IV=iv
- return nil
-}
-
-func Sm4Cbc(key []byte, in []byte, mode bool) (out []byte, err error) {
- if len(key) != BlockSize {
- return nil, errors.New("SM4: invalid key size " + strconv.Itoa(len(key)))
- }
- var inData []byte
- if mode {
- inData = pkcs7Padding(in)
- } else {
- inData = in
- }
- iv:=make([]byte,BlockSize)
- copy(iv,IV)
- out = make([]byte, len(inData))
- c, err := NewCipher(key)
- if err != nil {
- panic(err)
- }
- if mode {
- for i := 0; i < len(inData)/16; i++ {
- in_tmp := xor(inData[i*16:i*16+16], iv)
- out_tmp := make([]byte, 16)
- c.Encrypt(out_tmp, in_tmp)
- copy(out[i*16:i*16+16], out_tmp)
- iv = out_tmp
- }
- } else {
- for i := 0; i < len(inData)/16; i++ {
- in_tmp := inData[i*16 : i*16+16]
- out_tmp := make([]byte, 16)
- c.Decrypt(out_tmp, in_tmp)
- out_tmp = xor(out_tmp, iv)
- copy(out[i*16:i*16+16], out_tmp)
- iv = in_tmp
- }
- out, _ = pkcs7UnPadding(out)
- }
-
- return out, nil
-}
-func Sm4Ecb(key []byte, in []byte, mode bool) (out []byte, err error) {
- if len(key) != BlockSize {
- return nil, errors.New("SM4: invalid key size " + strconv.Itoa(len(key)))
- }
- var inData []byte
- if mode {
- inData = pkcs7Padding(in)
- } else {
- inData = in
- }
- out = make([]byte, len(inData))
- c, err := NewCipher(key)
- if err != nil {
- panic(err)
- }
- if mode {
- for i := 0; i < len(inData)/16; i++ {
- in_tmp := inData[i*16 : i*16+16]
- out_tmp := make([]byte, 16)
- c.Encrypt(out_tmp, in_tmp)
- copy(out[i*16:i*16+16], out_tmp)
- }
- } else {
- for i := 0; i < len(inData)/16; i++ {
- in_tmp := inData[i*16 : i*16+16]
- out_tmp := make([]byte, 16)
- c.Decrypt(out_tmp, in_tmp)
- copy(out[i*16:i*16+16], out_tmp)
- }
- out, _ = pkcs7UnPadding(out)
- }
-
- return out, nil
-}
-
-//密码反馈模式(Cipher FeedBack (CFB))
-//https://blog.csdn.net/zy_strive_2012/article/details/102520356
-//https://blog.csdn.net/sinat_23338865/article/details/72869841
-func Sm4CFB(key []byte, in []byte, mode bool) (out []byte, err error) {
- if len(key) != BlockSize {
- return nil, errors.New("SM4: invalid key size " + strconv.Itoa(len(key)))
- }
- var inData []byte
- if mode {
- inData = pkcs7Padding(in)
- } else {
- inData = in
- }
-
- out = make([]byte, len(inData))
- c, err := NewCipher(key)
- if err != nil {
- panic(err)
- }
-
- K := make([]byte, BlockSize)
- cipherBlock := make([]byte, BlockSize)
- plainBlock := make([]byte, BlockSize)
- if mode { //加密
- for i := 0; i < len(inData)/16; i++ {
- if i == 0 {
- c.Encrypt(K, IV)
- cipherBlock = xor(K[:BlockSize], inData[i*16:i*16+16])
- copy(out[i*16:i*16+16], cipherBlock)
- //copy(cipherBlock,out_tmp)
- continue
- }
- c.Encrypt(K, cipherBlock)
- cipherBlock = xor(K[:BlockSize], inData[i*16:i*16+16])
- copy(out[i*16:i*16+16], cipherBlock)
- //copy(cipherBlock,out_tmp)
- }
-
- } else { //解密
- var i int = 0
- for ; i < len(inData)/16; i++ {
- if i == 0 {
- c.Encrypt(K, IV) //这里是加密,而不是调用解密方法Decrypt
- plainBlock = xor(K[:BlockSize], inData[i*16:i*16+16]) //获取明文分组
- copy(out[i*16:i*16+16], plainBlock)
- continue
- }
- c.Encrypt(K, inData[(i-1)*16:(i-1)*16+16])
- plainBlock = xor(K[:BlockSize], inData[i*16:i*16+16]) //获取明文分组
- copy(out[i*16:i*16+16], plainBlock)
-
- }
-
- out, _ = pkcs7UnPadding(out)
- }
-
- return out, nil
-}
-
-//输出反馈模式(Output feedback, OFB)
-//https://blog.csdn.net/chengqiuming/article/details/82390910
-//https://blog.csdn.net/sinat_23338865/article/details/72869841
-func Sm4OFB(key []byte, in []byte, mode bool) (out []byte, err error) {
- if len(key) != BlockSize {
- return nil, errors.New("SM4: invalid key size " + strconv.Itoa(len(key)))
- }
- var inData []byte
- if mode {
- inData = pkcs7Padding(in)
- } else {
- inData = in
- }
-
- out = make([]byte, len(inData))
- c, err := NewCipher(key)
- if err != nil {
- panic(err)
- }
-
- K := make([]byte, BlockSize)
- cipherBlock := make([]byte, BlockSize)
- plainBlock := make([]byte, BlockSize)
- shiftIV := make([]byte, BlockSize)
- if mode { //加密
- for i := 0; i < len(inData)/16; i++ {
- if i == 0 {
- c.Encrypt(K, IV)
- cipherBlock = xor(K[:BlockSize], inData[i*16:i*16+16])
- copy(out[i*16:i*16+16], cipherBlock)
- copy(shiftIV, K[:BlockSize])
- continue
- }
- c.Encrypt(K, shiftIV)
- cipherBlock = xor(K[:BlockSize], inData[i*16:i*16+16])
- copy(out[i*16:i*16+16], cipherBlock)
- copy(shiftIV, K[:BlockSize])
- }
-
- } else { //解密
- for i := 0; i < len(inData)/16; i++ {
- if i == 0 {
- c.Encrypt(K, IV) //这里是加密,而不是调用解密方法Decrypt
- plainBlock = xor(K[:BlockSize], inData[i*16:i*16+16]) //获取明文分组
- copy(out[i*16:i*16+16], plainBlock)
- copy(shiftIV, K[:BlockSize])
- continue
- }
- c.Encrypt(K, shiftIV)
- plainBlock = xor(K[:BlockSize], inData[i*16:i*16+16]) //获取明文分组
- copy(out[i*16:i*16+16], plainBlock)
- copy(shiftIV, K[:BlockSize])
- }
- out, _ = pkcs7UnPadding(out)
- }
-
- return out, nil
-}
diff --git a/vendor/github.com/tjfoc/gmsm/sm4/sm4_gcm.go b/vendor/github.com/tjfoc/gmsm/sm4/sm4_gcm.go
deleted file mode 100644
index 6257c21..0000000
--- a/vendor/github.com/tjfoc/gmsm/sm4/sm4_gcm.go
+++ /dev/null
@@ -1,333 +0,0 @@
-/*
-Copyright Hyperledger-TWGC All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-
-writed by Zhiwei Yan, 2020 Oct
-*/
-package sm4
-
-import (
- "errors"
- "strconv"
-)
-
-//Paper: The Galois/Counter Mode of Operation (GCM) David A. Mcgrew,John Viega .2004.
-func Sm4GCM(key []byte, IV ,in, A []byte, mode bool) ([]byte, []byte, error) {
- if len(key) != BlockSize {
- return nil,nil, errors.New("SM4: invalid key size " + strconv.Itoa(len(key)))
- }
- if mode {
- C,T:=GCMEncrypt(key,IV,in,A)
- return C,T,nil
- }else{
- P,_T:=GCMDecrypt(key,IV,in,A)
- return P,_T,nil
- }
-}
-
-func GetH(key []byte) (H []byte){
- c,err := NewCipher(key)
- if err != nil {
- panic(err)
- }
-
- zores:=make([]byte, BlockSize)
- H =make([]byte, BlockSize)
- c.Encrypt(H,zores)
- return H
-}
-
-//ut = a + b
-func addition(a ,b []byte) (out []byte){
- Len:=len(a)
- if Len != len(b) {
- return nil
- }
- out = make([]byte, Len)
- for i := 0; i < Len; i++ {
- out[i] = a[i] ^ b[i]
- }
- return out
-}
-
-func Rightshift(V []byte){
- n:=len(V)
- for i:=n-1;i>=0;i-- {
- V[i]=V[i]>>1
- if i!=0{
- V[i]=((V[i-1]&0x01)<<7)|V[i]
- }
- }
-}
-
-func findYi( Y []byte,index int) int{
- var temp byte
- i := uint(index)
- temp=Y[i/8]
- temp=temp>>(7-i%8)
- if temp & 0x01 == 1{
- return 1
- }else{
- return 0
- }
-}
-
-
-func multiplication(X,Y []byte) (Z []byte){
-
- R:=make([]byte,BlockSize)
- R[0]=0xe1
- Z=make([]byte,BlockSize)
- V:=make([]byte,BlockSize)
- copy(V,X)
- for i:=0;i<=127;i++{
- if findYi(Y,i)==1{
- Z=addition(Z,V)
- }
- if V[BlockSize-1]&0x01==0{
- Rightshift(V)
- }else{
- Rightshift(V)
- V=addition(V,R)
- }
- }
- return Z
-}
-
-func GHASH(H []byte,A []byte,C []byte) (X[]byte){
-
- calculm_v:=func(m ,v int) (int,int) {
- if(m==0 && v!=0){
- m=1
- v=v*8
- }else if(m!=0 && v==0) {
- v=BlockSize*8
- }else if(m!=0 && v!=0){
- m=m+1
- v=v*8
- }else { //m==0 && v==0
- m=1
- v=0
- }
- return m,v
- }
- m:=len(A)/BlockSize
- v:=len(A)%BlockSize
- m,v=calculm_v(m,v)
-
- n:=len(C)/BlockSize
- u:=(len(C)%BlockSize)
- n,u=calculm_v(n,u)
-
- //i=0
- X=make([]byte,BlockSize*(m+n+2)) //X0 = 0
- for i:=0;im-1 对于数组来说是 0-->m-2
- }
-
- //i=m
- zeros:=make([]byte,(128-v)/8)
- Am:=make([]byte,v/8)
- copy(Am[:],A[(m-1)*BlockSize:])
- Am=append(Am,zeros...)
- copy(X[m*BlockSize:m*BlockSize+BlockSize],multiplication( addition(X[(m-1)*BlockSize:(m-1)*BlockSize+BlockSize],Am),H))
-
- //i=m+1...m+n-1
- for i:=m+1;i<=(m+n-1);i++{
- copy(X[i*BlockSize:i*BlockSize+BlockSize],multiplication( addition(X[(i-1)*BlockSize:(i-1)*BlockSize+BlockSize],C[(i-m-1)*BlockSize:(i-m-1)*BlockSize+BlockSize]),H))
- }
-
- //i=m+n
- zeros =make([]byte,(128-u)/8)
- Cn:=make([]byte,u/8)
- copy(Cn[:],C[(n-1)*BlockSize:])
- Cn=append(Cn,zeros...)
- copy(X[(m+n)*BlockSize:(m+n)*BlockSize+BlockSize],multiplication( addition(X[(m+n-1)*BlockSize:(m+n-1)*BlockSize+BlockSize],Cn),H))
-
- //i=m+n+1
- var lenAB []byte
- calculateLenToBytes :=func(len int) []byte{
- data:=make([]byte,8)
- data[0]=byte((len>>56)&0xff)
- data[1]=byte((len>>48)&0xff)
- data[2]=byte((len>>40)&0xff)
- data[3]=byte((len>>32)&0xff)
- data[4]=byte((len>>24)&0xff)
- data[5]=byte((len>>16)&0xff)
- data[6]=byte((len>>8)&0xff)
- data[7]=byte((len>>0)&0xff)
- return data
- }
- lenAB=append(lenAB,calculateLenToBytes(len(A))...)
- lenAB=append(lenAB,calculateLenToBytes(len(C))...)
- copy(X[(m+n+1)*BlockSize:(m+n+1)*BlockSize+BlockSize],multiplication(addition(X[(m+n)*BlockSize:(m+n)*BlockSize+BlockSize],lenAB),H))
- return X[(m+n+1)*BlockSize:(m+n+1)*BlockSize+BlockSize]
-}
-
-
-func GetY0(H,IV []byte) []byte{
- if len(IV)*8 == 96 {
- zero31one1:=[]byte{0x00,0x00,0x00,0x01}
- IV=append(IV,zero31one1...)
- return IV
- }else{
- return GHASH(H,[]byte{},IV)
-
- }
-
-}
-
-func incr(n int ,Y_i []byte) (Y_ii []byte) {
-
- Y_ii=make([]byte,BlockSize*n)
- copy(Y_ii,Y_i)
-
- addYone:=func(yi,yii []byte){
- copy(yii[:],yi[:])
-
- Len:=len(yi)
- var rc byte=0x00
- for i:=Len-1;i>=0;i--{
- if(i==Len-1){
- if(yii[i]<0xff){
- yii[i]=yii[i]+0x01
- rc=0x00
- }else{
- yii[i]=0x00
- rc=0x01
- }
- }else{
- if yii[i]+rc<0xff {
- yii[i]=yii[i]+rc
- rc=0x00
- }else{
- yii[i]=0x00
- rc=0x01
- }
- }
- }
- }
- for i:=1;i
-
-
-[![GoDoc][1]][2] [![Powered][9]][10] [![MIT licensed][11]][12] [![Build Status][3]][4] [![Go Report Card][5]][6] [![Coverage Statusd][7]][8]
-
-[1]: https://godoc.org/github.com/xtaci/kcp-go?status.svg
-[2]: https://godoc.org/github.com/xtaci/kcp-go
-[3]: https://travis-ci.org/xtaci/kcp-go.svg?branch=master
-[4]: https://travis-ci.org/xtaci/kcp-go
-[5]: https://goreportcard.com/badge/github.com/xtaci/kcp-go
-[6]: https://goreportcard.com/report/github.com/xtaci/kcp-go
-[7]: https://codecov.io/gh/xtaci/kcp-go/branch/master/graph/badge.svg
-[8]: https://codecov.io/gh/xtaci/kcp-go
-[9]: https://img.shields.io/badge/KCP-Powered-blue.svg
-[10]: https://github.com/skywind3000/kcp
-[11]: https://img.shields.io/badge/license-MIT-blue.svg
-[12]: LICENSE
-
-## Introduction
-
-**kcp-go** is a **Production-Grade Reliable-UDP** library for [golang](https://golang.org/).
-
-This library intents to provide a **smooth, resilient, ordered, error-checked and anonymous** delivery of streams over **UDP** packets, it has been battle-tested with opensource project [kcptun](https://github.com/xtaci/kcptun). Millions of devices(from low-end MIPS routers to high-end servers) have deployed **kcp-go** powered program in a variety of forms like **online games, live broadcasting, file synchronization and network acceleration**.
-
-[Lastest Release](https://github.com/xtaci/kcp-go/releases)
-
-## Features
-
-1. Designed for **Latency-sensitive** scenarios.
-1. **Cache friendly** and **Memory optimized** design, offers extremely **High Performance** core.
-1. Handles **>5K concurrent connections** on a single commodity server.
-1. Compatible with [net.Conn](https://golang.org/pkg/net/#Conn) and [net.Listener](https://golang.org/pkg/net/#Listener), a drop-in replacement for [net.TCPConn](https://golang.org/pkg/net/#TCPConn).
-1. [FEC(Forward Error Correction)](https://en.wikipedia.org/wiki/Forward_error_correction) Support with [Reed-Solomon Codes](https://en.wikipedia.org/wiki/Reed%E2%80%93Solomon_error_correction)
-1. Packet level encryption support with [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard), [TEA](https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm), [3DES](https://en.wikipedia.org/wiki/Triple_DES), [Blowfish](https://en.wikipedia.org/wiki/Blowfish_(cipher)), [Cast5](https://en.wikipedia.org/wiki/CAST-128), [Salsa20]( https://en.wikipedia.org/wiki/Salsa20), etc. in [CFB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_Feedback_.28CFB.29) mode, which generates completely anonymous packet.
-1. Only **A fixed number of goroutines** will be created for the entire server application, costs in **context switch** between goroutines have been taken into consideration.
-1. Compatible with [skywind3000's](https://github.com/skywind3000) C version with various improvements.
-1. Platform-dependent optimizations: [sendmmsg](http://man7.org/linux/man-pages/man2/sendmmsg.2.html) and [recvmmsg](http://man7.org/linux/man-pages/man2/recvmmsg.2.html) were expoloited for linux.
-
-## Documentation
-
-For complete documentation, see the associated [Godoc](https://godoc.org/github.com/xtaci/kcp-go).
-
-## Specification
-
-
-
-```
-NONCE:
- 16bytes cryptographically secure random number, nonce changes for every packet.
-
-CRC32:
- CRC-32 checksum of data using the IEEE polynomial
-
-FEC TYPE:
- typeData = 0xF1
- typeParity = 0xF2
-
-FEC SEQID:
- monotonically increasing in range: [0, (0xffffffff/shardSize) * shardSize - 1]
-
-SIZE:
- The size of KCP frame plus 2
-```
-
-```
-+-----------------+
-| SESSION |
-+-----------------+
-| KCP(ARQ) |
-+-----------------+
-| FEC(OPTIONAL) |
-+-----------------+
-| CRYPTO(OPTIONAL)|
-+-----------------+
-| UDP(PACKET) |
-+-----------------+
-| IP |
-+-----------------+
-| LINK |
-+-----------------+
-| PHY |
-+-----------------+
-(LAYER MODEL OF KCP-GO)
-```
-
-
-## Examples
-
-1. [simple examples](https://github.com/xtaci/kcp-go/tree/master/examples)
-2. [kcptun client](https://github.com/xtaci/kcptun/blob/master/client/main.go)
-3. [kcptun server](https://github.com/xtaci/kcptun/blob/master/server/main.go)
-
-## Benchmark
-```
- Model Name: MacBook Pro
- Model Identifier: MacBookPro14,1
- Processor Name: Intel Core i5
- Processor Speed: 3.1 GHz
- Number of Processors: 1
- Total Number of Cores: 2
- L2 Cache (per Core): 256 KB
- L3 Cache: 4 MB
- Memory: 8 GB
-```
-```
-$ go test -v -run=^$ -bench .
-beginning tests, encryption:salsa20, fec:10/3
-goos: darwin
-goarch: amd64
-pkg: github.com/xtaci/kcp-go
-BenchmarkSM4-4 50000 32180 ns/op 93.23 MB/s 0 B/op 0 allocs/op
-BenchmarkAES128-4 500000 3285 ns/op 913.21 MB/s 0 B/op 0 allocs/op
-BenchmarkAES192-4 300000 3623 ns/op 827.85 MB/s 0 B/op 0 allocs/op
-BenchmarkAES256-4 300000 3874 ns/op 774.20 MB/s 0 B/op 0 allocs/op
-BenchmarkTEA-4 100000 15384 ns/op 195.00 MB/s 0 B/op 0 allocs/op
-BenchmarkXOR-4 20000000 89.9 ns/op 33372.00 MB/s 0 B/op 0 allocs/op
-BenchmarkBlowfish-4 50000 26927 ns/op 111.41 MB/s 0 B/op 0 allocs/op
-BenchmarkNone-4 30000000 45.7 ns/op 65597.94 MB/s 0 B/op 0 allocs/op
-BenchmarkCast5-4 50000 34258 ns/op 87.57 MB/s 0 B/op 0 allocs/op
-Benchmark3DES-4 10000 117149 ns/op 25.61 MB/s 0 B/op 0 allocs/op
-BenchmarkTwofish-4 50000 33538 ns/op 89.45 MB/s 0 B/op 0 allocs/op
-BenchmarkXTEA-4 30000 45666 ns/op 65.69 MB/s 0 B/op 0 allocs/op
-BenchmarkSalsa20-4 500000 3308 ns/op 906.76 MB/s 0 B/op 0 allocs/op
-BenchmarkCRC32-4 20000000 65.2 ns/op 15712.43 MB/s
-BenchmarkCsprngSystem-4 1000000 1150 ns/op 13.91 MB/s
-BenchmarkCsprngMD5-4 10000000 145 ns/op 110.26 MB/s
-BenchmarkCsprngSHA1-4 10000000 158 ns/op 126.54 MB/s
-BenchmarkCsprngNonceMD5-4 10000000 153 ns/op 104.22 MB/s
-BenchmarkCsprngNonceAES128-4 100000000 19.1 ns/op 837.81 MB/s
-BenchmarkFECDecode-4 1000000 1119 ns/op 1339.61 MB/s 1606 B/op 2 allocs/op
-BenchmarkFECEncode-4 2000000 832 ns/op 1801.83 MB/s 17 B/op 0 allocs/op
-BenchmarkFlush-4 5000000 272 ns/op 0 B/op 0 allocs/op
-BenchmarkEchoSpeed4K-4 5000 259617 ns/op 15.78 MB/s 5451 B/op 149 allocs/op
-BenchmarkEchoSpeed64K-4 1000 1706084 ns/op 38.41 MB/s 56002 B/op 1604 allocs/op
-BenchmarkEchoSpeed512K-4 100 14345505 ns/op 36.55 MB/s 482597 B/op 13045 allocs/op
-BenchmarkEchoSpeed1M-4 30 34859104 ns/op 30.08 MB/s 1143773 B/op 27186 allocs/op
-BenchmarkSinkSpeed4K-4 50000 31369 ns/op 130.57 MB/s 1566 B/op 30 allocs/op
-BenchmarkSinkSpeed64K-4 5000 329065 ns/op 199.16 MB/s 21529 B/op 453 allocs/op
-BenchmarkSinkSpeed256K-4 500 2373354 ns/op 220.91 MB/s 166332 B/op 3554 allocs/op
-BenchmarkSinkSpeed1M-4 300 5117927 ns/op 204.88 MB/s 310378 B/op 6988 allocs/op
-PASS
-ok github.com/xtaci/kcp-go 50.349s
-```
-
-
-## Typical Flame Graph
-![Flame Graph in kcptun](flame.png)
-
-## Key Design Considerations
-
-1. slice vs. container/list
-
-`kcp.flush()` loops through the send queue for retransmission checking for every 20ms(interval).
-
-I've wrote a benchmark for comparing sequential loop through *slice* and *container/list* here:
-
-https://github.com/xtaci/notes/blob/master/golang/benchmark2/cachemiss_test.go
-
-```
-BenchmarkLoopSlice-4 2000000000 0.39 ns/op
-BenchmarkLoopList-4 100000000 54.6 ns/op
-```
-
-List structure introduces **heavy cache misses** compared to slice which owns better **locality**, 5000 connections with 32 window size and 20ms interval will cost 6us/0.03%(cpu) using slice, and 8.7ms/43.5%(cpu) for list for each `kcp.flush()`.
-
-2. Timing accuracy vs. syscall clock_gettime
-
-Timing is **critical** to **RTT estimator**, inaccurate timing leads to false retransmissions in KCP, but calling `time.Now()` costs 42 cycles(10.5ns on 4GHz CPU, 15.6ns on my MacBook Pro 2.7GHz).
-
-The benchmark for time.Now() lies here:
-
-https://github.com/xtaci/notes/blob/master/golang/benchmark2/syscall_test.go
-
-```
-BenchmarkNow-4 100000000 15.6 ns/op
-```
-
-In kcp-go, after each `kcp.output()` function call, current clock time will be updated upon return, and for a single `kcp.flush()` operation, current time will be queried from system once. For most of the time, 5000 connections costs 5000 * 15.6ns = 78us(a fixed cost while no packet needs to be sent), as for 10MB/s data transfering with 1400 MTU, `kcp.output()` will be called around 7500 times and costs 117us for `time.Now()` in **every second**.
-
-3. Memory management
-
-Primary memory allocation are done from a global buffer pool xmit.Buf, in kcp-go, when we need to allocate some bytes, we can get from that pool, and a fixed-capacity 1500 bytes(mtuLimit) will be returned, the rx queue, tx queue and fec queue all receive bytes from there, and they will return the bytes to the pool after using to prevent unnecessary zer0ing of bytes. The pool mechanism maintained a high watermark for slice objects, these in-flight objects from the pool will survive from the perodical garbage collection, meanwhile the pool kept the ability to return the memory to runtime if in idle.
-
-4. Information security
-
-kcp-go is shipped with builtin packet encryption powered by various block encryption algorithms and works in [Cipher Feedback Mode](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_Feedback_(CFB)), for each packet to be sent, the encryption process will start from encrypting a [nonce](https://en.wikipedia.org/wiki/Cryptographic_nonce) from the [system entropy](https://en.wikipedia.org/wiki//dev/random), so encryption to same plaintexts never leads to a same ciphertexts thereafter.
-
-The contents of the packets are completely anonymous with encryption, including the headers(FEC,KCP), checksums and contents. Note that, no matter which encryption method you choose on you upper layer, if you disable encryption, the transmit will be insecure somehow, since the header is ***PLAINTEXT*** to everyone it would be susceptible to header tampering, such as jamming the *sliding window size*, *round-trip time*, *FEC property* and *checksums*. ```AES-128``` is suggested for minimal encryption since modern CPUs are shipped with [AES-NI](https://en.wikipedia.org/wiki/AES_instruction_set) instructions and performs even better than `salsa20`(check the table above).
-
-Other possible attacks to kcp-go includes: a) [traffic analysis](https://en.wikipedia.org/wiki/Traffic_analysis), dataflow on specific websites may have pattern while interchanging data, but this type of eavesdropping has been mitigated by adapting [smux](https://github.com/xtaci/smux) to mix data streams so as to introduce noises, perfect solution to this has not appeared yet, theroretically by shuffling/mixing messages on larger scale network may mitigate this problem. b) [replay attack](https://en.wikipedia.org/wiki/Replay_attack), since the asymmetrical encryption has not been introduced into kcp-go for some reason, capturing the packets and replay them on a different machine is possible, (notice: hijacking the session and decrypting the contents is still *impossible*), so upper layers should contain a asymmetrical encryption system to guarantee the authenticity of each message(to process message exactly once), such as HTTPS/OpenSSL/LibreSSL, only by signing the requests with private keys can eliminate this type of attack.
-
-## Connection Termination
-
-Control messages like **SYN/FIN/RST** in TCP **are not defined** in KCP, you need some **keepalive/heartbeat mechanism** in the application-level. A real world example is to use some **multiplexing** protocol over session, such as [smux](https://github.com/xtaci/smux)(with embedded keepalive mechanism), see [kcptun](https://github.com/xtaci/kcptun) for example.
-
-## FAQ
-
-Q: I'm handling >5K connections on my server, the CPU utilization is so high.
-
-A: A standalone `agent` or `gate` server for running kcp-go is suggested, not only for CPU utilization, but also important to the **precision** of RTT measurements(timing) which indirectly affects retransmission. By increasing update `interval` with `SetNoDelay` like `conn.SetNoDelay(1, 40, 1, 1)` will dramatically reduce system load, but lower the performance.
-
-Q: When should I enable FEC?
-
-A: Forward error correction is critical to long-distance transmission, because a packet loss will lead to a huge penalty in time. And for the complicated packet routing network in modern world, round-trip time based loss check will not always be efficient, the big deviation of RTT samples in the long way usually leads to a larger RTO value in typical rtt estimator, which in other words, slows down the transmission.
-
-Q: Should I enable encryption?
-
-A: Yes, for the safety of protocol, even if the upper layer has encrypted.
-
-## Who is using this?
-
-1. https://github.com/xtaci/kcptun -- A Secure Tunnel Based On KCP over UDP.
-2. https://github.com/getlantern/lantern -- Lantern delivers fast access to the open Internet.
-3. https://github.com/smallnest/rpcx -- A RPC service framework based on net/rpc like alibaba Dubbo and weibo Motan.
-4. https://github.com/gonet2/agent -- A gateway for games with stream multiplexing.
-5. https://github.com/syncthing/syncthing -- Open Source Continuous File Synchronization.
-
-## Links
-
-1. https://github.com/xtaci/smux/ -- A Stream Multiplexing Library for golang with least memory
-1. https://github.com/xtaci/libkcp -- FEC enhanced KCP session library for iOS/Android in C++
-1. https://github.com/skywind3000/kcp -- A Fast and Reliable ARQ Protocol
-1. https://github.com/klauspost/reedsolomon -- Reed-Solomon Erasure Coding in Go
-
-## Consulting
-
-WeChat(付费技术咨询)
-
-
diff --git a/vendor/github.com/xtaci/kcp-go/batchconn.go b/vendor/github.com/xtaci/kcp-go/batchconn.go
deleted file mode 100644
index 6c30701..0000000
--- a/vendor/github.com/xtaci/kcp-go/batchconn.go
+++ /dev/null
@@ -1,12 +0,0 @@
-package kcp
-
-import "golang.org/x/net/ipv4"
-
-const (
- batchSize = 16
-)
-
-type batchConn interface {
- WriteBatch(ms []ipv4.Message, flags int) (int, error)
- ReadBatch(ms []ipv4.Message, flags int) (int, error)
-}
diff --git a/vendor/github.com/xtaci/kcp-go/crypt.go b/vendor/github.com/xtaci/kcp-go/crypt.go
deleted file mode 100644
index 958fdea..0000000
--- a/vendor/github.com/xtaci/kcp-go/crypt.go
+++ /dev/null
@@ -1,785 +0,0 @@
-package kcp
-
-import (
- "crypto/aes"
- "crypto/cipher"
- "crypto/des"
- "crypto/sha1"
-
- "github.com/templexxx/xor"
- "github.com/tjfoc/gmsm/sm4"
-
- "golang.org/x/crypto/blowfish"
- "golang.org/x/crypto/cast5"
- "golang.org/x/crypto/pbkdf2"
- "golang.org/x/crypto/salsa20"
- "golang.org/x/crypto/tea"
- "golang.org/x/crypto/twofish"
- "golang.org/x/crypto/xtea"
-)
-
-var (
- initialVector = []byte{167, 115, 79, 156, 18, 172, 27, 1, 164, 21, 242, 193, 252, 120, 230, 107}
- saltxor = `sH3CIVoF#rWLtJo6`
-)
-
-// BlockCrypt defines encryption/decryption methods for a given byte slice.
-// Notes on implementing: the data to be encrypted contains a builtin
-// nonce at the first 16 bytes
-type BlockCrypt interface {
- // Encrypt encrypts the whole block in src into dst.
- // Dst and src may point at the same memory.
- Encrypt(dst, src []byte)
-
- // Decrypt decrypts the whole block in src into dst.
- // Dst and src may point at the same memory.
- Decrypt(dst, src []byte)
-}
-
-type salsa20BlockCrypt struct {
- key [32]byte
-}
-
-// NewSalsa20BlockCrypt https://en.wikipedia.org/wiki/Salsa20
-func NewSalsa20BlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(salsa20BlockCrypt)
- copy(c.key[:], key)
- return c, nil
-}
-
-func (c *salsa20BlockCrypt) Encrypt(dst, src []byte) {
- salsa20.XORKeyStream(dst[8:], src[8:], src[:8], &c.key)
- copy(dst[:8], src[:8])
-}
-func (c *salsa20BlockCrypt) Decrypt(dst, src []byte) {
- salsa20.XORKeyStream(dst[8:], src[8:], src[:8], &c.key)
- copy(dst[:8], src[:8])
-}
-
-type sm4BlockCrypt struct {
- encbuf [sm4.BlockSize]byte
- decbuf [2 * sm4.BlockSize]byte
- block cipher.Block
-}
-
-// NewSM4BlockCrypt https://github.com/tjfoc/gmsm/tree/master/sm4
-func NewSM4BlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(sm4BlockCrypt)
- block, err := sm4.NewCipher(key)
- if err != nil {
- return nil, err
- }
- c.block = block
- return c, nil
-}
-
-func (c *sm4BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
-func (c *sm4BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
-
-type twofishBlockCrypt struct {
- encbuf [twofish.BlockSize]byte
- decbuf [2 * twofish.BlockSize]byte
- block cipher.Block
-}
-
-// NewTwofishBlockCrypt https://en.wikipedia.org/wiki/Twofish
-func NewTwofishBlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(twofishBlockCrypt)
- block, err := twofish.NewCipher(key)
- if err != nil {
- return nil, err
- }
- c.block = block
- return c, nil
-}
-
-func (c *twofishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
-func (c *twofishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
-
-type tripleDESBlockCrypt struct {
- encbuf [des.BlockSize]byte
- decbuf [2 * des.BlockSize]byte
- block cipher.Block
-}
-
-// NewTripleDESBlockCrypt https://en.wikipedia.org/wiki/Triple_DES
-func NewTripleDESBlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(tripleDESBlockCrypt)
- block, err := des.NewTripleDESCipher(key)
- if err != nil {
- return nil, err
- }
- c.block = block
- return c, nil
-}
-
-func (c *tripleDESBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
-func (c *tripleDESBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
-
-type cast5BlockCrypt struct {
- encbuf [cast5.BlockSize]byte
- decbuf [2 * cast5.BlockSize]byte
- block cipher.Block
-}
-
-// NewCast5BlockCrypt https://en.wikipedia.org/wiki/CAST-128
-func NewCast5BlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(cast5BlockCrypt)
- block, err := cast5.NewCipher(key)
- if err != nil {
- return nil, err
- }
- c.block = block
- return c, nil
-}
-
-func (c *cast5BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
-func (c *cast5BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
-
-type blowfishBlockCrypt struct {
- encbuf [blowfish.BlockSize]byte
- decbuf [2 * blowfish.BlockSize]byte
- block cipher.Block
-}
-
-// NewBlowfishBlockCrypt https://en.wikipedia.org/wiki/Blowfish_(cipher)
-func NewBlowfishBlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(blowfishBlockCrypt)
- block, err := blowfish.NewCipher(key)
- if err != nil {
- return nil, err
- }
- c.block = block
- return c, nil
-}
-
-func (c *blowfishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
-func (c *blowfishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
-
-type aesBlockCrypt struct {
- encbuf [aes.BlockSize]byte
- decbuf [2 * aes.BlockSize]byte
- block cipher.Block
-}
-
-// NewAESBlockCrypt https://en.wikipedia.org/wiki/Advanced_Encryption_Standard
-func NewAESBlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(aesBlockCrypt)
- block, err := aes.NewCipher(key)
- if err != nil {
- return nil, err
- }
- c.block = block
- return c, nil
-}
-
-func (c *aesBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
-func (c *aesBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
-
-type teaBlockCrypt struct {
- encbuf [tea.BlockSize]byte
- decbuf [2 * tea.BlockSize]byte
- block cipher.Block
-}
-
-// NewTEABlockCrypt https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm
-func NewTEABlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(teaBlockCrypt)
- block, err := tea.NewCipherWithRounds(key, 16)
- if err != nil {
- return nil, err
- }
- c.block = block
- return c, nil
-}
-
-func (c *teaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
-func (c *teaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
-
-type xteaBlockCrypt struct {
- encbuf [xtea.BlockSize]byte
- decbuf [2 * xtea.BlockSize]byte
- block cipher.Block
-}
-
-// NewXTEABlockCrypt https://en.wikipedia.org/wiki/XTEA
-func NewXTEABlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(xteaBlockCrypt)
- block, err := xtea.NewCipher(key)
- if err != nil {
- return nil, err
- }
- c.block = block
- return c, nil
-}
-
-func (c *xteaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
-func (c *xteaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
-
-type simpleXORBlockCrypt struct {
- xortbl []byte
-}
-
-// NewSimpleXORBlockCrypt simple xor with key expanding
-func NewSimpleXORBlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(simpleXORBlockCrypt)
- c.xortbl = pbkdf2.Key(key, []byte(saltxor), 32, mtuLimit, sha1.New)
- return c, nil
-}
-
-func (c *simpleXORBlockCrypt) Encrypt(dst, src []byte) { xor.Bytes(dst, src, c.xortbl) }
-func (c *simpleXORBlockCrypt) Decrypt(dst, src []byte) { xor.Bytes(dst, src, c.xortbl) }
-
-type noneBlockCrypt struct{}
-
-// NewNoneBlockCrypt does nothing but copying
-func NewNoneBlockCrypt(key []byte) (BlockCrypt, error) {
- return new(noneBlockCrypt), nil
-}
-
-func (c *noneBlockCrypt) Encrypt(dst, src []byte) { copy(dst, src) }
-func (c *noneBlockCrypt) Decrypt(dst, src []byte) { copy(dst, src) }
-
-// packet encryption with local CFB mode
-func encrypt(block cipher.Block, dst, src, buf []byte) {
- switch block.BlockSize() {
- case 8:
- encrypt8(block, dst, src, buf)
- case 16:
- encrypt16(block, dst, src, buf)
- default:
- encryptVariant(block, dst, src, buf)
- }
-}
-
-// optimized encryption for the ciphers which works in 8-bytes
-func encrypt8(block cipher.Block, dst, src, buf []byte) {
- tbl := buf[:8]
- block.Encrypt(tbl, initialVector)
- n := len(src) / 8
- base := 0
- repeat := n / 8
- left := n % 8
- for i := 0; i < repeat; i++ {
- s := src[base:][0:64]
- d := dst[base:][0:64]
- // 1
- xor.BytesSrc1(d[0:8], s[0:8], tbl)
- block.Encrypt(tbl, d[0:8])
- // 2
- xor.BytesSrc1(d[8:16], s[8:16], tbl)
- block.Encrypt(tbl, d[8:16])
- // 3
- xor.BytesSrc1(d[16:24], s[16:24], tbl)
- block.Encrypt(tbl, d[16:24])
- // 4
- xor.BytesSrc1(d[24:32], s[24:32], tbl)
- block.Encrypt(tbl, d[24:32])
- // 5
- xor.BytesSrc1(d[32:40], s[32:40], tbl)
- block.Encrypt(tbl, d[32:40])
- // 6
- xor.BytesSrc1(d[40:48], s[40:48], tbl)
- block.Encrypt(tbl, d[40:48])
- // 7
- xor.BytesSrc1(d[48:56], s[48:56], tbl)
- block.Encrypt(tbl, d[48:56])
- // 8
- xor.BytesSrc1(d[56:64], s[56:64], tbl)
- block.Encrypt(tbl, d[56:64])
- base += 64
- }
-
- switch left {
- case 7:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += 8
- fallthrough
- case 6:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += 8
- fallthrough
- case 5:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += 8
- fallthrough
- case 4:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += 8
- fallthrough
- case 3:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += 8
- fallthrough
- case 2:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += 8
- fallthrough
- case 1:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += 8
- fallthrough
- case 0:
- xor.BytesSrc0(dst[base:], src[base:], tbl)
- }
-}
-
-// optimized encryption for the ciphers which works in 16-bytes
-func encrypt16(block cipher.Block, dst, src, buf []byte) {
- tbl := buf[:16]
- block.Encrypt(tbl, initialVector)
- n := len(src) / 16
- base := 0
- repeat := n / 8
- left := n % 8
- for i := 0; i < repeat; i++ {
- s := src[base:][0:128]
- d := dst[base:][0:128]
- // 1
- xor.BytesSrc1(d[0:16], s[0:16], tbl)
- block.Encrypt(tbl, d[0:16])
- // 2
- xor.BytesSrc1(d[16:32], s[16:32], tbl)
- block.Encrypt(tbl, d[16:32])
- // 3
- xor.BytesSrc1(d[32:48], s[32:48], tbl)
- block.Encrypt(tbl, d[32:48])
- // 4
- xor.BytesSrc1(d[48:64], s[48:64], tbl)
- block.Encrypt(tbl, d[48:64])
- // 5
- xor.BytesSrc1(d[64:80], s[64:80], tbl)
- block.Encrypt(tbl, d[64:80])
- // 6
- xor.BytesSrc1(d[80:96], s[80:96], tbl)
- block.Encrypt(tbl, d[80:96])
- // 7
- xor.BytesSrc1(d[96:112], s[96:112], tbl)
- block.Encrypt(tbl, d[96:112])
- // 8
- xor.BytesSrc1(d[112:128], s[112:128], tbl)
- block.Encrypt(tbl, d[112:128])
- base += 128
- }
-
- switch left {
- case 7:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += 16
- fallthrough
- case 6:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += 16
- fallthrough
- case 5:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += 16
- fallthrough
- case 4:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += 16
- fallthrough
- case 3:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += 16
- fallthrough
- case 2:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += 16
- fallthrough
- case 1:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += 16
- fallthrough
- case 0:
- xor.BytesSrc0(dst[base:], src[base:], tbl)
- }
-}
-
-func encryptVariant(block cipher.Block, dst, src, buf []byte) {
- blocksize := block.BlockSize()
- tbl := buf[:blocksize]
- block.Encrypt(tbl, initialVector)
- n := len(src) / blocksize
- base := 0
- repeat := n / 8
- left := n % 8
- for i := 0; i < repeat; i++ {
- // 1
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += blocksize
-
- // 2
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += blocksize
-
- // 3
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += blocksize
-
- // 4
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += blocksize
-
- // 5
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += blocksize
-
- // 6
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += blocksize
-
- // 7
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += blocksize
-
- // 8
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += blocksize
- }
-
- switch left {
- case 7:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += blocksize
- fallthrough
- case 6:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += blocksize
- fallthrough
- case 5:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += blocksize
- fallthrough
- case 4:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += blocksize
- fallthrough
- case 3:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += blocksize
- fallthrough
- case 2:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += blocksize
- fallthrough
- case 1:
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- block.Encrypt(tbl, dst[base:])
- base += blocksize
- fallthrough
- case 0:
- xor.BytesSrc0(dst[base:], src[base:], tbl)
- }
-}
-
-// decryption
-func decrypt(block cipher.Block, dst, src, buf []byte) {
- switch block.BlockSize() {
- case 8:
- decrypt8(block, dst, src, buf)
- case 16:
- decrypt16(block, dst, src, buf)
- default:
- decryptVariant(block, dst, src, buf)
- }
-}
-
-func decrypt8(block cipher.Block, dst, src, buf []byte) {
- tbl := buf[0:8]
- next := buf[8:16]
- block.Encrypt(tbl, initialVector)
- n := len(src) / 8
- base := 0
- repeat := n / 8
- left := n % 8
- for i := 0; i < repeat; i++ {
- s := src[base:][0:64]
- d := dst[base:][0:64]
- // 1
- block.Encrypt(next, s[0:8])
- xor.BytesSrc1(d[0:8], s[0:8], tbl)
- // 2
- block.Encrypt(tbl, s[8:16])
- xor.BytesSrc1(d[8:16], s[8:16], next)
- // 3
- block.Encrypt(next, s[16:24])
- xor.BytesSrc1(d[16:24], s[16:24], tbl)
- // 4
- block.Encrypt(tbl, s[24:32])
- xor.BytesSrc1(d[24:32], s[24:32], next)
- // 5
- block.Encrypt(next, s[32:40])
- xor.BytesSrc1(d[32:40], s[32:40], tbl)
- // 6
- block.Encrypt(tbl, s[40:48])
- xor.BytesSrc1(d[40:48], s[40:48], next)
- // 7
- block.Encrypt(next, s[48:56])
- xor.BytesSrc1(d[48:56], s[48:56], tbl)
- // 8
- block.Encrypt(tbl, s[56:64])
- xor.BytesSrc1(d[56:64], s[56:64], next)
- base += 64
- }
-
- switch left {
- case 7:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += 8
- fallthrough
- case 6:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += 8
- fallthrough
- case 5:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += 8
- fallthrough
- case 4:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += 8
- fallthrough
- case 3:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += 8
- fallthrough
- case 2:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += 8
- fallthrough
- case 1:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += 8
- fallthrough
- case 0:
- xor.BytesSrc0(dst[base:], src[base:], tbl)
- }
-}
-
-func decrypt16(block cipher.Block, dst, src, buf []byte) {
- tbl := buf[0:16]
- next := buf[16:32]
- block.Encrypt(tbl, initialVector)
- n := len(src) / 16
- base := 0
- repeat := n / 8
- left := n % 8
- for i := 0; i < repeat; i++ {
- s := src[base:][0:128]
- d := dst[base:][0:128]
- // 1
- block.Encrypt(next, s[0:16])
- xor.BytesSrc1(d[0:16], s[0:16], tbl)
- // 2
- block.Encrypt(tbl, s[16:32])
- xor.BytesSrc1(d[16:32], s[16:32], next)
- // 3
- block.Encrypt(next, s[32:48])
- xor.BytesSrc1(d[32:48], s[32:48], tbl)
- // 4
- block.Encrypt(tbl, s[48:64])
- xor.BytesSrc1(d[48:64], s[48:64], next)
- // 5
- block.Encrypt(next, s[64:80])
- xor.BytesSrc1(d[64:80], s[64:80], tbl)
- // 6
- block.Encrypt(tbl, s[80:96])
- xor.BytesSrc1(d[80:96], s[80:96], next)
- // 7
- block.Encrypt(next, s[96:112])
- xor.BytesSrc1(d[96:112], s[96:112], tbl)
- // 8
- block.Encrypt(tbl, s[112:128])
- xor.BytesSrc1(d[112:128], s[112:128], next)
- base += 128
- }
-
- switch left {
- case 7:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += 16
- fallthrough
- case 6:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += 16
- fallthrough
- case 5:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += 16
- fallthrough
- case 4:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += 16
- fallthrough
- case 3:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += 16
- fallthrough
- case 2:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += 16
- fallthrough
- case 1:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += 16
- fallthrough
- case 0:
- xor.BytesSrc0(dst[base:], src[base:], tbl)
- }
-}
-
-func decryptVariant(block cipher.Block, dst, src, buf []byte) {
- blocksize := block.BlockSize()
- tbl := buf[:blocksize]
- next := buf[blocksize:]
- block.Encrypt(tbl, initialVector)
- n := len(src) / blocksize
- base := 0
- repeat := n / 8
- left := n % 8
- for i := 0; i < repeat; i++ {
- // 1
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- base += blocksize
-
- // 2
- block.Encrypt(tbl, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], next)
- base += blocksize
-
- // 3
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- base += blocksize
-
- // 4
- block.Encrypt(tbl, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], next)
- base += blocksize
-
- // 5
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- base += blocksize
-
- // 6
- block.Encrypt(tbl, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], next)
- base += blocksize
-
- // 7
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- base += blocksize
-
- // 8
- block.Encrypt(tbl, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], next)
- base += blocksize
- }
-
- switch left {
- case 7:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += blocksize
- fallthrough
- case 6:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += blocksize
- fallthrough
- case 5:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += blocksize
- fallthrough
- case 4:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += blocksize
- fallthrough
- case 3:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += blocksize
- fallthrough
- case 2:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += blocksize
- fallthrough
- case 1:
- block.Encrypt(next, src[base:])
- xor.BytesSrc1(dst[base:], src[base:], tbl)
- tbl, next = next, tbl
- base += blocksize
- fallthrough
- case 0:
- xor.BytesSrc0(dst[base:], src[base:], tbl)
- }
-}
diff --git a/vendor/github.com/xtaci/kcp-go/donate.png b/vendor/github.com/xtaci/kcp-go/donate.png
deleted file mode 100644
index 0f353d9..0000000
Binary files a/vendor/github.com/xtaci/kcp-go/donate.png and /dev/null differ
diff --git a/vendor/github.com/xtaci/kcp-go/entropy.go b/vendor/github.com/xtaci/kcp-go/entropy.go
deleted file mode 100644
index 156c1cd..0000000
--- a/vendor/github.com/xtaci/kcp-go/entropy.go
+++ /dev/null
@@ -1,52 +0,0 @@
-package kcp
-
-import (
- "crypto/aes"
- "crypto/cipher"
- "crypto/md5"
- "crypto/rand"
- "io"
-)
-
-// Entropy defines a entropy source
-type Entropy interface {
- Init()
- Fill(nonce []byte)
-}
-
-// nonceMD5 nonce generator for packet header
-type nonceMD5 struct {
- seed [md5.Size]byte
-}
-
-func (n *nonceMD5) Init() { /*nothing required*/ }
-
-func (n *nonceMD5) Fill(nonce []byte) {
- if n.seed[0] == 0 { // entropy update
- io.ReadFull(rand.Reader, n.seed[:])
- }
- n.seed = md5.Sum(n.seed[:])
- copy(nonce, n.seed[:])
-}
-
-// nonceAES128 nonce generator for packet headers
-type nonceAES128 struct {
- seed [aes.BlockSize]byte
- block cipher.Block
-}
-
-func (n *nonceAES128) Init() {
- var key [16]byte //aes-128
- io.ReadFull(rand.Reader, key[:])
- io.ReadFull(rand.Reader, n.seed[:])
- block, _ := aes.NewCipher(key[:])
- n.block = block
-}
-
-func (n *nonceAES128) Fill(nonce []byte) {
- if n.seed[0] == 0 { // entropy update
- io.ReadFull(rand.Reader, n.seed[:])
- }
- n.block.Encrypt(n.seed[:], n.seed[:])
- copy(nonce, n.seed[:])
-}
diff --git a/vendor/github.com/xtaci/kcp-go/fec.go b/vendor/github.com/xtaci/kcp-go/fec.go
deleted file mode 100644
index 97cd40b..0000000
--- a/vendor/github.com/xtaci/kcp-go/fec.go
+++ /dev/null
@@ -1,337 +0,0 @@
-package kcp
-
-import (
- "encoding/binary"
- "sync/atomic"
-
- "github.com/klauspost/reedsolomon"
-)
-
-const (
- fecHeaderSize = 6
- fecHeaderSizePlus2 = fecHeaderSize + 2 // plus 2B data size
- typeData = 0xf1
- typeParity = 0xf2
- fecExpire = 60000
-)
-
-// fecPacket is a decoded FEC packet
-type fecPacket []byte
-
-func (bts fecPacket) seqid() uint32 { return binary.LittleEndian.Uint32(bts) }
-func (bts fecPacket) flag() uint16 { return binary.LittleEndian.Uint16(bts[4:]) }
-func (bts fecPacket) data() []byte { return bts[6:] }
-
-// fecElement has auxcilliary time field
-type fecElement struct {
- fecPacket
- ts uint32
-}
-
-// fecDecoder for decoding incoming packets
-type fecDecoder struct {
- rxlimit int // queue size limit
- dataShards int
- parityShards int
- shardSize int
- rx []fecElement // ordered receive queue
-
- // caches
- decodeCache [][]byte
- flagCache []bool
-
- // zeros
- zeros []byte
-
- // RS decoder
- codec reedsolomon.Encoder
-}
-
-func newFECDecoder(rxlimit, dataShards, parityShards int) *fecDecoder {
- if dataShards <= 0 || parityShards <= 0 {
- return nil
- }
- if rxlimit < dataShards+parityShards {
- return nil
- }
-
- dec := new(fecDecoder)
- dec.rxlimit = rxlimit
- dec.dataShards = dataShards
- dec.parityShards = parityShards
- dec.shardSize = dataShards + parityShards
- codec, err := reedsolomon.New(dataShards, parityShards)
- if err != nil {
- return nil
- }
- dec.codec = codec
- dec.decodeCache = make([][]byte, dec.shardSize)
- dec.flagCache = make([]bool, dec.shardSize)
- dec.zeros = make([]byte, mtuLimit)
- return dec
-}
-
-// decode a fec packet
-func (dec *fecDecoder) decode(in fecPacket) (recovered [][]byte) {
- // insertion
- n := len(dec.rx) - 1
- insertIdx := 0
- for i := n; i >= 0; i-- {
- if in.seqid() == dec.rx[i].seqid() { // de-duplicate
- return nil
- } else if _itimediff(in.seqid(), dec.rx[i].seqid()) > 0 { // insertion
- insertIdx = i + 1
- break
- }
- }
-
- // make a copy
- pkt := fecPacket(xmitBuf.Get().([]byte)[:len(in)])
- copy(pkt, in)
- elem := fecElement{pkt, currentMs()}
-
- // insert into ordered rx queue
- if insertIdx == n+1 {
- dec.rx = append(dec.rx, elem)
- } else {
- dec.rx = append(dec.rx, fecElement{})
- copy(dec.rx[insertIdx+1:], dec.rx[insertIdx:]) // shift right
- dec.rx[insertIdx] = elem
- }
-
- // shard range for current packet
- shardBegin := pkt.seqid() - pkt.seqid()%uint32(dec.shardSize)
- shardEnd := shardBegin + uint32(dec.shardSize) - 1
-
- // max search range in ordered queue for current shard
- searchBegin := insertIdx - int(pkt.seqid()%uint32(dec.shardSize))
- if searchBegin < 0 {
- searchBegin = 0
- }
- searchEnd := searchBegin + dec.shardSize - 1
- if searchEnd >= len(dec.rx) {
- searchEnd = len(dec.rx) - 1
- }
-
- // re-construct datashards
- if searchEnd-searchBegin+1 >= dec.dataShards {
- var numshard, numDataShard, first, maxlen int
-
- // zero caches
- shards := dec.decodeCache
- shardsflag := dec.flagCache
- for k := range dec.decodeCache {
- shards[k] = nil
- shardsflag[k] = false
- }
-
- // shard assembly
- for i := searchBegin; i <= searchEnd; i++ {
- seqid := dec.rx[i].seqid()
- if _itimediff(seqid, shardEnd) > 0 {
- break
- } else if _itimediff(seqid, shardBegin) >= 0 {
- shards[seqid%uint32(dec.shardSize)] = dec.rx[i].data()
- shardsflag[seqid%uint32(dec.shardSize)] = true
- numshard++
- if dec.rx[i].flag() == typeData {
- numDataShard++
- }
- if numshard == 1 {
- first = i
- }
- if len(dec.rx[i].data()) > maxlen {
- maxlen = len(dec.rx[i].data())
- }
- }
- }
-
- if numDataShard == dec.dataShards {
- // case 1: no loss on data shards
- dec.rx = dec.freeRange(first, numshard, dec.rx)
- } else if numshard >= dec.dataShards {
- // case 2: loss on data shards, but it's recoverable from parity shards
- for k := range shards {
- if shards[k] != nil {
- dlen := len(shards[k])
- shards[k] = shards[k][:maxlen]
- copy(shards[k][dlen:], dec.zeros)
- } else if k < dec.dataShards {
- shards[k] = xmitBuf.Get().([]byte)[:0]
- }
- }
- if err := dec.codec.ReconstructData(shards); err == nil {
- for k := range shards[:dec.dataShards] {
- if !shardsflag[k] {
- // recovered data should be recycled
- recovered = append(recovered, shards[k])
- }
- }
- }
- dec.rx = dec.freeRange(first, numshard, dec.rx)
- }
- }
-
- // keep rxlimit
- if len(dec.rx) > dec.rxlimit {
- if dec.rx[0].flag() == typeData { // track the unrecoverable data
- atomic.AddUint64(&DefaultSnmp.FECShortShards, 1)
- }
- dec.rx = dec.freeRange(0, 1, dec.rx)
- }
-
- // timeout policy
- current := currentMs()
- numExpired := 0
- for k := range dec.rx {
- if _itimediff(current, dec.rx[k].ts) > fecExpire {
- numExpired++
- continue
- }
- break
- }
- if numExpired > 0 {
- dec.rx = dec.freeRange(0, numExpired, dec.rx)
- }
- return
-}
-
-// free a range of fecPacket
-func (dec *fecDecoder) freeRange(first, n int, q []fecElement) []fecElement {
- for i := first; i < first+n; i++ { // recycle buffer
- xmitBuf.Put([]byte(q[i].fecPacket))
- }
-
- if first == 0 && n < cap(q)/2 {
- return q[n:]
- }
- copy(q[first:], q[first+n:])
- return q[:len(q)-n]
-}
-
-// release all segments back to xmitBuf
-func (dec *fecDecoder) release() {
- if n := len(dec.rx); n > 0 {
- dec.rx = dec.freeRange(0, n, dec.rx)
- }
-}
-
-type (
- // fecEncoder for encoding outgoing packets
- fecEncoder struct {
- dataShards int
- parityShards int
- shardSize int
- paws uint32 // Protect Against Wrapped Sequence numbers
- next uint32 // next seqid
-
- shardCount int // count the number of datashards collected
- maxSize int // track maximum data length in datashard
-
- headerOffset int // FEC header offset
- payloadOffset int // FEC payload offset
-
- // caches
- shardCache [][]byte
- encodeCache [][]byte
-
- // zeros
- zeros []byte
-
- // RS encoder
- codec reedsolomon.Encoder
- }
-)
-
-func newFECEncoder(dataShards, parityShards, offset int) *fecEncoder {
- if dataShards <= 0 || parityShards <= 0 {
- return nil
- }
- enc := new(fecEncoder)
- enc.dataShards = dataShards
- enc.parityShards = parityShards
- enc.shardSize = dataShards + parityShards
- enc.paws = 0xffffffff / uint32(enc.shardSize) * uint32(enc.shardSize)
- enc.headerOffset = offset
- enc.payloadOffset = enc.headerOffset + fecHeaderSize
-
- codec, err := reedsolomon.New(dataShards, parityShards)
- if err != nil {
- return nil
- }
- enc.codec = codec
-
- // caches
- enc.encodeCache = make([][]byte, enc.shardSize)
- enc.shardCache = make([][]byte, enc.shardSize)
- for k := range enc.shardCache {
- enc.shardCache[k] = make([]byte, mtuLimit)
- }
- enc.zeros = make([]byte, mtuLimit)
- return enc
-}
-
-// encodes the packet, outputs parity shards if we have collected quorum datashards
-// notice: the contents of 'ps' will be re-written in successive calling
-func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
- // The header format:
- // | FEC SEQID(4B) | FEC TYPE(2B) | SIZE (2B) | PAYLOAD(SIZE-2) |
- // |<-headerOffset |<-payloadOffset
- enc.markData(b[enc.headerOffset:])
- binary.LittleEndian.PutUint16(b[enc.payloadOffset:], uint16(len(b[enc.payloadOffset:])))
-
- // copy data from payloadOffset to fec shard cache
- sz := len(b)
- enc.shardCache[enc.shardCount] = enc.shardCache[enc.shardCount][:sz]
- copy(enc.shardCache[enc.shardCount][enc.payloadOffset:], b[enc.payloadOffset:])
- enc.shardCount++
-
- // track max datashard length
- if sz > enc.maxSize {
- enc.maxSize = sz
- }
-
- // Generation of Reed-Solomon Erasure Code
- if enc.shardCount == enc.dataShards {
- // fill '0' into the tail of each datashard
- for i := 0; i < enc.dataShards; i++ {
- shard := enc.shardCache[i]
- slen := len(shard)
- copy(shard[slen:enc.maxSize], enc.zeros)
- }
-
- // construct equal-sized slice with stripped header
- cache := enc.encodeCache
- for k := range cache {
- cache[k] = enc.shardCache[k][enc.payloadOffset:enc.maxSize]
- }
-
- // encoding
- if err := enc.codec.Encode(cache); err == nil {
- ps = enc.shardCache[enc.dataShards:]
- for k := range ps {
- enc.markParity(ps[k][enc.headerOffset:])
- ps[k] = ps[k][:enc.maxSize]
- }
- }
-
- // counters resetting
- enc.shardCount = 0
- enc.maxSize = 0
- }
-
- return
-}
-
-func (enc *fecEncoder) markData(data []byte) {
- binary.LittleEndian.PutUint32(data, enc.next)
- binary.LittleEndian.PutUint16(data[4:], typeData)
- enc.next++
-}
-
-func (enc *fecEncoder) markParity(data []byte) {
- binary.LittleEndian.PutUint32(data, enc.next)
- binary.LittleEndian.PutUint16(data[4:], typeParity)
- // sequence wrap will only happen at parity shard
- enc.next = (enc.next + 1) % enc.paws
-}
diff --git a/vendor/github.com/xtaci/kcp-go/flame.png b/vendor/github.com/xtaci/kcp-go/flame.png
deleted file mode 100644
index 672f649..0000000
Binary files a/vendor/github.com/xtaci/kcp-go/flame.png and /dev/null differ
diff --git a/vendor/github.com/xtaci/kcp-go/frame.png b/vendor/github.com/xtaci/kcp-go/frame.png
deleted file mode 100644
index 0b0aefd..0000000
Binary files a/vendor/github.com/xtaci/kcp-go/frame.png and /dev/null differ
diff --git a/vendor/github.com/xtaci/kcp-go/kcp-go.png b/vendor/github.com/xtaci/kcp-go/kcp-go.png
deleted file mode 100644
index 151b7c4..0000000
Binary files a/vendor/github.com/xtaci/kcp-go/kcp-go.png and /dev/null differ
diff --git a/vendor/github.com/xtaci/kcp-go/kcp.go b/vendor/github.com/xtaci/kcp-go/kcp.go
deleted file mode 100644
index 342e69a..0000000
--- a/vendor/github.com/xtaci/kcp-go/kcp.go
+++ /dev/null
@@ -1,1070 +0,0 @@
-package kcp
-
-import (
- "encoding/binary"
- "sync/atomic"
- "time"
-)
-
-const (
- IKCP_RTO_NDL = 30 // no delay min rto
- IKCP_RTO_MIN = 100 // normal min rto
- IKCP_RTO_DEF = 200
- IKCP_RTO_MAX = 60000
- IKCP_CMD_PUSH = 81 // cmd: push data
- IKCP_CMD_ACK = 82 // cmd: ack
- IKCP_CMD_WASK = 83 // cmd: window probe (ask)
- IKCP_CMD_WINS = 84 // cmd: window size (tell)
- IKCP_ASK_SEND = 1 // need to send IKCP_CMD_WASK
- IKCP_ASK_TELL = 2 // need to send IKCP_CMD_WINS
- IKCP_WND_SND = 32
- IKCP_WND_RCV = 32
- IKCP_MTU_DEF = 1400
- IKCP_ACK_FAST = 3
- IKCP_INTERVAL = 100
- IKCP_OVERHEAD = 24
- IKCP_DEADLINK = 20
- IKCP_THRESH_INIT = 2
- IKCP_THRESH_MIN = 2
- IKCP_PROBE_INIT = 7000 // 7 secs to probe window size
- IKCP_PROBE_LIMIT = 120000 // up to 120 secs to probe window
- IKCP_SN_OFFSET = 12
-)
-
-// monotonic reference time point
-var refTime time.Time = time.Now()
-
-// currentMs returns current elasped monotonic milliseconds since program startup
-func currentMs() uint32 { return uint32(time.Now().Sub(refTime) / time.Millisecond) }
-
-// output_callback is a prototype which ought capture conn and call conn.Write
-type output_callback func(buf []byte, size int)
-
-/* encode 8 bits unsigned int */
-func ikcp_encode8u(p []byte, c byte) []byte {
- p[0] = c
- return p[1:]
-}
-
-/* decode 8 bits unsigned int */
-func ikcp_decode8u(p []byte, c *byte) []byte {
- *c = p[0]
- return p[1:]
-}
-
-/* encode 16 bits unsigned int (lsb) */
-func ikcp_encode16u(p []byte, w uint16) []byte {
- binary.LittleEndian.PutUint16(p, w)
- return p[2:]
-}
-
-/* decode 16 bits unsigned int (lsb) */
-func ikcp_decode16u(p []byte, w *uint16) []byte {
- *w = binary.LittleEndian.Uint16(p)
- return p[2:]
-}
-
-/* encode 32 bits unsigned int (lsb) */
-func ikcp_encode32u(p []byte, l uint32) []byte {
- binary.LittleEndian.PutUint32(p, l)
- return p[4:]
-}
-
-/* decode 32 bits unsigned int (lsb) */
-func ikcp_decode32u(p []byte, l *uint32) []byte {
- *l = binary.LittleEndian.Uint32(p)
- return p[4:]
-}
-
-func _imin_(a, b uint32) uint32 {
- if a <= b {
- return a
- }
- return b
-}
-
-func _imax_(a, b uint32) uint32 {
- if a >= b {
- return a
- }
- return b
-}
-
-func _ibound_(lower, middle, upper uint32) uint32 {
- return _imin_(_imax_(lower, middle), upper)
-}
-
-func _itimediff(later, earlier uint32) int32 {
- return (int32)(later - earlier)
-}
-
-// segment defines a KCP segment
-type segment struct {
- conv uint32
- cmd uint8
- frg uint8
- wnd uint16
- ts uint32
- sn uint32
- una uint32
- rto uint32
- xmit uint32
- resendts uint32
- fastack uint32
- acked uint32 // mark if the seg has acked
- data []byte
-}
-
-// encode a segment into buffer
-func (seg *segment) encode(ptr []byte) []byte {
- ptr = ikcp_encode32u(ptr, seg.conv)
- ptr = ikcp_encode8u(ptr, seg.cmd)
- ptr = ikcp_encode8u(ptr, seg.frg)
- ptr = ikcp_encode16u(ptr, seg.wnd)
- ptr = ikcp_encode32u(ptr, seg.ts)
- ptr = ikcp_encode32u(ptr, seg.sn)
- ptr = ikcp_encode32u(ptr, seg.una)
- ptr = ikcp_encode32u(ptr, uint32(len(seg.data)))
- atomic.AddUint64(&DefaultSnmp.OutSegs, 1)
- return ptr
-}
-
-// KCP defines a single KCP connection
-type KCP struct {
- conv, mtu, mss, state uint32
- snd_una, snd_nxt, rcv_nxt uint32
- ssthresh uint32
- rx_rttvar, rx_srtt int32
- rx_rto, rx_minrto uint32
- snd_wnd, rcv_wnd, rmt_wnd, cwnd, probe uint32
- interval, ts_flush uint32
- nodelay, updated uint32
- ts_probe, probe_wait uint32
- dead_link, incr uint32
-
- fastresend int32
- nocwnd, stream int32
-
- snd_queue []segment
- rcv_queue []segment
- snd_buf []segment
- rcv_buf []segment
-
- acklist []ackItem
-
- buffer []byte
- reserved int
- output output_callback
-}
-
-type ackItem struct {
- sn uint32
- ts uint32
-}
-
-// NewKCP create a new kcp state machine
-//
-// 'conv' must be equal in the connection peers, or else data will be silently rejected.
-//
-// 'output' function will be called whenever these is data to be sent on wire.
-func NewKCP(conv uint32, output output_callback) *KCP {
- kcp := new(KCP)
- kcp.conv = conv
- kcp.snd_wnd = IKCP_WND_SND
- kcp.rcv_wnd = IKCP_WND_RCV
- kcp.rmt_wnd = IKCP_WND_RCV
- kcp.mtu = IKCP_MTU_DEF
- kcp.mss = kcp.mtu - IKCP_OVERHEAD
- kcp.buffer = make([]byte, kcp.mtu)
- kcp.rx_rto = IKCP_RTO_DEF
- kcp.rx_minrto = IKCP_RTO_MIN
- kcp.interval = IKCP_INTERVAL
- kcp.ts_flush = IKCP_INTERVAL
- kcp.ssthresh = IKCP_THRESH_INIT
- kcp.dead_link = IKCP_DEADLINK
- kcp.output = output
- return kcp
-}
-
-// newSegment creates a KCP segment
-func (kcp *KCP) newSegment(size int) (seg segment) {
- seg.data = xmitBuf.Get().([]byte)[:size]
- return
-}
-
-// delSegment recycles a KCP segment
-func (kcp *KCP) delSegment(seg *segment) {
- if seg.data != nil {
- xmitBuf.Put(seg.data)
- seg.data = nil
- }
-}
-
-// ReserveBytes keeps n bytes untouched from the beginning of the buffer,
-// the output_callback function should be aware of this.
-//
-// Return false if n >= mss
-func (kcp *KCP) ReserveBytes(n int) bool {
- if n >= int(kcp.mtu-IKCP_OVERHEAD) || n < 0 {
- return false
- }
- kcp.reserved = n
- kcp.mss = kcp.mtu - IKCP_OVERHEAD - uint32(n)
- return true
-}
-
-// PeekSize checks the size of next message in the recv queue
-func (kcp *KCP) PeekSize() (length int) {
- if len(kcp.rcv_queue) == 0 {
- return -1
- }
-
- seg := &kcp.rcv_queue[0]
- if seg.frg == 0 {
- return len(seg.data)
- }
-
- if len(kcp.rcv_queue) < int(seg.frg+1) {
- return -1
- }
-
- for k := range kcp.rcv_queue {
- seg := &kcp.rcv_queue[k]
- length += len(seg.data)
- if seg.frg == 0 {
- break
- }
- }
- return
-}
-
-// Receive data from kcp state machine
-//
-// Return number of bytes read.
-//
-// Return -1 when there is no readable data.
-//
-// Return -2 if len(buffer) is smaller than kcp.PeekSize().
-func (kcp *KCP) Recv(buffer []byte) (n int) {
- peeksize := kcp.PeekSize()
- if peeksize < 0 {
- return -1
- }
-
- if peeksize > len(buffer) {
- return -2
- }
-
- var fast_recover bool
- if len(kcp.rcv_queue) >= int(kcp.rcv_wnd) {
- fast_recover = true
- }
-
- // merge fragment
- count := 0
- for k := range kcp.rcv_queue {
- seg := &kcp.rcv_queue[k]
- copy(buffer, seg.data)
- buffer = buffer[len(seg.data):]
- n += len(seg.data)
- count++
- kcp.delSegment(seg)
- if seg.frg == 0 {
- break
- }
- }
- if count > 0 {
- kcp.rcv_queue = kcp.remove_front(kcp.rcv_queue, count)
- }
-
- // move available data from rcv_buf -> rcv_queue
- count = 0
- for k := range kcp.rcv_buf {
- seg := &kcp.rcv_buf[k]
- if seg.sn == kcp.rcv_nxt && len(kcp.rcv_queue)+count < int(kcp.rcv_wnd) {
- kcp.rcv_nxt++
- count++
- } else {
- break
- }
- }
-
- if count > 0 {
- kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[:count]...)
- kcp.rcv_buf = kcp.remove_front(kcp.rcv_buf, count)
- }
-
- // fast recover
- if len(kcp.rcv_queue) < int(kcp.rcv_wnd) && fast_recover {
- // ready to send back IKCP_CMD_WINS in ikcp_flush
- // tell remote my window size
- kcp.probe |= IKCP_ASK_TELL
- }
- return
-}
-
-// Send is user/upper level send, returns below zero for error
-func (kcp *KCP) Send(buffer []byte) int {
- var count int
- if len(buffer) == 0 {
- return -1
- }
-
- // append to previous segment in streaming mode (if possible)
- if kcp.stream != 0 {
- n := len(kcp.snd_queue)
- if n > 0 {
- seg := &kcp.snd_queue[n-1]
- if len(seg.data) < int(kcp.mss) {
- capacity := int(kcp.mss) - len(seg.data)
- extend := capacity
- if len(buffer) < capacity {
- extend = len(buffer)
- }
-
- // grow slice, the underlying cap is guaranteed to
- // be larger than kcp.mss
- oldlen := len(seg.data)
- seg.data = seg.data[:oldlen+extend]
- copy(seg.data[oldlen:], buffer)
- buffer = buffer[extend:]
- }
- }
-
- if len(buffer) == 0 {
- return 0
- }
- }
-
- if len(buffer) <= int(kcp.mss) {
- count = 1
- } else {
- count = (len(buffer) + int(kcp.mss) - 1) / int(kcp.mss)
- }
-
- if count > 255 {
- return -2
- }
-
- if count == 0 {
- count = 1
- }
-
- for i := 0; i < count; i++ {
- var size int
- if len(buffer) > int(kcp.mss) {
- size = int(kcp.mss)
- } else {
- size = len(buffer)
- }
- seg := kcp.newSegment(size)
- copy(seg.data, buffer[:size])
- if kcp.stream == 0 { // message mode
- seg.frg = uint8(count - i - 1)
- } else { // stream mode
- seg.frg = 0
- }
- kcp.snd_queue = append(kcp.snd_queue, seg)
- buffer = buffer[size:]
- }
- return 0
-}
-
-func (kcp *KCP) update_ack(rtt int32) {
- // https://tools.ietf.org/html/rfc6298
- var rto uint32
- if kcp.rx_srtt == 0 {
- kcp.rx_srtt = rtt
- kcp.rx_rttvar = rtt >> 1
- } else {
- delta := rtt - kcp.rx_srtt
- kcp.rx_srtt += delta >> 3
- if delta < 0 {
- delta = -delta
- }
- if rtt < kcp.rx_srtt-kcp.rx_rttvar {
- // if the new RTT sample is below the bottom of the range of
- // what an RTT measurement is expected to be.
- // give an 8x reduced weight versus its normal weighting
- kcp.rx_rttvar += (delta - kcp.rx_rttvar) >> 5
- } else {
- kcp.rx_rttvar += (delta - kcp.rx_rttvar) >> 2
- }
- }
- rto = uint32(kcp.rx_srtt) + _imax_(kcp.interval, uint32(kcp.rx_rttvar)<<2)
- kcp.rx_rto = _ibound_(kcp.rx_minrto, rto, IKCP_RTO_MAX)
-}
-
-func (kcp *KCP) shrink_buf() {
- if len(kcp.snd_buf) > 0 {
- seg := &kcp.snd_buf[0]
- kcp.snd_una = seg.sn
- } else {
- kcp.snd_una = kcp.snd_nxt
- }
-}
-
-func (kcp *KCP) parse_ack(sn uint32) {
- if _itimediff(sn, kcp.snd_una) < 0 || _itimediff(sn, kcp.snd_nxt) >= 0 {
- return
- }
-
- for k := range kcp.snd_buf {
- seg := &kcp.snd_buf[k]
- if sn == seg.sn {
- // mark and free space, but leave the segment here,
- // and wait until `una` to delete this, then we don't
- // have to shift the segments behind forward,
- // which is an expensive operation for large window
- seg.acked = 1
- kcp.delSegment(seg)
- break
- }
- if _itimediff(sn, seg.sn) < 0 {
- break
- }
- }
-}
-
-func (kcp *KCP) parse_fastack(sn, ts uint32) {
- if _itimediff(sn, kcp.snd_una) < 0 || _itimediff(sn, kcp.snd_nxt) >= 0 {
- return
- }
-
- for k := range kcp.snd_buf {
- seg := &kcp.snd_buf[k]
- if _itimediff(sn, seg.sn) < 0 {
- break
- } else if sn != seg.sn && _itimediff(seg.ts, ts) <= 0 {
- seg.fastack++
- }
- }
-}
-
-func (kcp *KCP) parse_una(una uint32) {
- count := 0
- for k := range kcp.snd_buf {
- seg := &kcp.snd_buf[k]
- if _itimediff(una, seg.sn) > 0 {
- kcp.delSegment(seg)
- count++
- } else {
- break
- }
- }
- if count > 0 {
- kcp.snd_buf = kcp.remove_front(kcp.snd_buf, count)
- }
-}
-
-// ack append
-func (kcp *KCP) ack_push(sn, ts uint32) {
- kcp.acklist = append(kcp.acklist, ackItem{sn, ts})
-}
-
-// returns true if data has repeated
-func (kcp *KCP) parse_data(newseg segment) bool {
- sn := newseg.sn
- if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) >= 0 ||
- _itimediff(sn, kcp.rcv_nxt) < 0 {
- return true
- }
-
- n := len(kcp.rcv_buf) - 1
- insert_idx := 0
- repeat := false
- for i := n; i >= 0; i-- {
- seg := &kcp.rcv_buf[i]
- if seg.sn == sn {
- repeat = true
- break
- }
- if _itimediff(sn, seg.sn) > 0 {
- insert_idx = i + 1
- break
- }
- }
-
- if !repeat {
- // replicate the content if it's new
- dataCopy := xmitBuf.Get().([]byte)[:len(newseg.data)]
- copy(dataCopy, newseg.data)
- newseg.data = dataCopy
-
- if insert_idx == n+1 {
- kcp.rcv_buf = append(kcp.rcv_buf, newseg)
- } else {
- kcp.rcv_buf = append(kcp.rcv_buf, segment{})
- copy(kcp.rcv_buf[insert_idx+1:], kcp.rcv_buf[insert_idx:])
- kcp.rcv_buf[insert_idx] = newseg
- }
- }
-
- // move available data from rcv_buf -> rcv_queue
- count := 0
- for k := range kcp.rcv_buf {
- seg := &kcp.rcv_buf[k]
- if seg.sn == kcp.rcv_nxt && len(kcp.rcv_queue)+count < int(kcp.rcv_wnd) {
- kcp.rcv_nxt++
- count++
- } else {
- break
- }
- }
- if count > 0 {
- kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[:count]...)
- kcp.rcv_buf = kcp.remove_front(kcp.rcv_buf, count)
- }
-
- return repeat
-}
-
-// Input a packet into kcp state machine.
-//
-// 'regular' indicates it's a real data packet from remote, and it means it's not generated from ReedSolomon
-// codecs.
-//
-// 'ackNoDelay' will trigger immediate ACK, but surely it will not be efficient in bandwidth
-func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int {
- snd_una := kcp.snd_una
- if len(data) < IKCP_OVERHEAD {
- return -1
- }
-
- var latest uint32 // the latest ack packet
- var flag int
- var inSegs uint64
-
- for {
- var ts, sn, length, una, conv uint32
- var wnd uint16
- var cmd, frg uint8
-
- if len(data) < int(IKCP_OVERHEAD) {
- break
- }
-
- data = ikcp_decode32u(data, &conv)
- if conv != kcp.conv {
- return -1
- }
-
- data = ikcp_decode8u(data, &cmd)
- data = ikcp_decode8u(data, &frg)
- data = ikcp_decode16u(data, &wnd)
- data = ikcp_decode32u(data, &ts)
- data = ikcp_decode32u(data, &sn)
- data = ikcp_decode32u(data, &una)
- data = ikcp_decode32u(data, &length)
- if len(data) < int(length) {
- return -2
- }
-
- if cmd != IKCP_CMD_PUSH && cmd != IKCP_CMD_ACK &&
- cmd != IKCP_CMD_WASK && cmd != IKCP_CMD_WINS {
- return -3
- }
-
- // only trust window updates from regular packets. i.e: latest update
- if regular {
- kcp.rmt_wnd = uint32(wnd)
- }
- kcp.parse_una(una)
- kcp.shrink_buf()
-
- if cmd == IKCP_CMD_ACK {
- kcp.parse_ack(sn)
- kcp.parse_fastack(sn, ts)
- flag |= 1
- latest = ts
- } else if cmd == IKCP_CMD_PUSH {
- repeat := true
- if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) < 0 {
- kcp.ack_push(sn, ts)
- if _itimediff(sn, kcp.rcv_nxt) >= 0 {
- var seg segment
- seg.conv = conv
- seg.cmd = cmd
- seg.frg = frg
- seg.wnd = wnd
- seg.ts = ts
- seg.sn = sn
- seg.una = una
- seg.data = data[:length] // delayed data copying
- repeat = kcp.parse_data(seg)
- }
- }
- if regular && repeat {
- atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
- }
- } else if cmd == IKCP_CMD_WASK {
- // ready to send back IKCP_CMD_WINS in Ikcp_flush
- // tell remote my window size
- kcp.probe |= IKCP_ASK_TELL
- } else if cmd == IKCP_CMD_WINS {
- // do nothing
- } else {
- return -3
- }
-
- inSegs++
- data = data[length:]
- }
- atomic.AddUint64(&DefaultSnmp.InSegs, inSegs)
-
- // update rtt with the latest ts
- // ignore the FEC packet
- if flag != 0 && regular {
- current := currentMs()
- if _itimediff(current, latest) >= 0 {
- kcp.update_ack(_itimediff(current, latest))
- }
- }
-
- // cwnd update when packet arrived
- if kcp.nocwnd == 0 {
- if _itimediff(kcp.snd_una, snd_una) > 0 {
- if kcp.cwnd < kcp.rmt_wnd {
- mss := kcp.mss
- if kcp.cwnd < kcp.ssthresh {
- kcp.cwnd++
- kcp.incr += mss
- } else {
- if kcp.incr < mss {
- kcp.incr = mss
- }
- kcp.incr += (mss*mss)/kcp.incr + (mss / 16)
- if (kcp.cwnd+1)*mss <= kcp.incr {
- kcp.cwnd++
- }
- }
- if kcp.cwnd > kcp.rmt_wnd {
- kcp.cwnd = kcp.rmt_wnd
- kcp.incr = kcp.rmt_wnd * mss
- }
- }
- }
- }
-
- if ackNoDelay && len(kcp.acklist) > 0 { // ack immediately
- kcp.flush(true)
- }
- return 0
-}
-
-func (kcp *KCP) wnd_unused() uint16 {
- if len(kcp.rcv_queue) < int(kcp.rcv_wnd) {
- return uint16(int(kcp.rcv_wnd) - len(kcp.rcv_queue))
- }
- return 0
-}
-
-// flush pending data
-func (kcp *KCP) flush(ackOnly bool) uint32 {
- var seg segment
- seg.conv = kcp.conv
- seg.cmd = IKCP_CMD_ACK
- seg.wnd = kcp.wnd_unused()
- seg.una = kcp.rcv_nxt
-
- buffer := kcp.buffer
- ptr := buffer[kcp.reserved:] // keep n bytes untouched
-
- // makeSpace makes room for writing
- makeSpace := func(space int) {
- size := len(buffer) - len(ptr)
- if size+space > int(kcp.mtu) {
- kcp.output(buffer, size)
- ptr = buffer[kcp.reserved:]
- }
- }
-
- // flush bytes in buffer if there is any
- flushBuffer := func() {
- size := len(buffer) - len(ptr)
- if size > kcp.reserved {
- kcp.output(buffer, size)
- }
- }
-
- // flush acknowledges
- for i, ack := range kcp.acklist {
- makeSpace(IKCP_OVERHEAD)
- // filter jitters caused by bufferbloat
- if ack.sn >= kcp.rcv_nxt || len(kcp.acklist)-1 == i {
- seg.sn, seg.ts = ack.sn, ack.ts
- ptr = seg.encode(ptr)
- }
- }
- kcp.acklist = kcp.acklist[0:0]
-
- if ackOnly { // flash remain ack segments
- flushBuffer()
- return kcp.interval
- }
-
- // probe window size (if remote window size equals zero)
- if kcp.rmt_wnd == 0 {
- current := currentMs()
- if kcp.probe_wait == 0 {
- kcp.probe_wait = IKCP_PROBE_INIT
- kcp.ts_probe = current + kcp.probe_wait
- } else {
- if _itimediff(current, kcp.ts_probe) >= 0 {
- if kcp.probe_wait < IKCP_PROBE_INIT {
- kcp.probe_wait = IKCP_PROBE_INIT
- }
- kcp.probe_wait += kcp.probe_wait / 2
- if kcp.probe_wait > IKCP_PROBE_LIMIT {
- kcp.probe_wait = IKCP_PROBE_LIMIT
- }
- kcp.ts_probe = current + kcp.probe_wait
- kcp.probe |= IKCP_ASK_SEND
- }
- }
- } else {
- kcp.ts_probe = 0
- kcp.probe_wait = 0
- }
-
- // flush window probing commands
- if (kcp.probe & IKCP_ASK_SEND) != 0 {
- seg.cmd = IKCP_CMD_WASK
- makeSpace(IKCP_OVERHEAD)
- ptr = seg.encode(ptr)
- }
-
- // flush window probing commands
- if (kcp.probe & IKCP_ASK_TELL) != 0 {
- seg.cmd = IKCP_CMD_WINS
- makeSpace(IKCP_OVERHEAD)
- ptr = seg.encode(ptr)
- }
-
- kcp.probe = 0
-
- // calculate window size
- cwnd := _imin_(kcp.snd_wnd, kcp.rmt_wnd)
- if kcp.nocwnd == 0 {
- cwnd = _imin_(kcp.cwnd, cwnd)
- }
-
- // sliding window, controlled by snd_nxt && sna_una+cwnd
- newSegsCount := 0
- for k := range kcp.snd_queue {
- if _itimediff(kcp.snd_nxt, kcp.snd_una+cwnd) >= 0 {
- break
- }
- newseg := kcp.snd_queue[k]
- newseg.conv = kcp.conv
- newseg.cmd = IKCP_CMD_PUSH
- newseg.sn = kcp.snd_nxt
- kcp.snd_buf = append(kcp.snd_buf, newseg)
- kcp.snd_nxt++
- newSegsCount++
- }
- if newSegsCount > 0 {
- kcp.snd_queue = kcp.remove_front(kcp.snd_queue, newSegsCount)
- }
-
- // calculate resent
- resent := uint32(kcp.fastresend)
- if kcp.fastresend <= 0 {
- resent = 0xffffffff
- }
-
- // check for retransmissions
- current := currentMs()
- var change, lostSegs, fastRetransSegs, earlyRetransSegs uint64
- minrto := int32(kcp.interval)
-
- ref := kcp.snd_buf[:len(kcp.snd_buf)] // for bounds check elimination
- for k := range ref {
- segment := &ref[k]
- needsend := false
- if segment.acked == 1 {
- continue
- }
- if segment.xmit == 0 { // initial transmit
- needsend = true
- segment.rto = kcp.rx_rto
- segment.resendts = current + segment.rto
- } else if segment.fastack >= resent { // fast retransmit
- needsend = true
- segment.fastack = 0
- segment.rto = kcp.rx_rto
- segment.resendts = current + segment.rto
- change++
- fastRetransSegs++
- } else if segment.fastack > 0 && newSegsCount == 0 { // early retransmit
- needsend = true
- segment.fastack = 0
- segment.rto = kcp.rx_rto
- segment.resendts = current + segment.rto
- change++
- earlyRetransSegs++
- } else if _itimediff(current, segment.resendts) >= 0 { // RTO
- needsend = true
- if kcp.nodelay == 0 {
- segment.rto += kcp.rx_rto
- } else {
- segment.rto += kcp.rx_rto / 2
- }
- segment.fastack = 0
- segment.resendts = current + segment.rto
- lostSegs++
- }
-
- if needsend {
- current = currentMs()
- segment.xmit++
- segment.ts = current
- segment.wnd = seg.wnd
- segment.una = seg.una
-
- need := IKCP_OVERHEAD + len(segment.data)
- makeSpace(need)
- ptr = segment.encode(ptr)
- copy(ptr, segment.data)
- ptr = ptr[len(segment.data):]
-
- if segment.xmit >= kcp.dead_link {
- kcp.state = 0xFFFFFFFF
- }
- }
-
- // get the nearest rto
- if rto := _itimediff(segment.resendts, current); rto > 0 && rto < minrto {
- minrto = rto
- }
- }
-
- // flash remain segments
- flushBuffer()
-
- // counter updates
- sum := lostSegs
- if lostSegs > 0 {
- atomic.AddUint64(&DefaultSnmp.LostSegs, lostSegs)
- }
- if fastRetransSegs > 0 {
- atomic.AddUint64(&DefaultSnmp.FastRetransSegs, fastRetransSegs)
- sum += fastRetransSegs
- }
- if earlyRetransSegs > 0 {
- atomic.AddUint64(&DefaultSnmp.EarlyRetransSegs, earlyRetransSegs)
- sum += earlyRetransSegs
- }
- if sum > 0 {
- atomic.AddUint64(&DefaultSnmp.RetransSegs, sum)
- }
-
- // cwnd update
- if kcp.nocwnd == 0 {
- // update ssthresh
- // rate halving, https://tools.ietf.org/html/rfc6937
- if change > 0 {
- inflight := kcp.snd_nxt - kcp.snd_una
- kcp.ssthresh = inflight / 2
- if kcp.ssthresh < IKCP_THRESH_MIN {
- kcp.ssthresh = IKCP_THRESH_MIN
- }
- kcp.cwnd = kcp.ssthresh + resent
- kcp.incr = kcp.cwnd * kcp.mss
- }
-
- // congestion control, https://tools.ietf.org/html/rfc5681
- if lostSegs > 0 {
- kcp.ssthresh = cwnd / 2
- if kcp.ssthresh < IKCP_THRESH_MIN {
- kcp.ssthresh = IKCP_THRESH_MIN
- }
- kcp.cwnd = 1
- kcp.incr = kcp.mss
- }
-
- if kcp.cwnd < 1 {
- kcp.cwnd = 1
- kcp.incr = kcp.mss
- }
- }
-
- return uint32(minrto)
-}
-
-// (deprecated)
-//
-// Update updates state (call it repeatedly, every 10ms-100ms), or you can ask
-// ikcp_check when to call it again (without ikcp_input/_send calling).
-// 'current' - current timestamp in millisec.
-func (kcp *KCP) Update() {
- var slap int32
-
- current := currentMs()
- if kcp.updated == 0 {
- kcp.updated = 1
- kcp.ts_flush = current
- }
-
- slap = _itimediff(current, kcp.ts_flush)
-
- if slap >= 10000 || slap < -10000 {
- kcp.ts_flush = current
- slap = 0
- }
-
- if slap >= 0 {
- kcp.ts_flush += kcp.interval
- if _itimediff(current, kcp.ts_flush) >= 0 {
- kcp.ts_flush = current + kcp.interval
- }
- kcp.flush(false)
- }
-}
-
-// (deprecated)
-//
-// Check determines when should you invoke ikcp_update:
-// returns when you should invoke ikcp_update in millisec, if there
-// is no ikcp_input/_send calling. you can call ikcp_update in that
-// time, instead of call update repeatly.
-// Important to reduce unnacessary ikcp_update invoking. use it to
-// schedule ikcp_update (eg. implementing an epoll-like mechanism,
-// or optimize ikcp_update when handling massive kcp connections)
-func (kcp *KCP) Check() uint32 {
- current := currentMs()
- ts_flush := kcp.ts_flush
- tm_flush := int32(0x7fffffff)
- tm_packet := int32(0x7fffffff)
- minimal := uint32(0)
- if kcp.updated == 0 {
- return current
- }
-
- if _itimediff(current, ts_flush) >= 10000 ||
- _itimediff(current, ts_flush) < -10000 {
- ts_flush = current
- }
-
- if _itimediff(current, ts_flush) >= 0 {
- return current
- }
-
- tm_flush = _itimediff(ts_flush, current)
-
- for k := range kcp.snd_buf {
- seg := &kcp.snd_buf[k]
- diff := _itimediff(seg.resendts, current)
- if diff <= 0 {
- return current
- }
- if diff < tm_packet {
- tm_packet = diff
- }
- }
-
- minimal = uint32(tm_packet)
- if tm_packet >= tm_flush {
- minimal = uint32(tm_flush)
- }
- if minimal >= kcp.interval {
- minimal = kcp.interval
- }
-
- return current + minimal
-}
-
-// SetMtu changes MTU size, default is 1400
-func (kcp *KCP) SetMtu(mtu int) int {
- if mtu < 50 || mtu < IKCP_OVERHEAD {
- return -1
- }
- if kcp.reserved >= int(kcp.mtu-IKCP_OVERHEAD) || kcp.reserved < 0 {
- return -1
- }
-
- buffer := make([]byte, mtu)
- if buffer == nil {
- return -2
- }
- kcp.mtu = uint32(mtu)
- kcp.mss = kcp.mtu - IKCP_OVERHEAD - uint32(kcp.reserved)
- kcp.buffer = buffer
- return 0
-}
-
-// NoDelay options
-// fastest: ikcp_nodelay(kcp, 1, 20, 2, 1)
-// nodelay: 0:disable(default), 1:enable
-// interval: internal update timer interval in millisec, default is 100ms
-// resend: 0:disable fast resend(default), 1:enable fast resend
-// nc: 0:normal congestion control(default), 1:disable congestion control
-func (kcp *KCP) NoDelay(nodelay, interval, resend, nc int) int {
- if nodelay >= 0 {
- kcp.nodelay = uint32(nodelay)
- if nodelay != 0 {
- kcp.rx_minrto = IKCP_RTO_NDL
- } else {
- kcp.rx_minrto = IKCP_RTO_MIN
- }
- }
- if interval >= 0 {
- if interval > 5000 {
- interval = 5000
- } else if interval < 10 {
- interval = 10
- }
- kcp.interval = uint32(interval)
- }
- if resend >= 0 {
- kcp.fastresend = int32(resend)
- }
- if nc >= 0 {
- kcp.nocwnd = int32(nc)
- }
- return 0
-}
-
-// WndSize sets maximum window size: sndwnd=32, rcvwnd=32 by default
-func (kcp *KCP) WndSize(sndwnd, rcvwnd int) int {
- if sndwnd > 0 {
- kcp.snd_wnd = uint32(sndwnd)
- }
- if rcvwnd > 0 {
- kcp.rcv_wnd = uint32(rcvwnd)
- }
- return 0
-}
-
-// WaitSnd gets how many packet is waiting to be sent
-func (kcp *KCP) WaitSnd() int {
- return len(kcp.snd_buf) + len(kcp.snd_queue)
-}
-
-// remove front n elements from queue
-// if the number of elements to remove is more than half of the size.
-// just shift the rear elements to front, otherwise just reslice q to q[n:]
-// then the cost of runtime.growslice can always be less than n/2
-func (kcp *KCP) remove_front(q []segment, n int) []segment {
- if n > cap(q)/2 {
- newn := copy(q, q[n:])
- return q[:newn]
- }
- return q[n:]
-}
-
-// Release all cached outgoing segments
-func (kcp *KCP) ReleaseTX() {
- for k := range kcp.snd_queue {
- if kcp.snd_queue[k].data != nil {
- xmitBuf.Put(kcp.snd_queue[k].data)
- }
- }
- for k := range kcp.snd_buf {
- if kcp.snd_buf[k].data != nil {
- xmitBuf.Put(kcp.snd_buf[k].data)
- }
- }
- kcp.snd_queue = nil
- kcp.snd_buf = nil
-}
diff --git a/vendor/github.com/xtaci/kcp-go/readloop.go b/vendor/github.com/xtaci/kcp-go/readloop.go
deleted file mode 100644
index bc48e84..0000000
--- a/vendor/github.com/xtaci/kcp-go/readloop.go
+++ /dev/null
@@ -1,48 +0,0 @@
-package kcp
-
-import (
- "sync/atomic"
-
- "github.com/pkg/errors"
-)
-
-func (s *UDPSession) defaultReadLoop() {
- buf := make([]byte, mtuLimit)
- var src string
- for {
- if n, addr, err := s.conn.ReadFrom(buf); err == nil {
- // make sure the packet is from the same source
- if src == "" { // set source address
- src = addr.String()
- } else if addr.String() != src {
- atomic.AddUint64(&DefaultSnmp.InErrs, 1)
- continue
- }
-
- if n >= s.headerSize+IKCP_OVERHEAD {
- s.packetInput(buf[:n])
- } else {
- atomic.AddUint64(&DefaultSnmp.InErrs, 1)
- }
- } else {
- s.notifyReadError(errors.WithStack(err))
- return
- }
- }
-}
-
-func (l *Listener) defaultMonitor() {
- buf := make([]byte, mtuLimit)
- for {
- if n, from, err := l.conn.ReadFrom(buf); err == nil {
- if n >= l.headerSize+IKCP_OVERHEAD {
- l.packetInput(buf[:n], from)
- } else {
- atomic.AddUint64(&DefaultSnmp.InErrs, 1)
- }
- } else {
- l.notifyReadError(errors.WithStack(err))
- return
- }
- }
-}
diff --git a/vendor/github.com/xtaci/kcp-go/readloop_generic.go b/vendor/github.com/xtaci/kcp-go/readloop_generic.go
deleted file mode 100644
index 5dbe4f4..0000000
--- a/vendor/github.com/xtaci/kcp-go/readloop_generic.go
+++ /dev/null
@@ -1,11 +0,0 @@
-// +build !linux
-
-package kcp
-
-func (s *UDPSession) readLoop() {
- s.defaultReadLoop()
-}
-
-func (l *Listener) monitor() {
- l.defaultMonitor()
-}
diff --git a/vendor/github.com/xtaci/kcp-go/readloop_linux.go b/vendor/github.com/xtaci/kcp-go/readloop_linux.go
deleted file mode 100644
index 6226478..0000000
--- a/vendor/github.com/xtaci/kcp-go/readloop_linux.go
+++ /dev/null
@@ -1,120 +0,0 @@
-// +build linux
-
-package kcp
-
-import (
- "net"
- "os"
- "sync/atomic"
-
- "github.com/pkg/errors"
- "golang.org/x/net/ipv4"
- "golang.org/x/net/ipv6"
-)
-
-// the read loop for a client session
-func (s *UDPSession) readLoop() {
- // default version
- if s.xconn == nil {
- s.defaultReadLoop()
- return
- }
-
- // x/net version
- var src string
- msgs := make([]ipv4.Message, batchSize)
- for k := range msgs {
- msgs[k].Buffers = [][]byte{make([]byte, mtuLimit)}
- }
-
- for {
- if count, err := s.xconn.ReadBatch(msgs, 0); err == nil {
- for i := 0; i < count; i++ {
- msg := &msgs[i]
- // make sure the packet is from the same source
- if src == "" { // set source address if nil
- src = msg.Addr.String()
- } else if msg.Addr.String() != src {
- atomic.AddUint64(&DefaultSnmp.InErrs, 1)
- continue
- }
-
- if msg.N < s.headerSize+IKCP_OVERHEAD {
- atomic.AddUint64(&DefaultSnmp.InErrs, 1)
- continue
- }
-
- // source and size has validated
- s.packetInput(msg.Buffers[0][:msg.N])
- }
- } else {
- // compatibility issue:
- // for linux kernel<=2.6.32, support for sendmmsg is not available
- // an error of type os.SyscallError will be returned
- if operr, ok := err.(*net.OpError); ok {
- if se, ok := operr.Err.(*os.SyscallError); ok {
- if se.Syscall == "recvmmsg" {
- s.defaultReadLoop()
- return
- }
- }
- }
- s.notifyReadError(errors.WithStack(err))
- return
- }
- }
-}
-
-// monitor incoming data for all connections of server
-func (l *Listener) monitor() {
- var xconn batchConn
- if _, ok := l.conn.(*net.UDPConn); ok {
- addr, err := net.ResolveUDPAddr("udp", l.conn.LocalAddr().String())
- if err == nil {
- if addr.IP.To4() != nil {
- xconn = ipv4.NewPacketConn(l.conn)
- } else {
- xconn = ipv6.NewPacketConn(l.conn)
- }
- }
- }
-
- // default version
- if xconn == nil {
- l.defaultMonitor()
- return
- }
-
- // x/net version
- msgs := make([]ipv4.Message, batchSize)
- for k := range msgs {
- msgs[k].Buffers = [][]byte{make([]byte, mtuLimit)}
- }
-
- for {
- if count, err := xconn.ReadBatch(msgs, 0); err == nil {
- for i := 0; i < count; i++ {
- msg := &msgs[i]
- if msg.N >= l.headerSize+IKCP_OVERHEAD {
- l.packetInput(msg.Buffers[0][:msg.N], msg.Addr)
- } else {
- atomic.AddUint64(&DefaultSnmp.InErrs, 1)
- }
- }
- } else {
- // compatibility issue:
- // for linux kernel<=2.6.32, support for sendmmsg is not available
- // an error of type os.SyscallError will be returned
- if operr, ok := err.(*net.OpError); ok {
- if se, ok := operr.Err.(*os.SyscallError); ok {
- if se.Syscall == "recvmmsg" {
- l.defaultMonitor()
- return
- }
- }
- }
- l.notifyReadError(errors.WithStack(err))
- return
- }
- }
-}
diff --git a/vendor/github.com/xtaci/kcp-go/sess.go b/vendor/github.com/xtaci/kcp-go/sess.go
deleted file mode 100644
index 2384816..0000000
--- a/vendor/github.com/xtaci/kcp-go/sess.go
+++ /dev/null
@@ -1,1054 +0,0 @@
-// Package kcp-go is a Reliable-UDP library for golang.
-//
-// This library intents to provide a smooth, resilient, ordered,
-// error-checked and anonymous delivery of streams over UDP packets.
-//
-// The interfaces of this package aims to be compatible with
-// net.Conn in standard library, but offers powerful features for advanced users.
-package kcp
-
-import (
- "crypto/rand"
- "encoding/binary"
- "hash/crc32"
- "io"
- "net"
- "sync"
- "sync/atomic"
- "time"
-
- "github.com/pkg/errors"
- "golang.org/x/net/ipv4"
- "golang.org/x/net/ipv6"
-)
-
-const (
- // 16-bytes nonce for each packet
- nonceSize = 16
-
- // 4-bytes packet checksum
- crcSize = 4
-
- // overall crypto header size
- cryptHeaderSize = nonceSize + crcSize
-
- // maximum packet size
- mtuLimit = 1500
-
- // FEC keeps rxFECMulti* (dataShard+parityShard) ordered packets in memory
- rxFECMulti = 3
-
- // accept backlog
- acceptBacklog = 128
-)
-
-var (
- errInvalidOperation = errors.New("invalid operation")
- errTimeout = errors.New("timeout")
-)
-
-var (
- // a system-wide packet buffer shared among sending, receiving and FEC
- // to mitigate high-frequency memory allocation for packets
- xmitBuf sync.Pool
-)
-
-func init() {
- xmitBuf.New = func() interface{} {
- return make([]byte, mtuLimit)
- }
-}
-
-type (
- // UDPSession defines a KCP session implemented by UDP
- UDPSession struct {
- conn net.PacketConn // the underlying packet connection
- kcp *KCP // KCP ARQ protocol
- l *Listener // pointing to the Listener object if it's been accepted by a Listener
- block BlockCrypt // block encryption object
-
- // kcp receiving is based on packets
- // recvbuf turns packets into stream
- recvbuf []byte
- bufptr []byte
-
- // FEC codec
- fecDecoder *fecDecoder
- fecEncoder *fecEncoder
-
- // settings
- remote net.Addr // remote peer address
- rd time.Time // read deadline
- wd time.Time // write deadline
- headerSize int // the header size additional to a KCP frame
- ackNoDelay bool // send ack immediately for each incoming packet(testing purpose)
- writeDelay bool // delay kcp.flush() for Write() for bulk transfer
- dup int // duplicate udp packets(testing purpose)
-
- // notifications
- die chan struct{} // notify current session has Closed
- dieOnce sync.Once
- chReadEvent chan struct{} // notify Read() can be called without blocking
- chWriteEvent chan struct{} // notify Write() can be called without blocking
-
- // socket error handling
- socketReadError atomic.Value
- socketWriteError atomic.Value
- chSocketReadError chan struct{}
- chSocketWriteError chan struct{}
- socketReadErrorOnce sync.Once
- socketWriteErrorOnce sync.Once
-
- // nonce generator
- nonce Entropy
-
- // packets waiting to be sent on wire
- txqueue []ipv4.Message
- xconn batchConn // for x/net
- xconnWriteError error
-
- mu sync.Mutex
- }
-
- setReadBuffer interface {
- SetReadBuffer(bytes int) error
- }
-
- setWriteBuffer interface {
- SetWriteBuffer(bytes int) error
- }
-
- setDSCP interface {
- SetDSCP(int) error
- }
-)
-
-// newUDPSession create a new udp session for client or server
-func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn net.PacketConn, remote net.Addr, block BlockCrypt) *UDPSession {
- sess := new(UDPSession)
- sess.die = make(chan struct{})
- sess.nonce = new(nonceAES128)
- sess.nonce.Init()
- sess.chReadEvent = make(chan struct{}, 1)
- sess.chWriteEvent = make(chan struct{}, 1)
- sess.chSocketReadError = make(chan struct{})
- sess.chSocketWriteError = make(chan struct{})
- sess.remote = remote
- sess.conn = conn
- sess.l = l
- sess.block = block
- sess.recvbuf = make([]byte, mtuLimit)
-
- // cast to writebatch conn
- if _, ok := conn.(*net.UDPConn); ok {
- addr, err := net.ResolveUDPAddr("udp", conn.LocalAddr().String())
- if err == nil {
- if addr.IP.To4() != nil {
- sess.xconn = ipv4.NewPacketConn(conn)
- } else {
- sess.xconn = ipv6.NewPacketConn(conn)
- }
- }
- }
-
- // FEC codec initialization
- sess.fecDecoder = newFECDecoder(rxFECMulti*(dataShards+parityShards), dataShards, parityShards)
- if sess.block != nil {
- sess.fecEncoder = newFECEncoder(dataShards, parityShards, cryptHeaderSize)
- } else {
- sess.fecEncoder = newFECEncoder(dataShards, parityShards, 0)
- }
-
- // calculate additional header size introduced by FEC and encryption
- if sess.block != nil {
- sess.headerSize += cryptHeaderSize
- }
- if sess.fecEncoder != nil {
- sess.headerSize += fecHeaderSizePlus2
- }
-
- sess.kcp = NewKCP(conv, func(buf []byte, size int) {
- if size >= IKCP_OVERHEAD+sess.headerSize {
- sess.output(buf[:size])
- }
- })
- sess.kcp.ReserveBytes(sess.headerSize)
-
- if sess.l == nil { // it's a client connection
- go sess.readLoop()
- atomic.AddUint64(&DefaultSnmp.ActiveOpens, 1)
- } else {
- atomic.AddUint64(&DefaultSnmp.PassiveOpens, 1)
- }
-
- // start per-session updater
- go sess.updater()
-
- currestab := atomic.AddUint64(&DefaultSnmp.CurrEstab, 1)
- maxconn := atomic.LoadUint64(&DefaultSnmp.MaxConn)
- if currestab > maxconn {
- atomic.CompareAndSwapUint64(&DefaultSnmp.MaxConn, maxconn, currestab)
- }
-
- return sess
-}
-
-// Read implements net.Conn
-func (s *UDPSession) Read(b []byte) (n int, err error) {
- for {
- s.mu.Lock()
- if len(s.bufptr) > 0 { // copy from buffer into b
- n = copy(b, s.bufptr)
- s.bufptr = s.bufptr[n:]
- s.mu.Unlock()
- atomic.AddUint64(&DefaultSnmp.BytesReceived, uint64(n))
- return n, nil
- }
-
- if size := s.kcp.PeekSize(); size > 0 { // peek data size from kcp
- if len(b) >= size { // receive data into 'b' directly
- s.kcp.Recv(b)
- s.mu.Unlock()
- atomic.AddUint64(&DefaultSnmp.BytesReceived, uint64(size))
- return size, nil
- }
-
- // if necessary resize the stream buffer to guarantee a sufficent buffer space
- if cap(s.recvbuf) < size {
- s.recvbuf = make([]byte, size)
- }
-
- // resize the length of recvbuf to correspond to data size
- s.recvbuf = s.recvbuf[:size]
- s.kcp.Recv(s.recvbuf)
- n = copy(b, s.recvbuf) // copy to 'b'
- s.bufptr = s.recvbuf[n:] // pointer update
- s.mu.Unlock()
- atomic.AddUint64(&DefaultSnmp.BytesReceived, uint64(n))
- return n, nil
- }
-
- // deadline for current reading operation
- var timeout *time.Timer
- var c <-chan time.Time
- if !s.rd.IsZero() {
- if time.Now().After(s.rd) {
- s.mu.Unlock()
- return 0, errors.WithStack(errTimeout)
- }
-
- delay := s.rd.Sub(time.Now())
- timeout = time.NewTimer(delay)
- c = timeout.C
- }
- s.mu.Unlock()
-
- // wait for read event or timeout or error
- select {
- case <-s.chReadEvent:
- if timeout != nil {
- timeout.Stop()
- }
- case <-c:
- return 0, errors.WithStack(errTimeout)
- case <-s.chSocketReadError:
- return 0, s.socketReadError.Load().(error)
- case <-s.die:
- return 0, errors.WithStack(io.ErrClosedPipe)
- }
- }
-}
-
-// Write implements net.Conn
-func (s *UDPSession) Write(b []byte) (n int, err error) { return s.WriteBuffers([][]byte{b}) }
-
-// WriteBuffers write a vector of byte slices to the underlying connection
-func (s *UDPSession) WriteBuffers(v [][]byte) (n int, err error) {
- for {
- select {
- case <-s.chSocketWriteError:
- return 0, s.socketWriteError.Load().(error)
- case <-s.die:
- return 0, errors.WithStack(io.ErrClosedPipe)
- default:
- }
-
- s.mu.Lock()
-
- // make sure write do not overflow the max sliding window on both side
- waitsnd := s.kcp.WaitSnd()
- if waitsnd < int(s.kcp.snd_wnd) && waitsnd < int(s.kcp.rmt_wnd) {
- for _, b := range v {
- n += len(b)
- for {
- if len(b) <= int(s.kcp.mss) {
- s.kcp.Send(b)
- break
- } else {
- s.kcp.Send(b[:s.kcp.mss])
- b = b[s.kcp.mss:]
- }
- }
- }
-
- waitsnd = s.kcp.WaitSnd()
- if waitsnd >= int(s.kcp.snd_wnd) || waitsnd >= int(s.kcp.rmt_wnd) || !s.writeDelay {
- s.kcp.flush(false)
- s.uncork()
- }
- s.mu.Unlock()
- atomic.AddUint64(&DefaultSnmp.BytesSent, uint64(n))
- return n, nil
- }
-
- var timeout *time.Timer
- var c <-chan time.Time
- if !s.wd.IsZero() {
- if time.Now().After(s.wd) {
- s.mu.Unlock()
- return 0, errors.WithStack(errTimeout)
- }
- delay := s.wd.Sub(time.Now())
- timeout = time.NewTimer(delay)
- c = timeout.C
- }
- s.mu.Unlock()
-
- select {
- case <-s.chWriteEvent:
- if timeout != nil {
- timeout.Stop()
- }
- case <-c:
- return 0, errors.WithStack(errTimeout)
- case <-s.chSocketWriteError:
- return 0, s.socketWriteError.Load().(error)
- case <-s.die:
- return 0, errors.WithStack(io.ErrClosedPipe)
- }
- }
-}
-
-// uncork sends data in txqueue if there is any
-func (s *UDPSession) uncork() {
- if len(s.txqueue) > 0 {
- s.tx(s.txqueue)
- // recycle
- for k := range s.txqueue {
- xmitBuf.Put(s.txqueue[k].Buffers[0])
- s.txqueue[k].Buffers = nil
- }
- s.txqueue = s.txqueue[:0]
- }
- return
-}
-
-// Close closes the connection.
-func (s *UDPSession) Close() error {
- var once bool
- s.dieOnce.Do(func() {
- close(s.die)
- once = true
- })
-
- if once {
- atomic.AddUint64(&DefaultSnmp.CurrEstab, ^uint64(0))
-
- // try best to send all queued messages
- s.mu.Lock()
- s.kcp.flush(false)
- s.uncork()
- // release pending segments
- s.kcp.ReleaseTX()
- if s.fecDecoder != nil {
- s.fecDecoder.release()
- }
- s.mu.Unlock()
-
- if s.l != nil { // belongs to listener
- s.l.closeSession(s.remote)
- return nil
- } else { // client socket close
- return s.conn.Close()
- }
- } else {
- return errors.WithStack(io.ErrClosedPipe)
- }
-}
-
-// LocalAddr returns the local network address. The Addr returned is shared by all invocations of LocalAddr, so do not modify it.
-func (s *UDPSession) LocalAddr() net.Addr { return s.conn.LocalAddr() }
-
-// RemoteAddr returns the remote network address. The Addr returned is shared by all invocations of RemoteAddr, so do not modify it.
-func (s *UDPSession) RemoteAddr() net.Addr { return s.remote }
-
-// SetDeadline sets the deadline associated with the listener. A zero time value disables the deadline.
-func (s *UDPSession) SetDeadline(t time.Time) error {
- s.mu.Lock()
- defer s.mu.Unlock()
- s.rd = t
- s.wd = t
- s.notifyReadEvent()
- s.notifyWriteEvent()
- return nil
-}
-
-// SetReadDeadline implements the Conn SetReadDeadline method.
-func (s *UDPSession) SetReadDeadline(t time.Time) error {
- s.mu.Lock()
- defer s.mu.Unlock()
- s.rd = t
- s.notifyReadEvent()
- return nil
-}
-
-// SetWriteDeadline implements the Conn SetWriteDeadline method.
-func (s *UDPSession) SetWriteDeadline(t time.Time) error {
- s.mu.Lock()
- defer s.mu.Unlock()
- s.wd = t
- s.notifyWriteEvent()
- return nil
-}
-
-// SetWriteDelay delays write for bulk transfer until the next update interval
-func (s *UDPSession) SetWriteDelay(delay bool) {
- s.mu.Lock()
- defer s.mu.Unlock()
- s.writeDelay = delay
-}
-
-// SetWindowSize set maximum window size
-func (s *UDPSession) SetWindowSize(sndwnd, rcvwnd int) {
- s.mu.Lock()
- defer s.mu.Unlock()
- s.kcp.WndSize(sndwnd, rcvwnd)
-}
-
-// SetMtu sets the maximum transmission unit(not including UDP header)
-func (s *UDPSession) SetMtu(mtu int) bool {
- if mtu > mtuLimit {
- return false
- }
-
- s.mu.Lock()
- defer s.mu.Unlock()
- s.kcp.SetMtu(mtu)
- return true
-}
-
-// SetStreamMode toggles the stream mode on/off
-func (s *UDPSession) SetStreamMode(enable bool) {
- s.mu.Lock()
- defer s.mu.Unlock()
- if enable {
- s.kcp.stream = 1
- } else {
- s.kcp.stream = 0
- }
-}
-
-// SetACKNoDelay changes ack flush option, set true to flush ack immediately,
-func (s *UDPSession) SetACKNoDelay(nodelay bool) {
- s.mu.Lock()
- defer s.mu.Unlock()
- s.ackNoDelay = nodelay
-}
-
-// (deprecated)
-//
-// SetDUP duplicates udp packets for kcp output.
-func (s *UDPSession) SetDUP(dup int) {
- s.mu.Lock()
- defer s.mu.Unlock()
- s.dup = dup
-}
-
-// SetNoDelay calls nodelay() of kcp
-// https://github.com/skywind3000/kcp/blob/master/README.en.md#protocol-configuration
-func (s *UDPSession) SetNoDelay(nodelay, interval, resend, nc int) {
- s.mu.Lock()
- defer s.mu.Unlock()
- s.kcp.NoDelay(nodelay, interval, resend, nc)
-}
-
-// SetDSCP sets the 6bit DSCP field in IPv4 header, or 8bit Traffic Class in IPv6 header.
-//
-// if the underlying connection has implemented `func SetDSCP(int) error`, SetDSCP() will invoke
-// this function instead.
-//
-// It has no effect if it's accepted from Listener.
-func (s *UDPSession) SetDSCP(dscp int) error {
- s.mu.Lock()
- defer s.mu.Unlock()
- if s.l != nil {
- return errInvalidOperation
- }
-
- // interface enabled
- if ts, ok := s.conn.(setDSCP); ok {
- return ts.SetDSCP(dscp)
- }
-
- if nc, ok := s.conn.(net.Conn); ok {
- var succeed bool
- if err := ipv4.NewConn(nc).SetTOS(dscp << 2); err == nil {
- succeed = true
- }
- if err := ipv6.NewConn(nc).SetTrafficClass(dscp); err == nil {
- succeed = true
- }
-
- if succeed {
- return nil
- }
- }
- return errInvalidOperation
-}
-
-// SetReadBuffer sets the socket read buffer, no effect if it's accepted from Listener
-func (s *UDPSession) SetReadBuffer(bytes int) error {
- s.mu.Lock()
- defer s.mu.Unlock()
- if s.l == nil {
- if nc, ok := s.conn.(setReadBuffer); ok {
- return nc.SetReadBuffer(bytes)
- }
- }
- return errInvalidOperation
-}
-
-// SetWriteBuffer sets the socket write buffer, no effect if it's accepted from Listener
-func (s *UDPSession) SetWriteBuffer(bytes int) error {
- s.mu.Lock()
- defer s.mu.Unlock()
- if s.l == nil {
- if nc, ok := s.conn.(setWriteBuffer); ok {
- return nc.SetWriteBuffer(bytes)
- }
- }
- return errInvalidOperation
-}
-
-// post-processing for sending a packet from kcp core
-// steps:
-// 1. FEC packet generation
-// 2. CRC32 integrity
-// 3. Encryption
-// 4. TxQueue
-func (s *UDPSession) output(buf []byte) {
- var ecc [][]byte
-
- // 1. FEC encoding
- if s.fecEncoder != nil {
- ecc = s.fecEncoder.encode(buf)
- }
-
- // 2&3. crc32 & encryption
- if s.block != nil {
- s.nonce.Fill(buf[:nonceSize])
- checksum := crc32.ChecksumIEEE(buf[cryptHeaderSize:])
- binary.LittleEndian.PutUint32(buf[nonceSize:], checksum)
- s.block.Encrypt(buf, buf)
-
- for k := range ecc {
- s.nonce.Fill(ecc[k][:nonceSize])
- checksum := crc32.ChecksumIEEE(ecc[k][cryptHeaderSize:])
- binary.LittleEndian.PutUint32(ecc[k][nonceSize:], checksum)
- s.block.Encrypt(ecc[k], ecc[k])
- }
- }
-
- // 4. TxQueue
- var msg ipv4.Message
- for i := 0; i < s.dup+1; i++ {
- bts := xmitBuf.Get().([]byte)[:len(buf)]
- copy(bts, buf)
- msg.Buffers = [][]byte{bts}
- msg.Addr = s.remote
- s.txqueue = append(s.txqueue, msg)
- }
-
- for k := range ecc {
- bts := xmitBuf.Get().([]byte)[:len(ecc[k])]
- copy(bts, ecc[k])
- msg.Buffers = [][]byte{bts}
- msg.Addr = s.remote
- s.txqueue = append(s.txqueue, msg)
- }
-}
-
-// sess updater to trigger protocol
-func (s *UDPSession) updater() {
- timer := time.NewTimer(0)
- for {
- select {
- case <-timer.C:
- s.mu.Lock()
- interval := time.Duration(s.kcp.flush(false)) * time.Millisecond
- waitsnd := s.kcp.WaitSnd()
- if waitsnd < int(s.kcp.snd_wnd) && waitsnd < int(s.kcp.rmt_wnd) {
- s.notifyWriteEvent()
- }
- s.uncork()
- s.mu.Unlock()
- timer.Reset(interval)
- case <-s.die:
- timer.Stop()
- return
- }
- }
-}
-
-// GetConv gets conversation id of a session
-func (s *UDPSession) GetConv() uint32 { return s.kcp.conv }
-
-func (s *UDPSession) notifyReadEvent() {
- select {
- case s.chReadEvent <- struct{}{}:
- default:
- }
-}
-
-func (s *UDPSession) notifyWriteEvent() {
- select {
- case s.chWriteEvent <- struct{}{}:
- default:
- }
-}
-
-func (s *UDPSession) notifyReadError(err error) {
- s.socketReadErrorOnce.Do(func() {
- s.socketReadError.Store(err)
- close(s.chSocketReadError)
- })
-}
-
-func (s *UDPSession) notifyWriteError(err error) {
- s.socketWriteErrorOnce.Do(func() {
- s.socketWriteError.Store(err)
- close(s.chSocketWriteError)
- })
-}
-
-// packet input stage
-func (s *UDPSession) packetInput(data []byte) {
- dataValid := false
- if s.block != nil {
- s.block.Decrypt(data, data)
- data = data[nonceSize:]
- checksum := crc32.ChecksumIEEE(data[crcSize:])
- if checksum == binary.LittleEndian.Uint32(data) {
- data = data[crcSize:]
- dataValid = true
- } else {
- atomic.AddUint64(&DefaultSnmp.InCsumErrors, 1)
- }
- } else if s.block == nil {
- dataValid = true
- }
-
- if dataValid {
- s.kcpInput(data)
- }
-}
-
-func (s *UDPSession) kcpInput(data []byte) {
- var kcpInErrors, fecErrs, fecRecovered, fecParityShards uint64
-
- if s.fecDecoder != nil {
- if len(data) > fecHeaderSize { // must be larger than fec header size
- f := fecPacket(data)
- if f.flag() == typeData || f.flag() == typeParity { // header check
- if f.flag() == typeParity {
- fecParityShards++
- }
-
- // lock
- s.mu.Lock()
- recovers := s.fecDecoder.decode(f)
- if f.flag() == typeData {
- if ret := s.kcp.Input(data[fecHeaderSizePlus2:], true, s.ackNoDelay); ret != 0 {
- kcpInErrors++
- }
- }
-
- for _, r := range recovers {
- if len(r) >= 2 { // must be larger than 2bytes
- sz := binary.LittleEndian.Uint16(r)
- if int(sz) <= len(r) && sz >= 2 {
- if ret := s.kcp.Input(r[2:sz], false, s.ackNoDelay); ret == 0 {
- fecRecovered++
- } else {
- kcpInErrors++
- }
- } else {
- fecErrs++
- }
- } else {
- fecErrs++
- }
- // recycle the recovers
- xmitBuf.Put(r)
- }
-
- // to notify the readers to receive the data
- if n := s.kcp.PeekSize(); n > 0 {
- s.notifyReadEvent()
- }
- // to notify the writers
- waitsnd := s.kcp.WaitSnd()
- if waitsnd < int(s.kcp.snd_wnd) && waitsnd < int(s.kcp.rmt_wnd) {
- s.notifyWriteEvent()
- }
-
- s.uncork()
- s.mu.Unlock()
- } else {
- atomic.AddUint64(&DefaultSnmp.InErrs, 1)
- }
- } else {
- atomic.AddUint64(&DefaultSnmp.InErrs, 1)
- }
- } else {
- s.mu.Lock()
- if ret := s.kcp.Input(data, true, s.ackNoDelay); ret != 0 {
- kcpInErrors++
- }
- if n := s.kcp.PeekSize(); n > 0 {
- s.notifyReadEvent()
- }
- waitsnd := s.kcp.WaitSnd()
- if waitsnd < int(s.kcp.snd_wnd) && waitsnd < int(s.kcp.rmt_wnd) {
- s.notifyWriteEvent()
- }
- s.uncork()
- s.mu.Unlock()
- }
-
- atomic.AddUint64(&DefaultSnmp.InPkts, 1)
- atomic.AddUint64(&DefaultSnmp.InBytes, uint64(len(data)))
- if fecParityShards > 0 {
- atomic.AddUint64(&DefaultSnmp.FECParityShards, fecParityShards)
- }
- if kcpInErrors > 0 {
- atomic.AddUint64(&DefaultSnmp.KCPInErrors, kcpInErrors)
- }
- if fecErrs > 0 {
- atomic.AddUint64(&DefaultSnmp.FECErrs, fecErrs)
- }
- if fecRecovered > 0 {
- atomic.AddUint64(&DefaultSnmp.FECRecovered, fecRecovered)
- }
-
-}
-
-type (
- // Listener defines a server which will be waiting to accept incoming connections
- Listener struct {
- block BlockCrypt // block encryption
- dataShards int // FEC data shard
- parityShards int // FEC parity shard
- fecDecoder *fecDecoder // FEC mock initialization
- conn net.PacketConn // the underlying packet connection
-
- sessions map[string]*UDPSession // all sessions accepted by this Listener
- sessionLock sync.Mutex
- chAccepts chan *UDPSession // Listen() backlog
- chSessionClosed chan net.Addr // session close queue
- headerSize int // the additional header to a KCP frame
-
- die chan struct{} // notify the listener has closed
- dieOnce sync.Once
-
- // socket error handling
- socketReadError atomic.Value
- chSocketReadError chan struct{}
- socketReadErrorOnce sync.Once
-
- rd atomic.Value // read deadline for Accept()
- }
-)
-
-// packet input stage
-func (l *Listener) packetInput(data []byte, addr net.Addr) {
- dataValid := false
- if l.block != nil {
- l.block.Decrypt(data, data)
- data = data[nonceSize:]
- checksum := crc32.ChecksumIEEE(data[crcSize:])
- if checksum == binary.LittleEndian.Uint32(data) {
- data = data[crcSize:]
- dataValid = true
- } else {
- atomic.AddUint64(&DefaultSnmp.InCsumErrors, 1)
- }
- } else if l.block == nil {
- dataValid = true
- }
-
- if dataValid {
- l.sessionLock.Lock()
- s, ok := l.sessions[addr.String()]
- l.sessionLock.Unlock()
-
- var conv, sn uint32
- convValid := false
- if l.fecDecoder != nil {
- isfec := binary.LittleEndian.Uint16(data[4:])
- if isfec == typeData {
- conv = binary.LittleEndian.Uint32(data[fecHeaderSizePlus2:])
- sn = binary.LittleEndian.Uint32(data[fecHeaderSizePlus2+IKCP_SN_OFFSET:])
- convValid = true
- }
- } else {
- conv = binary.LittleEndian.Uint32(data)
- sn = binary.LittleEndian.Uint32(data[IKCP_SN_OFFSET:])
- convValid = true
- }
-
- if ok { // existing connection
- if !convValid || conv == s.kcp.conv { // parity or valid data shard
- s.kcpInput(data)
- } else if sn == 0 { // should replace current connection
- s.Close()
- s = nil
- }
- }
-
- if s == nil && convValid { // new session
- if len(l.chAccepts) < cap(l.chAccepts) { // do not let the new sessions overwhelm accept queue
- s := newUDPSession(conv, l.dataShards, l.parityShards, l, l.conn, addr, l.block)
- s.kcpInput(data)
- l.sessionLock.Lock()
- l.sessions[addr.String()] = s
- l.sessionLock.Unlock()
- l.chAccepts <- s
- }
- }
- }
-}
-
-func (l *Listener) notifyReadError(err error) {
- l.socketReadErrorOnce.Do(func() {
- l.socketReadError.Store(err)
- close(l.chSocketReadError)
-
- // propagate read error to all sessions
- l.sessionLock.Lock()
- for _, s := range l.sessions {
- s.notifyReadError(err)
- }
- l.sessionLock.Unlock()
- })
-}
-
-// SetReadBuffer sets the socket read buffer for the Listener
-func (l *Listener) SetReadBuffer(bytes int) error {
- if nc, ok := l.conn.(setReadBuffer); ok {
- return nc.SetReadBuffer(bytes)
- }
- return errInvalidOperation
-}
-
-// SetWriteBuffer sets the socket write buffer for the Listener
-func (l *Listener) SetWriteBuffer(bytes int) error {
- if nc, ok := l.conn.(setWriteBuffer); ok {
- return nc.SetWriteBuffer(bytes)
- }
- return errInvalidOperation
-}
-
-// SetDSCP sets the 6bit DSCP field in IPv4 header, or 8bit Traffic Class in IPv6 header.
-//
-// if the underlying connection has implemented `func SetDSCP(int) error`, SetDSCP() will invoke
-// this function instead.
-func (l *Listener) SetDSCP(dscp int) error {
- // interface enabled
- if ts, ok := l.conn.(setDSCP); ok {
- return ts.SetDSCP(dscp)
- }
-
- if nc, ok := l.conn.(net.Conn); ok {
- var succeed bool
- if err := ipv4.NewConn(nc).SetTOS(dscp << 2); err == nil {
- succeed = true
- }
- if err := ipv6.NewConn(nc).SetTrafficClass(dscp); err == nil {
- succeed = true
- }
-
- if succeed {
- return nil
- }
- }
- return errInvalidOperation
-}
-
-// Accept implements the Accept method in the Listener interface; it waits for the next call and returns a generic Conn.
-func (l *Listener) Accept() (net.Conn, error) {
- return l.AcceptKCP()
-}
-
-// AcceptKCP accepts a KCP connection
-func (l *Listener) AcceptKCP() (*UDPSession, error) {
- var timeout <-chan time.Time
- if tdeadline, ok := l.rd.Load().(time.Time); ok && !tdeadline.IsZero() {
- timeout = time.After(tdeadline.Sub(time.Now()))
- }
-
- select {
- case <-timeout:
- return nil, errors.WithStack(errTimeout)
- case c := <-l.chAccepts:
- return c, nil
- case <-l.chSocketReadError:
- return nil, l.socketReadError.Load().(error)
- case <-l.die:
- return nil, errors.WithStack(io.ErrClosedPipe)
- }
-}
-
-// SetDeadline sets the deadline associated with the listener. A zero time value disables the deadline.
-func (l *Listener) SetDeadline(t time.Time) error {
- l.SetReadDeadline(t)
- l.SetWriteDeadline(t)
- return nil
-}
-
-// SetReadDeadline implements the Conn SetReadDeadline method.
-func (l *Listener) SetReadDeadline(t time.Time) error {
- l.rd.Store(t)
- return nil
-}
-
-// SetWriteDeadline implements the Conn SetWriteDeadline method.
-func (l *Listener) SetWriteDeadline(t time.Time) error { return errInvalidOperation }
-
-// Close stops listening on the UDP address, and closes the socket
-func (l *Listener) Close() error {
- var once bool
- l.dieOnce.Do(func() {
- close(l.die)
- once = true
- })
-
- if once {
- return l.conn.Close()
- } else {
- return errors.WithStack(io.ErrClosedPipe)
- }
-}
-
-// closeSession notify the listener that a session has closed
-func (l *Listener) closeSession(remote net.Addr) (ret bool) {
- l.sessionLock.Lock()
- defer l.sessionLock.Unlock()
- if _, ok := l.sessions[remote.String()]; ok {
- delete(l.sessions, remote.String())
- return true
- }
- return false
-}
-
-// Addr returns the listener's network address, The Addr returned is shared by all invocations of Addr, so do not modify it.
-func (l *Listener) Addr() net.Addr { return l.conn.LocalAddr() }
-
-// Listen listens for incoming KCP packets addressed to the local address laddr on the network "udp",
-func Listen(laddr string) (net.Listener, error) { return ListenWithOptions(laddr, nil, 0, 0) }
-
-// ListenWithOptions listens for incoming KCP packets addressed to the local address laddr on the network "udp" with packet encryption.
-//
-// 'block' is the block encryption algorithm to encrypt packets.
-//
-// 'dataShards', 'parityShards' specifiy how many parity packets will be generated following the data packets.
-//
-// Check https://github.com/klauspost/reedsolomon for details
-func ListenWithOptions(laddr string, block BlockCrypt, dataShards, parityShards int) (*Listener, error) {
- udpaddr, err := net.ResolveUDPAddr("udp", laddr)
- if err != nil {
- return nil, errors.WithStack(err)
- }
- conn, err := net.ListenUDP("udp", udpaddr)
- if err != nil {
- return nil, errors.WithStack(err)
- }
-
- return ServeConn(block, dataShards, parityShards, conn)
-}
-
-// ServeConn serves KCP protocol for a single packet connection.
-func ServeConn(block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*Listener, error) {
- l := new(Listener)
- l.conn = conn
- l.sessions = make(map[string]*UDPSession)
- l.chAccepts = make(chan *UDPSession, acceptBacklog)
- l.chSessionClosed = make(chan net.Addr)
- l.die = make(chan struct{})
- l.dataShards = dataShards
- l.parityShards = parityShards
- l.block = block
- l.fecDecoder = newFECDecoder(rxFECMulti*(dataShards+parityShards), dataShards, parityShards)
- l.chSocketReadError = make(chan struct{})
-
- // calculate header size
- if l.block != nil {
- l.headerSize += cryptHeaderSize
- }
- if l.fecDecoder != nil {
- l.headerSize += fecHeaderSizePlus2
- }
-
- go l.monitor()
- return l, nil
-}
-
-// Dial connects to the remote address "raddr" on the network "udp" without encryption and FEC
-func Dial(raddr string) (net.Conn, error) { return DialWithOptions(raddr, nil, 0, 0) }
-
-// DialWithOptions connects to the remote address "raddr" on the network "udp" with packet encryption
-//
-// 'block' is the block encryption algorithm to encrypt packets.
-//
-// 'dataShards', 'parityShards' specifiy how many parity packets will be generated following the data packets.
-//
-// Check https://github.com/klauspost/reedsolomon for details
-func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards int) (*UDPSession, error) {
- // network type detection
- udpaddr, err := net.ResolveUDPAddr("udp", raddr)
- if err != nil {
- return nil, errors.WithStack(err)
- }
- network := "udp4"
- if udpaddr.IP.To4() == nil {
- network = "udp"
- }
-
- conn, err := net.ListenUDP(network, nil)
- if err != nil {
- return nil, errors.WithStack(err)
- }
-
- return NewConn(raddr, block, dataShards, parityShards, conn)
-}
-
-// NewConn3 establishes a session and talks KCP protocol over a packet connection.
-func NewConn3(convid uint32, raddr net.Addr, block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*UDPSession, error) {
- return newUDPSession(convid, dataShards, parityShards, nil, conn, raddr, block), nil
-}
-
-// NewConn2 establishes a session and talks KCP protocol over a packet connection.
-func NewConn2(raddr net.Addr, block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*UDPSession, error) {
- var convid uint32
- binary.Read(rand.Reader, binary.LittleEndian, &convid)
- return NewConn3(convid, raddr, block, dataShards, parityShards, conn)
-}
-
-// NewConn establishes a session and talks KCP protocol over a packet connection.
-func NewConn(raddr string, block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*UDPSession, error) {
- udpaddr, err := net.ResolveUDPAddr("udp", raddr)
- if err != nil {
- return nil, errors.WithStack(err)
- }
- return NewConn2(udpaddr, block, dataShards, parityShards, conn)
-}
diff --git a/vendor/github.com/xtaci/kcp-go/snmp.go b/vendor/github.com/xtaci/kcp-go/snmp.go
deleted file mode 100644
index 607118e..0000000
--- a/vendor/github.com/xtaci/kcp-go/snmp.go
+++ /dev/null
@@ -1,164 +0,0 @@
-package kcp
-
-import (
- "fmt"
- "sync/atomic"
-)
-
-// Snmp defines network statistics indicator
-type Snmp struct {
- BytesSent uint64 // bytes sent from upper level
- BytesReceived uint64 // bytes received to upper level
- MaxConn uint64 // max number of connections ever reached
- ActiveOpens uint64 // accumulated active open connections
- PassiveOpens uint64 // accumulated passive open connections
- CurrEstab uint64 // current number of established connections
- InErrs uint64 // UDP read errors reported from net.PacketConn
- InCsumErrors uint64 // checksum errors from CRC32
- KCPInErrors uint64 // packet iput errors reported from KCP
- InPkts uint64 // incoming packets count
- OutPkts uint64 // outgoing packets count
- InSegs uint64 // incoming KCP segments
- OutSegs uint64 // outgoing KCP segments
- InBytes uint64 // UDP bytes received
- OutBytes uint64 // UDP bytes sent
- RetransSegs uint64 // accmulated retransmited segments
- FastRetransSegs uint64 // accmulated fast retransmitted segments
- EarlyRetransSegs uint64 // accmulated early retransmitted segments
- LostSegs uint64 // number of segs infered as lost
- RepeatSegs uint64 // number of segs duplicated
- FECRecovered uint64 // correct packets recovered from FEC
- FECErrs uint64 // incorrect packets recovered from FEC
- FECParityShards uint64 // FEC segments received
- FECShortShards uint64 // number of data shards that's not enough for recovery
-}
-
-func newSnmp() *Snmp {
- return new(Snmp)
-}
-
-// Header returns all field names
-func (s *Snmp) Header() []string {
- return []string{
- "BytesSent",
- "BytesReceived",
- "MaxConn",
- "ActiveOpens",
- "PassiveOpens",
- "CurrEstab",
- "InErrs",
- "InCsumErrors",
- "KCPInErrors",
- "InPkts",
- "OutPkts",
- "InSegs",
- "OutSegs",
- "InBytes",
- "OutBytes",
- "RetransSegs",
- "FastRetransSegs",
- "EarlyRetransSegs",
- "LostSegs",
- "RepeatSegs",
- "FECParityShards",
- "FECErrs",
- "FECRecovered",
- "FECShortShards",
- }
-}
-
-// ToSlice returns current snmp info as slice
-func (s *Snmp) ToSlice() []string {
- snmp := s.Copy()
- return []string{
- fmt.Sprint(snmp.BytesSent),
- fmt.Sprint(snmp.BytesReceived),
- fmt.Sprint(snmp.MaxConn),
- fmt.Sprint(snmp.ActiveOpens),
- fmt.Sprint(snmp.PassiveOpens),
- fmt.Sprint(snmp.CurrEstab),
- fmt.Sprint(snmp.InErrs),
- fmt.Sprint(snmp.InCsumErrors),
- fmt.Sprint(snmp.KCPInErrors),
- fmt.Sprint(snmp.InPkts),
- fmt.Sprint(snmp.OutPkts),
- fmt.Sprint(snmp.InSegs),
- fmt.Sprint(snmp.OutSegs),
- fmt.Sprint(snmp.InBytes),
- fmt.Sprint(snmp.OutBytes),
- fmt.Sprint(snmp.RetransSegs),
- fmt.Sprint(snmp.FastRetransSegs),
- fmt.Sprint(snmp.EarlyRetransSegs),
- fmt.Sprint(snmp.LostSegs),
- fmt.Sprint(snmp.RepeatSegs),
- fmt.Sprint(snmp.FECParityShards),
- fmt.Sprint(snmp.FECErrs),
- fmt.Sprint(snmp.FECRecovered),
- fmt.Sprint(snmp.FECShortShards),
- }
-}
-
-// Copy make a copy of current snmp snapshot
-func (s *Snmp) Copy() *Snmp {
- d := newSnmp()
- d.BytesSent = atomic.LoadUint64(&s.BytesSent)
- d.BytesReceived = atomic.LoadUint64(&s.BytesReceived)
- d.MaxConn = atomic.LoadUint64(&s.MaxConn)
- d.ActiveOpens = atomic.LoadUint64(&s.ActiveOpens)
- d.PassiveOpens = atomic.LoadUint64(&s.PassiveOpens)
- d.CurrEstab = atomic.LoadUint64(&s.CurrEstab)
- d.InErrs = atomic.LoadUint64(&s.InErrs)
- d.InCsumErrors = atomic.LoadUint64(&s.InCsumErrors)
- d.KCPInErrors = atomic.LoadUint64(&s.KCPInErrors)
- d.InPkts = atomic.LoadUint64(&s.InPkts)
- d.OutPkts = atomic.LoadUint64(&s.OutPkts)
- d.InSegs = atomic.LoadUint64(&s.InSegs)
- d.OutSegs = atomic.LoadUint64(&s.OutSegs)
- d.InBytes = atomic.LoadUint64(&s.InBytes)
- d.OutBytes = atomic.LoadUint64(&s.OutBytes)
- d.RetransSegs = atomic.LoadUint64(&s.RetransSegs)
- d.FastRetransSegs = atomic.LoadUint64(&s.FastRetransSegs)
- d.EarlyRetransSegs = atomic.LoadUint64(&s.EarlyRetransSegs)
- d.LostSegs = atomic.LoadUint64(&s.LostSegs)
- d.RepeatSegs = atomic.LoadUint64(&s.RepeatSegs)
- d.FECParityShards = atomic.LoadUint64(&s.FECParityShards)
- d.FECErrs = atomic.LoadUint64(&s.FECErrs)
- d.FECRecovered = atomic.LoadUint64(&s.FECRecovered)
- d.FECShortShards = atomic.LoadUint64(&s.FECShortShards)
- return d
-}
-
-// Reset values to zero
-func (s *Snmp) Reset() {
- atomic.StoreUint64(&s.BytesSent, 0)
- atomic.StoreUint64(&s.BytesReceived, 0)
- atomic.StoreUint64(&s.MaxConn, 0)
- atomic.StoreUint64(&s.ActiveOpens, 0)
- atomic.StoreUint64(&s.PassiveOpens, 0)
- atomic.StoreUint64(&s.CurrEstab, 0)
- atomic.StoreUint64(&s.InErrs, 0)
- atomic.StoreUint64(&s.InCsumErrors, 0)
- atomic.StoreUint64(&s.KCPInErrors, 0)
- atomic.StoreUint64(&s.InPkts, 0)
- atomic.StoreUint64(&s.OutPkts, 0)
- atomic.StoreUint64(&s.InSegs, 0)
- atomic.StoreUint64(&s.OutSegs, 0)
- atomic.StoreUint64(&s.InBytes, 0)
- atomic.StoreUint64(&s.OutBytes, 0)
- atomic.StoreUint64(&s.RetransSegs, 0)
- atomic.StoreUint64(&s.FastRetransSegs, 0)
- atomic.StoreUint64(&s.EarlyRetransSegs, 0)
- atomic.StoreUint64(&s.LostSegs, 0)
- atomic.StoreUint64(&s.RepeatSegs, 0)
- atomic.StoreUint64(&s.FECParityShards, 0)
- atomic.StoreUint64(&s.FECErrs, 0)
- atomic.StoreUint64(&s.FECRecovered, 0)
- atomic.StoreUint64(&s.FECShortShards, 0)
-}
-
-// DefaultSnmp is the global KCP connection statistics collector
-var DefaultSnmp *Snmp
-
-func init() {
- DefaultSnmp = newSnmp()
-}
diff --git a/vendor/github.com/xtaci/kcp-go/tx.go b/vendor/github.com/xtaci/kcp-go/tx.go
deleted file mode 100644
index 3397b82..0000000
--- a/vendor/github.com/xtaci/kcp-go/tx.go
+++ /dev/null
@@ -1,24 +0,0 @@
-package kcp
-
-import (
- "sync/atomic"
-
- "github.com/pkg/errors"
- "golang.org/x/net/ipv4"
-)
-
-func (s *UDPSession) defaultTx(txqueue []ipv4.Message) {
- nbytes := 0
- npkts := 0
- for k := range txqueue {
- if n, err := s.conn.WriteTo(txqueue[k].Buffers[0], txqueue[k].Addr); err == nil {
- nbytes += n
- npkts++
- } else {
- s.notifyWriteError(errors.WithStack(err))
- break
- }
- }
- atomic.AddUint64(&DefaultSnmp.OutPkts, uint64(npkts))
- atomic.AddUint64(&DefaultSnmp.OutBytes, uint64(nbytes))
-}
diff --git a/vendor/github.com/xtaci/kcp-go/tx_generic.go b/vendor/github.com/xtaci/kcp-go/tx_generic.go
deleted file mode 100644
index 0b4f349..0000000
--- a/vendor/github.com/xtaci/kcp-go/tx_generic.go
+++ /dev/null
@@ -1,11 +0,0 @@
-// +build !linux
-
-package kcp
-
-import (
- "golang.org/x/net/ipv4"
-)
-
-func (s *UDPSession) tx(txqueue []ipv4.Message) {
- s.defaultTx(txqueue)
-}
diff --git a/vendor/github.com/xtaci/kcp-go/tx_linux.go b/vendor/github.com/xtaci/kcp-go/tx_linux.go
deleted file mode 100644
index 4f19df5..0000000
--- a/vendor/github.com/xtaci/kcp-go/tx_linux.go
+++ /dev/null
@@ -1,51 +0,0 @@
-// +build linux
-
-package kcp
-
-import (
- "net"
- "os"
- "sync/atomic"
-
- "github.com/pkg/errors"
- "golang.org/x/net/ipv4"
-)
-
-func (s *UDPSession) tx(txqueue []ipv4.Message) {
- // default version
- if s.xconn == nil || s.xconnWriteError != nil {
- s.defaultTx(txqueue)
- return
- }
-
- // x/net version
- nbytes := 0
- npkts := 0
- for len(txqueue) > 0 {
- if n, err := s.xconn.WriteBatch(txqueue, 0); err == nil {
- for k := range txqueue[:n] {
- nbytes += len(txqueue[k].Buffers[0])
- }
- npkts += n
- txqueue = txqueue[n:]
- } else {
- // compatibility issue:
- // for linux kernel<=2.6.32, support for sendmmsg is not available
- // an error of type os.SyscallError will be returned
- if operr, ok := err.(*net.OpError); ok {
- if se, ok := operr.Err.(*os.SyscallError); ok {
- if se.Syscall == "sendmmsg" {
- s.xconnWriteError = se
- s.defaultTx(txqueue)
- return
- }
- }
- }
- s.notifyWriteError(errors.WithStack(err))
- break
- }
- }
-
- atomic.AddUint64(&DefaultSnmp.OutPkts, uint64(npkts))
- atomic.AddUint64(&DefaultSnmp.OutBytes, uint64(nbytes))
-}
diff --git a/vendor/github.com/xtaci/kcp-go/wechat_donate.jpg b/vendor/github.com/xtaci/kcp-go/wechat_donate.jpg
deleted file mode 100644
index ad72505..0000000
Binary files a/vendor/github.com/xtaci/kcp-go/wechat_donate.jpg and /dev/null differ
diff --git a/vendor/golang.org/x/crypto/LICENSE b/vendor/golang.org/x/crypto/LICENSE
deleted file mode 100644
index 6a66aea..0000000
--- a/vendor/golang.org/x/crypto/LICENSE
+++ /dev/null
@@ -1,27 +0,0 @@
-Copyright (c) 2009 The Go Authors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
- * Neither the name of Google Inc. nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/golang.org/x/crypto/PATENTS b/vendor/golang.org/x/crypto/PATENTS
deleted file mode 100644
index 7330990..0000000
--- a/vendor/golang.org/x/crypto/PATENTS
+++ /dev/null
@@ -1,22 +0,0 @@
-Additional IP Rights Grant (Patents)
-
-"This implementation" means the copyrightable works distributed by
-Google as part of the Go project.
-
-Google hereby grants to You a perpetual, worldwide, non-exclusive,
-no-charge, royalty-free, irrevocable (except as stated in this section)
-patent license to make, have made, use, offer to sell, sell, import,
-transfer and otherwise run, modify and propagate the contents of this
-implementation of Go, where such license applies only to those patent
-claims, both currently owned or controlled by Google and acquired in
-the future, licensable by Google that are necessarily infringed by this
-implementation of Go. This grant does not include claims that would be
-infringed only as a consequence of further modification of this
-implementation. If you or your agent or exclusive licensee institute or
-order or agree to the institution of patent litigation against any
-entity (including a cross-claim or counterclaim in a lawsuit) alleging
-that this implementation of Go or any code incorporated within this
-implementation of Go constitutes direct or contributory patent
-infringement, or inducement of patent infringement, then any patent
-rights granted to you under this License for this implementation of Go
-shall terminate as of the date such litigation is filed.
diff --git a/vendor/golang.org/x/crypto/argon2/argon2.go b/vendor/golang.org/x/crypto/argon2/argon2.go
deleted file mode 100644
index 29f0a2d..0000000
--- a/vendor/golang.org/x/crypto/argon2/argon2.go
+++ /dev/null
@@ -1,283 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package argon2 implements the key derivation function Argon2.
-// Argon2 was selected as the winner of the Password Hashing Competition and can
-// be used to derive cryptographic keys from passwords.
-//
-// For a detailed specification of Argon2 see [1].
-//
-// If you aren't sure which function you need, use Argon2id (IDKey) and
-// the parameter recommendations for your scenario.
-//
-// # Argon2i
-//
-// Argon2i (implemented by Key) is the side-channel resistant version of Argon2.
-// It uses data-independent memory access, which is preferred for password
-// hashing and password-based key derivation. Argon2i requires more passes over
-// memory than Argon2id to protect from trade-off attacks. The recommended
-// parameters (taken from [2]) for non-interactive operations are time=3 and to
-// use the maximum available memory.
-//
-// # Argon2id
-//
-// Argon2id (implemented by IDKey) is a hybrid version of Argon2 combining
-// Argon2i and Argon2d. It uses data-independent memory access for the first
-// half of the first iteration over the memory and data-dependent memory access
-// for the rest. Argon2id is side-channel resistant and provides better brute-
-// force cost savings due to time-memory tradeoffs than Argon2i. The recommended
-// parameters for non-interactive operations (taken from [2]) are time=1 and to
-// use the maximum available memory.
-//
-// [1] https://github.com/P-H-C/phc-winner-argon2/blob/master/argon2-specs.pdf
-// [2] https://tools.ietf.org/html/draft-irtf-cfrg-argon2-03#section-9.3
-package argon2
-
-import (
- "encoding/binary"
- "sync"
-
- "golang.org/x/crypto/blake2b"
-)
-
-// The Argon2 version implemented by this package.
-const Version = 0x13
-
-const (
- argon2d = iota
- argon2i
- argon2id
-)
-
-// Key derives a key from the password, salt, and cost parameters using Argon2i
-// returning a byte slice of length keyLen that can be used as cryptographic
-// key. The CPU cost and parallelism degree must be greater than zero.
-//
-// For example, you can get a derived key for e.g. AES-256 (which needs a
-// 32-byte key) by doing:
-//
-// key := argon2.Key([]byte("some password"), salt, 3, 32*1024, 4, 32)
-//
-// The draft RFC recommends[2] time=3, and memory=32*1024 is a sensible number.
-// If using that amount of memory (32 MB) is not possible in some contexts then
-// the time parameter can be increased to compensate.
-//
-// The time parameter specifies the number of passes over the memory and the
-// memory parameter specifies the size of the memory in KiB. For example
-// memory=32*1024 sets the memory cost to ~32 MB. The number of threads can be
-// adjusted to the number of available CPUs. The cost parameters should be
-// increased as memory latency and CPU parallelism increases. Remember to get a
-// good random salt.
-func Key(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte {
- return deriveKey(argon2i, password, salt, nil, nil, time, memory, threads, keyLen)
-}
-
-// IDKey derives a key from the password, salt, and cost parameters using
-// Argon2id returning a byte slice of length keyLen that can be used as
-// cryptographic key. The CPU cost and parallelism degree must be greater than
-// zero.
-//
-// For example, you can get a derived key for e.g. AES-256 (which needs a
-// 32-byte key) by doing:
-//
-// key := argon2.IDKey([]byte("some password"), salt, 1, 64*1024, 4, 32)
-//
-// The draft RFC recommends[2] time=1, and memory=64*1024 is a sensible number.
-// If using that amount of memory (64 MB) is not possible in some contexts then
-// the time parameter can be increased to compensate.
-//
-// The time parameter specifies the number of passes over the memory and the
-// memory parameter specifies the size of the memory in KiB. For example
-// memory=64*1024 sets the memory cost to ~64 MB. The number of threads can be
-// adjusted to the numbers of available CPUs. The cost parameters should be
-// increased as memory latency and CPU parallelism increases. Remember to get a
-// good random salt.
-func IDKey(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte {
- return deriveKey(argon2id, password, salt, nil, nil, time, memory, threads, keyLen)
-}
-
-func deriveKey(mode int, password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []byte {
- if time < 1 {
- panic("argon2: number of rounds too small")
- }
- if threads < 1 {
- panic("argon2: parallelism degree too low")
- }
- h0 := initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode)
-
- memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
- if memory < 2*syncPoints*uint32(threads) {
- memory = 2 * syncPoints * uint32(threads)
- }
- B := initBlocks(&h0, memory, uint32(threads))
- processBlocks(B, time, memory, uint32(threads), mode)
- return extractKey(B, memory, uint32(threads), keyLen)
-}
-
-const (
- blockLength = 128
- syncPoints = 4
-)
-
-type block [blockLength]uint64
-
-func initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte {
- var (
- h0 [blake2b.Size + 8]byte
- params [24]byte
- tmp [4]byte
- )
-
- b2, _ := blake2b.New512(nil)
- binary.LittleEndian.PutUint32(params[0:4], threads)
- binary.LittleEndian.PutUint32(params[4:8], keyLen)
- binary.LittleEndian.PutUint32(params[8:12], memory)
- binary.LittleEndian.PutUint32(params[12:16], time)
- binary.LittleEndian.PutUint32(params[16:20], uint32(Version))
- binary.LittleEndian.PutUint32(params[20:24], uint32(mode))
- b2.Write(params[:])
- binary.LittleEndian.PutUint32(tmp[:], uint32(len(password)))
- b2.Write(tmp[:])
- b2.Write(password)
- binary.LittleEndian.PutUint32(tmp[:], uint32(len(salt)))
- b2.Write(tmp[:])
- b2.Write(salt)
- binary.LittleEndian.PutUint32(tmp[:], uint32(len(key)))
- b2.Write(tmp[:])
- b2.Write(key)
- binary.LittleEndian.PutUint32(tmp[:], uint32(len(data)))
- b2.Write(tmp[:])
- b2.Write(data)
- b2.Sum(h0[:0])
- return h0
-}
-
-func initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []block {
- var block0 [1024]byte
- B := make([]block, memory)
- for lane := uint32(0); lane < threads; lane++ {
- j := lane * (memory / threads)
- binary.LittleEndian.PutUint32(h0[blake2b.Size+4:], lane)
-
- binary.LittleEndian.PutUint32(h0[blake2b.Size:], 0)
- blake2bHash(block0[:], h0[:])
- for i := range B[j+0] {
- B[j+0][i] = binary.LittleEndian.Uint64(block0[i*8:])
- }
-
- binary.LittleEndian.PutUint32(h0[blake2b.Size:], 1)
- blake2bHash(block0[:], h0[:])
- for i := range B[j+1] {
- B[j+1][i] = binary.LittleEndian.Uint64(block0[i*8:])
- }
- }
- return B
-}
-
-func processBlocks(B []block, time, memory, threads uint32, mode int) {
- lanes := memory / threads
- segments := lanes / syncPoints
-
- processSegment := func(n, slice, lane uint32, wg *sync.WaitGroup) {
- var addresses, in, zero block
- if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) {
- in[0] = uint64(n)
- in[1] = uint64(lane)
- in[2] = uint64(slice)
- in[3] = uint64(memory)
- in[4] = uint64(time)
- in[5] = uint64(mode)
- }
-
- index := uint32(0)
- if n == 0 && slice == 0 {
- index = 2 // we have already generated the first two blocks
- if mode == argon2i || mode == argon2id {
- in[6]++
- processBlock(&addresses, &in, &zero)
- processBlock(&addresses, &addresses, &zero)
- }
- }
-
- offset := lane*lanes + slice*segments + index
- var random uint64
- for index < segments {
- prev := offset - 1
- if index == 0 && slice == 0 {
- prev += lanes // last block in lane
- }
- if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) {
- if index%blockLength == 0 {
- in[6]++
- processBlock(&addresses, &in, &zero)
- processBlock(&addresses, &addresses, &zero)
- }
- random = addresses[index%blockLength]
- } else {
- random = B[prev][0]
- }
- newOffset := indexAlpha(random, lanes, segments, threads, n, slice, lane, index)
- processBlockXOR(&B[offset], &B[prev], &B[newOffset])
- index, offset = index+1, offset+1
- }
- wg.Done()
- }
-
- for n := uint32(0); n < time; n++ {
- for slice := uint32(0); slice < syncPoints; slice++ {
- var wg sync.WaitGroup
- for lane := uint32(0); lane < threads; lane++ {
- wg.Add(1)
- go processSegment(n, slice, lane, &wg)
- }
- wg.Wait()
- }
- }
-
-}
-
-func extractKey(B []block, memory, threads, keyLen uint32) []byte {
- lanes := memory / threads
- for lane := uint32(0); lane < threads-1; lane++ {
- for i, v := range B[(lane*lanes)+lanes-1] {
- B[memory-1][i] ^= v
- }
- }
-
- var block [1024]byte
- for i, v := range B[memory-1] {
- binary.LittleEndian.PutUint64(block[i*8:], v)
- }
- key := make([]byte, keyLen)
- blake2bHash(key, block[:])
- return key
-}
-
-func indexAlpha(rand uint64, lanes, segments, threads, n, slice, lane, index uint32) uint32 {
- refLane := uint32(rand>>32) % threads
- if n == 0 && slice == 0 {
- refLane = lane
- }
- m, s := 3*segments, ((slice+1)%syncPoints)*segments
- if lane == refLane {
- m += index
- }
- if n == 0 {
- m, s = slice*segments, 0
- if slice == 0 || lane == refLane {
- m += index
- }
- }
- if index == 0 || lane == refLane {
- m--
- }
- return phi(rand, uint64(m), uint64(s), refLane, lanes)
-}
-
-func phi(rand, m, s uint64, lane, lanes uint32) uint32 {
- p := rand & 0xFFFFFFFF
- p = (p * p) >> 32
- p = (p * m) >> 32
- return lane*lanes + uint32((s+m-(p+1))%uint64(lanes))
-}
diff --git a/vendor/golang.org/x/crypto/argon2/blake2b.go b/vendor/golang.org/x/crypto/argon2/blake2b.go
deleted file mode 100644
index 10f4694..0000000
--- a/vendor/golang.org/x/crypto/argon2/blake2b.go
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package argon2
-
-import (
- "encoding/binary"
- "hash"
-
- "golang.org/x/crypto/blake2b"
-)
-
-// blake2bHash computes an arbitrary long hash value of in
-// and writes the hash to out.
-func blake2bHash(out []byte, in []byte) {
- var b2 hash.Hash
- if n := len(out); n < blake2b.Size {
- b2, _ = blake2b.New(n, nil)
- } else {
- b2, _ = blake2b.New512(nil)
- }
-
- var buffer [blake2b.Size]byte
- binary.LittleEndian.PutUint32(buffer[:4], uint32(len(out)))
- b2.Write(buffer[:4])
- b2.Write(in)
-
- if len(out) <= blake2b.Size {
- b2.Sum(out[:0])
- return
- }
-
- outLen := len(out)
- b2.Sum(buffer[:0])
- b2.Reset()
- copy(out, buffer[:32])
- out = out[32:]
- for len(out) > blake2b.Size {
- b2.Write(buffer[:])
- b2.Sum(buffer[:0])
- copy(out, buffer[:32])
- out = out[32:]
- b2.Reset()
- }
-
- if outLen%blake2b.Size > 0 { // outLen > 64
- r := ((outLen + 31) / 32) - 2 // ⌈τ /32⌉-2
- b2, _ = blake2b.New(outLen-32*r, nil)
- }
- b2.Write(buffer[:])
- b2.Sum(out[:0])
-}
diff --git a/vendor/golang.org/x/crypto/argon2/blamka_amd64.go b/vendor/golang.org/x/crypto/argon2/blamka_amd64.go
deleted file mode 100644
index a014ac9..0000000
--- a/vendor/golang.org/x/crypto/argon2/blamka_amd64.go
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build amd64 && gc && !purego
-// +build amd64,gc,!purego
-
-package argon2
-
-import "golang.org/x/sys/cpu"
-
-func init() {
- useSSE4 = cpu.X86.HasSSE41
-}
-
-//go:noescape
-func mixBlocksSSE2(out, a, b, c *block)
-
-//go:noescape
-func xorBlocksSSE2(out, a, b, c *block)
-
-//go:noescape
-func blamkaSSE4(b *block)
-
-func processBlockSSE(out, in1, in2 *block, xor bool) {
- var t block
- mixBlocksSSE2(&t, in1, in2, &t)
- if useSSE4 {
- blamkaSSE4(&t)
- } else {
- for i := 0; i < blockLength; i += 16 {
- blamkaGeneric(
- &t[i+0], &t[i+1], &t[i+2], &t[i+3],
- &t[i+4], &t[i+5], &t[i+6], &t[i+7],
- &t[i+8], &t[i+9], &t[i+10], &t[i+11],
- &t[i+12], &t[i+13], &t[i+14], &t[i+15],
- )
- }
- for i := 0; i < blockLength/8; i += 2 {
- blamkaGeneric(
- &t[i], &t[i+1], &t[16+i], &t[16+i+1],
- &t[32+i], &t[32+i+1], &t[48+i], &t[48+i+1],
- &t[64+i], &t[64+i+1], &t[80+i], &t[80+i+1],
- &t[96+i], &t[96+i+1], &t[112+i], &t[112+i+1],
- )
- }
- }
- if xor {
- xorBlocksSSE2(out, in1, in2, &t)
- } else {
- mixBlocksSSE2(out, in1, in2, &t)
- }
-}
-
-func processBlock(out, in1, in2 *block) {
- processBlockSSE(out, in1, in2, false)
-}
-
-func processBlockXOR(out, in1, in2 *block) {
- processBlockSSE(out, in1, in2, true)
-}
diff --git a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s
deleted file mode 100644
index b2cc051..0000000
--- a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s
+++ /dev/null
@@ -1,244 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build amd64 && gc && !purego
-// +build amd64,gc,!purego
-
-#include "textflag.h"
-
-DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
-DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
-GLOBL ·c40<>(SB), (NOPTR+RODATA), $16
-
-DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
-DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
-GLOBL ·c48<>(SB), (NOPTR+RODATA), $16
-
-#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
- MOVO v4, t1; \
- MOVO v5, v4; \
- MOVO t1, v5; \
- MOVO v6, t1; \
- PUNPCKLQDQ v6, t2; \
- PUNPCKHQDQ v7, v6; \
- PUNPCKHQDQ t2, v6; \
- PUNPCKLQDQ v7, t2; \
- MOVO t1, v7; \
- MOVO v2, t1; \
- PUNPCKHQDQ t2, v7; \
- PUNPCKLQDQ v3, t2; \
- PUNPCKHQDQ t2, v2; \
- PUNPCKLQDQ t1, t2; \
- PUNPCKHQDQ t2, v3
-
-#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
- MOVO v4, t1; \
- MOVO v5, v4; \
- MOVO t1, v5; \
- MOVO v2, t1; \
- PUNPCKLQDQ v2, t2; \
- PUNPCKHQDQ v3, v2; \
- PUNPCKHQDQ t2, v2; \
- PUNPCKLQDQ v3, t2; \
- MOVO t1, v3; \
- MOVO v6, t1; \
- PUNPCKHQDQ t2, v3; \
- PUNPCKLQDQ v7, t2; \
- PUNPCKHQDQ t2, v6; \
- PUNPCKLQDQ t1, t2; \
- PUNPCKHQDQ t2, v7
-
-#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, t0, c40, c48) \
- MOVO v0, t0; \
- PMULULQ v2, t0; \
- PADDQ v2, v0; \
- PADDQ t0, v0; \
- PADDQ t0, v0; \
- PXOR v0, v6; \
- PSHUFD $0xB1, v6, v6; \
- MOVO v4, t0; \
- PMULULQ v6, t0; \
- PADDQ v6, v4; \
- PADDQ t0, v4; \
- PADDQ t0, v4; \
- PXOR v4, v2; \
- PSHUFB c40, v2; \
- MOVO v0, t0; \
- PMULULQ v2, t0; \
- PADDQ v2, v0; \
- PADDQ t0, v0; \
- PADDQ t0, v0; \
- PXOR v0, v6; \
- PSHUFB c48, v6; \
- MOVO v4, t0; \
- PMULULQ v6, t0; \
- PADDQ v6, v4; \
- PADDQ t0, v4; \
- PADDQ t0, v4; \
- PXOR v4, v2; \
- MOVO v2, t0; \
- PADDQ v2, t0; \
- PSRLQ $63, v2; \
- PXOR t0, v2; \
- MOVO v1, t0; \
- PMULULQ v3, t0; \
- PADDQ v3, v1; \
- PADDQ t0, v1; \
- PADDQ t0, v1; \
- PXOR v1, v7; \
- PSHUFD $0xB1, v7, v7; \
- MOVO v5, t0; \
- PMULULQ v7, t0; \
- PADDQ v7, v5; \
- PADDQ t0, v5; \
- PADDQ t0, v5; \
- PXOR v5, v3; \
- PSHUFB c40, v3; \
- MOVO v1, t0; \
- PMULULQ v3, t0; \
- PADDQ v3, v1; \
- PADDQ t0, v1; \
- PADDQ t0, v1; \
- PXOR v1, v7; \
- PSHUFB c48, v7; \
- MOVO v5, t0; \
- PMULULQ v7, t0; \
- PADDQ v7, v5; \
- PADDQ t0, v5; \
- PADDQ t0, v5; \
- PXOR v5, v3; \
- MOVO v3, t0; \
- PADDQ v3, t0; \
- PSRLQ $63, v3; \
- PXOR t0, v3
-
-#define LOAD_MSG_0(block, off) \
- MOVOU 8*(off+0)(block), X0; \
- MOVOU 8*(off+2)(block), X1; \
- MOVOU 8*(off+4)(block), X2; \
- MOVOU 8*(off+6)(block), X3; \
- MOVOU 8*(off+8)(block), X4; \
- MOVOU 8*(off+10)(block), X5; \
- MOVOU 8*(off+12)(block), X6; \
- MOVOU 8*(off+14)(block), X7
-
-#define STORE_MSG_0(block, off) \
- MOVOU X0, 8*(off+0)(block); \
- MOVOU X1, 8*(off+2)(block); \
- MOVOU X2, 8*(off+4)(block); \
- MOVOU X3, 8*(off+6)(block); \
- MOVOU X4, 8*(off+8)(block); \
- MOVOU X5, 8*(off+10)(block); \
- MOVOU X6, 8*(off+12)(block); \
- MOVOU X7, 8*(off+14)(block)
-
-#define LOAD_MSG_1(block, off) \
- MOVOU 8*off+0*8(block), X0; \
- MOVOU 8*off+16*8(block), X1; \
- MOVOU 8*off+32*8(block), X2; \
- MOVOU 8*off+48*8(block), X3; \
- MOVOU 8*off+64*8(block), X4; \
- MOVOU 8*off+80*8(block), X5; \
- MOVOU 8*off+96*8(block), X6; \
- MOVOU 8*off+112*8(block), X7
-
-#define STORE_MSG_1(block, off) \
- MOVOU X0, 8*off+0*8(block); \
- MOVOU X1, 8*off+16*8(block); \
- MOVOU X2, 8*off+32*8(block); \
- MOVOU X3, 8*off+48*8(block); \
- MOVOU X4, 8*off+64*8(block); \
- MOVOU X5, 8*off+80*8(block); \
- MOVOU X6, 8*off+96*8(block); \
- MOVOU X7, 8*off+112*8(block)
-
-#define BLAMKA_ROUND_0(block, off, t0, t1, c40, c48) \
- LOAD_MSG_0(block, off); \
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
- SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \
- STORE_MSG_0(block, off)
-
-#define BLAMKA_ROUND_1(block, off, t0, t1, c40, c48) \
- LOAD_MSG_1(block, off); \
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
- SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \
- STORE_MSG_1(block, off)
-
-// func blamkaSSE4(b *block)
-TEXT ·blamkaSSE4(SB), 4, $0-8
- MOVQ b+0(FP), AX
-
- MOVOU ·c40<>(SB), X10
- MOVOU ·c48<>(SB), X11
-
- BLAMKA_ROUND_0(AX, 0, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 16, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 32, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 48, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 64, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 80, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 96, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 112, X8, X9, X10, X11)
-
- BLAMKA_ROUND_1(AX, 0, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 2, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 4, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 6, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 8, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 10, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 12, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 14, X8, X9, X10, X11)
- RET
-
-// func mixBlocksSSE2(out, a, b, c *block)
-TEXT ·mixBlocksSSE2(SB), 4, $0-32
- MOVQ out+0(FP), DX
- MOVQ a+8(FP), AX
- MOVQ b+16(FP), BX
- MOVQ a+24(FP), CX
- MOVQ $128, BP
-
-loop:
- MOVOU 0(AX), X0
- MOVOU 0(BX), X1
- MOVOU 0(CX), X2
- PXOR X1, X0
- PXOR X2, X0
- MOVOU X0, 0(DX)
- ADDQ $16, AX
- ADDQ $16, BX
- ADDQ $16, CX
- ADDQ $16, DX
- SUBQ $2, BP
- JA loop
- RET
-
-// func xorBlocksSSE2(out, a, b, c *block)
-TEXT ·xorBlocksSSE2(SB), 4, $0-32
- MOVQ out+0(FP), DX
- MOVQ a+8(FP), AX
- MOVQ b+16(FP), BX
- MOVQ a+24(FP), CX
- MOVQ $128, BP
-
-loop:
- MOVOU 0(AX), X0
- MOVOU 0(BX), X1
- MOVOU 0(CX), X2
- MOVOU 0(DX), X3
- PXOR X1, X0
- PXOR X2, X0
- PXOR X3, X0
- MOVOU X0, 0(DX)
- ADDQ $16, AX
- ADDQ $16, BX
- ADDQ $16, CX
- ADDQ $16, DX
- SUBQ $2, BP
- JA loop
- RET
diff --git a/vendor/golang.org/x/crypto/argon2/blamka_generic.go b/vendor/golang.org/x/crypto/argon2/blamka_generic.go
deleted file mode 100644
index a481b22..0000000
--- a/vendor/golang.org/x/crypto/argon2/blamka_generic.go
+++ /dev/null
@@ -1,163 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package argon2
-
-var useSSE4 bool
-
-func processBlockGeneric(out, in1, in2 *block, xor bool) {
- var t block
- for i := range t {
- t[i] = in1[i] ^ in2[i]
- }
- for i := 0; i < blockLength; i += 16 {
- blamkaGeneric(
- &t[i+0], &t[i+1], &t[i+2], &t[i+3],
- &t[i+4], &t[i+5], &t[i+6], &t[i+7],
- &t[i+8], &t[i+9], &t[i+10], &t[i+11],
- &t[i+12], &t[i+13], &t[i+14], &t[i+15],
- )
- }
- for i := 0; i < blockLength/8; i += 2 {
- blamkaGeneric(
- &t[i], &t[i+1], &t[16+i], &t[16+i+1],
- &t[32+i], &t[32+i+1], &t[48+i], &t[48+i+1],
- &t[64+i], &t[64+i+1], &t[80+i], &t[80+i+1],
- &t[96+i], &t[96+i+1], &t[112+i], &t[112+i+1],
- )
- }
- if xor {
- for i := range t {
- out[i] ^= in1[i] ^ in2[i] ^ t[i]
- }
- } else {
- for i := range t {
- out[i] = in1[i] ^ in2[i] ^ t[i]
- }
- }
-}
-
-func blamkaGeneric(t00, t01, t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t12, t13, t14, t15 *uint64) {
- v00, v01, v02, v03 := *t00, *t01, *t02, *t03
- v04, v05, v06, v07 := *t04, *t05, *t06, *t07
- v08, v09, v10, v11 := *t08, *t09, *t10, *t11
- v12, v13, v14, v15 := *t12, *t13, *t14, *t15
-
- v00 += v04 + 2*uint64(uint32(v00))*uint64(uint32(v04))
- v12 ^= v00
- v12 = v12>>32 | v12<<32
- v08 += v12 + 2*uint64(uint32(v08))*uint64(uint32(v12))
- v04 ^= v08
- v04 = v04>>24 | v04<<40
-
- v00 += v04 + 2*uint64(uint32(v00))*uint64(uint32(v04))
- v12 ^= v00
- v12 = v12>>16 | v12<<48
- v08 += v12 + 2*uint64(uint32(v08))*uint64(uint32(v12))
- v04 ^= v08
- v04 = v04>>63 | v04<<1
-
- v01 += v05 + 2*uint64(uint32(v01))*uint64(uint32(v05))
- v13 ^= v01
- v13 = v13>>32 | v13<<32
- v09 += v13 + 2*uint64(uint32(v09))*uint64(uint32(v13))
- v05 ^= v09
- v05 = v05>>24 | v05<<40
-
- v01 += v05 + 2*uint64(uint32(v01))*uint64(uint32(v05))
- v13 ^= v01
- v13 = v13>>16 | v13<<48
- v09 += v13 + 2*uint64(uint32(v09))*uint64(uint32(v13))
- v05 ^= v09
- v05 = v05>>63 | v05<<1
-
- v02 += v06 + 2*uint64(uint32(v02))*uint64(uint32(v06))
- v14 ^= v02
- v14 = v14>>32 | v14<<32
- v10 += v14 + 2*uint64(uint32(v10))*uint64(uint32(v14))
- v06 ^= v10
- v06 = v06>>24 | v06<<40
-
- v02 += v06 + 2*uint64(uint32(v02))*uint64(uint32(v06))
- v14 ^= v02
- v14 = v14>>16 | v14<<48
- v10 += v14 + 2*uint64(uint32(v10))*uint64(uint32(v14))
- v06 ^= v10
- v06 = v06>>63 | v06<<1
-
- v03 += v07 + 2*uint64(uint32(v03))*uint64(uint32(v07))
- v15 ^= v03
- v15 = v15>>32 | v15<<32
- v11 += v15 + 2*uint64(uint32(v11))*uint64(uint32(v15))
- v07 ^= v11
- v07 = v07>>24 | v07<<40
-
- v03 += v07 + 2*uint64(uint32(v03))*uint64(uint32(v07))
- v15 ^= v03
- v15 = v15>>16 | v15<<48
- v11 += v15 + 2*uint64(uint32(v11))*uint64(uint32(v15))
- v07 ^= v11
- v07 = v07>>63 | v07<<1
-
- v00 += v05 + 2*uint64(uint32(v00))*uint64(uint32(v05))
- v15 ^= v00
- v15 = v15>>32 | v15<<32
- v10 += v15 + 2*uint64(uint32(v10))*uint64(uint32(v15))
- v05 ^= v10
- v05 = v05>>24 | v05<<40
-
- v00 += v05 + 2*uint64(uint32(v00))*uint64(uint32(v05))
- v15 ^= v00
- v15 = v15>>16 | v15<<48
- v10 += v15 + 2*uint64(uint32(v10))*uint64(uint32(v15))
- v05 ^= v10
- v05 = v05>>63 | v05<<1
-
- v01 += v06 + 2*uint64(uint32(v01))*uint64(uint32(v06))
- v12 ^= v01
- v12 = v12>>32 | v12<<32
- v11 += v12 + 2*uint64(uint32(v11))*uint64(uint32(v12))
- v06 ^= v11
- v06 = v06>>24 | v06<<40
-
- v01 += v06 + 2*uint64(uint32(v01))*uint64(uint32(v06))
- v12 ^= v01
- v12 = v12>>16 | v12<<48
- v11 += v12 + 2*uint64(uint32(v11))*uint64(uint32(v12))
- v06 ^= v11
- v06 = v06>>63 | v06<<1
-
- v02 += v07 + 2*uint64(uint32(v02))*uint64(uint32(v07))
- v13 ^= v02
- v13 = v13>>32 | v13<<32
- v08 += v13 + 2*uint64(uint32(v08))*uint64(uint32(v13))
- v07 ^= v08
- v07 = v07>>24 | v07<<40
-
- v02 += v07 + 2*uint64(uint32(v02))*uint64(uint32(v07))
- v13 ^= v02
- v13 = v13>>16 | v13<<48
- v08 += v13 + 2*uint64(uint32(v08))*uint64(uint32(v13))
- v07 ^= v08
- v07 = v07>>63 | v07<<1
-
- v03 += v04 + 2*uint64(uint32(v03))*uint64(uint32(v04))
- v14 ^= v03
- v14 = v14>>32 | v14<<32
- v09 += v14 + 2*uint64(uint32(v09))*uint64(uint32(v14))
- v04 ^= v09
- v04 = v04>>24 | v04<<40
-
- v03 += v04 + 2*uint64(uint32(v03))*uint64(uint32(v04))
- v14 ^= v03
- v14 = v14>>16 | v14<<48
- v09 += v14 + 2*uint64(uint32(v09))*uint64(uint32(v14))
- v04 ^= v09
- v04 = v04>>63 | v04<<1
-
- *t00, *t01, *t02, *t03 = v00, v01, v02, v03
- *t04, *t05, *t06, *t07 = v04, v05, v06, v07
- *t08, *t09, *t10, *t11 = v08, v09, v10, v11
- *t12, *t13, *t14, *t15 = v12, v13, v14, v15
-}
diff --git a/vendor/golang.org/x/crypto/argon2/blamka_ref.go b/vendor/golang.org/x/crypto/argon2/blamka_ref.go
deleted file mode 100644
index 167c59d..0000000
--- a/vendor/golang.org/x/crypto/argon2/blamka_ref.go
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !amd64 || purego || !gc
-// +build !amd64 purego !gc
-
-package argon2
-
-func processBlock(out, in1, in2 *block) {
- processBlockGeneric(out, in1, in2, false)
-}
-
-func processBlockXOR(out, in1, in2 *block) {
- processBlockGeneric(out, in1, in2, true)
-}
diff --git a/vendor/golang.org/x/crypto/bcrypt/base64.go b/vendor/golang.org/x/crypto/bcrypt/base64.go
deleted file mode 100644
index fc31160..0000000
--- a/vendor/golang.org/x/crypto/bcrypt/base64.go
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package bcrypt
-
-import "encoding/base64"
-
-const alphabet = "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
-
-var bcEncoding = base64.NewEncoding(alphabet)
-
-func base64Encode(src []byte) []byte {
- n := bcEncoding.EncodedLen(len(src))
- dst := make([]byte, n)
- bcEncoding.Encode(dst, src)
- for dst[n-1] == '=' {
- n--
- }
- return dst[:n]
-}
-
-func base64Decode(src []byte) ([]byte, error) {
- numOfEquals := 4 - (len(src) % 4)
- for i := 0; i < numOfEquals; i++ {
- src = append(src, '=')
- }
-
- dst := make([]byte, bcEncoding.DecodedLen(len(src)))
- n, err := bcEncoding.Decode(dst, src)
- if err != nil {
- return nil, err
- }
- return dst[:n], nil
-}
diff --git a/vendor/golang.org/x/crypto/bcrypt/bcrypt.go b/vendor/golang.org/x/crypto/bcrypt/bcrypt.go
deleted file mode 100644
index aeb73f8..0000000
--- a/vendor/golang.org/x/crypto/bcrypt/bcrypt.go
+++ /dev/null
@@ -1,295 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package bcrypt implements Provos and Mazières's bcrypt adaptive hashing
-// algorithm. See http://www.usenix.org/event/usenix99/provos/provos.pdf
-package bcrypt // import "golang.org/x/crypto/bcrypt"
-
-// The code is a port of Provos and Mazières's C implementation.
-import (
- "crypto/rand"
- "crypto/subtle"
- "errors"
- "fmt"
- "io"
- "strconv"
-
- "golang.org/x/crypto/blowfish"
-)
-
-const (
- MinCost int = 4 // the minimum allowable cost as passed in to GenerateFromPassword
- MaxCost int = 31 // the maximum allowable cost as passed in to GenerateFromPassword
- DefaultCost int = 10 // the cost that will actually be set if a cost below MinCost is passed into GenerateFromPassword
-)
-
-// The error returned from CompareHashAndPassword when a password and hash do
-// not match.
-var ErrMismatchedHashAndPassword = errors.New("crypto/bcrypt: hashedPassword is not the hash of the given password")
-
-// The error returned from CompareHashAndPassword when a hash is too short to
-// be a bcrypt hash.
-var ErrHashTooShort = errors.New("crypto/bcrypt: hashedSecret too short to be a bcrypted password")
-
-// The error returned from CompareHashAndPassword when a hash was created with
-// a bcrypt algorithm newer than this implementation.
-type HashVersionTooNewError byte
-
-func (hv HashVersionTooNewError) Error() string {
- return fmt.Sprintf("crypto/bcrypt: bcrypt algorithm version '%c' requested is newer than current version '%c'", byte(hv), majorVersion)
-}
-
-// The error returned from CompareHashAndPassword when a hash starts with something other than '$'
-type InvalidHashPrefixError byte
-
-func (ih InvalidHashPrefixError) Error() string {
- return fmt.Sprintf("crypto/bcrypt: bcrypt hashes must start with '$', but hashedSecret started with '%c'", byte(ih))
-}
-
-type InvalidCostError int
-
-func (ic InvalidCostError) Error() string {
- return fmt.Sprintf("crypto/bcrypt: cost %d is outside allowed range (%d,%d)", int(ic), int(MinCost), int(MaxCost))
-}
-
-const (
- majorVersion = '2'
- minorVersion = 'a'
- maxSaltSize = 16
- maxCryptedHashSize = 23
- encodedSaltSize = 22
- encodedHashSize = 31
- minHashSize = 59
-)
-
-// magicCipherData is an IV for the 64 Blowfish encryption calls in
-// bcrypt(). It's the string "OrpheanBeholderScryDoubt" in big-endian bytes.
-var magicCipherData = []byte{
- 0x4f, 0x72, 0x70, 0x68,
- 0x65, 0x61, 0x6e, 0x42,
- 0x65, 0x68, 0x6f, 0x6c,
- 0x64, 0x65, 0x72, 0x53,
- 0x63, 0x72, 0x79, 0x44,
- 0x6f, 0x75, 0x62, 0x74,
-}
-
-type hashed struct {
- hash []byte
- salt []byte
- cost int // allowed range is MinCost to MaxCost
- major byte
- minor byte
-}
-
-// GenerateFromPassword returns the bcrypt hash of the password at the given
-// cost. If the cost given is less than MinCost, the cost will be set to
-// DefaultCost, instead. Use CompareHashAndPassword, as defined in this package,
-// to compare the returned hashed password with its cleartext version.
-func GenerateFromPassword(password []byte, cost int) ([]byte, error) {
- p, err := newFromPassword(password, cost)
- if err != nil {
- return nil, err
- }
- return p.Hash(), nil
-}
-
-// CompareHashAndPassword compares a bcrypt hashed password with its possible
-// plaintext equivalent. Returns nil on success, or an error on failure.
-func CompareHashAndPassword(hashedPassword, password []byte) error {
- p, err := newFromHash(hashedPassword)
- if err != nil {
- return err
- }
-
- otherHash, err := bcrypt(password, p.cost, p.salt)
- if err != nil {
- return err
- }
-
- otherP := &hashed{otherHash, p.salt, p.cost, p.major, p.minor}
- if subtle.ConstantTimeCompare(p.Hash(), otherP.Hash()) == 1 {
- return nil
- }
-
- return ErrMismatchedHashAndPassword
-}
-
-// Cost returns the hashing cost used to create the given hashed
-// password. When, in the future, the hashing cost of a password system needs
-// to be increased in order to adjust for greater computational power, this
-// function allows one to establish which passwords need to be updated.
-func Cost(hashedPassword []byte) (int, error) {
- p, err := newFromHash(hashedPassword)
- if err != nil {
- return 0, err
- }
- return p.cost, nil
-}
-
-func newFromPassword(password []byte, cost int) (*hashed, error) {
- if cost < MinCost {
- cost = DefaultCost
- }
- p := new(hashed)
- p.major = majorVersion
- p.minor = minorVersion
-
- err := checkCost(cost)
- if err != nil {
- return nil, err
- }
- p.cost = cost
-
- unencodedSalt := make([]byte, maxSaltSize)
- _, err = io.ReadFull(rand.Reader, unencodedSalt)
- if err != nil {
- return nil, err
- }
-
- p.salt = base64Encode(unencodedSalt)
- hash, err := bcrypt(password, p.cost, p.salt)
- if err != nil {
- return nil, err
- }
- p.hash = hash
- return p, err
-}
-
-func newFromHash(hashedSecret []byte) (*hashed, error) {
- if len(hashedSecret) < minHashSize {
- return nil, ErrHashTooShort
- }
- p := new(hashed)
- n, err := p.decodeVersion(hashedSecret)
- if err != nil {
- return nil, err
- }
- hashedSecret = hashedSecret[n:]
- n, err = p.decodeCost(hashedSecret)
- if err != nil {
- return nil, err
- }
- hashedSecret = hashedSecret[n:]
-
- // The "+2" is here because we'll have to append at most 2 '=' to the salt
- // when base64 decoding it in expensiveBlowfishSetup().
- p.salt = make([]byte, encodedSaltSize, encodedSaltSize+2)
- copy(p.salt, hashedSecret[:encodedSaltSize])
-
- hashedSecret = hashedSecret[encodedSaltSize:]
- p.hash = make([]byte, len(hashedSecret))
- copy(p.hash, hashedSecret)
-
- return p, nil
-}
-
-func bcrypt(password []byte, cost int, salt []byte) ([]byte, error) {
- cipherData := make([]byte, len(magicCipherData))
- copy(cipherData, magicCipherData)
-
- c, err := expensiveBlowfishSetup(password, uint32(cost), salt)
- if err != nil {
- return nil, err
- }
-
- for i := 0; i < 24; i += 8 {
- for j := 0; j < 64; j++ {
- c.Encrypt(cipherData[i:i+8], cipherData[i:i+8])
- }
- }
-
- // Bug compatibility with C bcrypt implementations. We only encode 23 of
- // the 24 bytes encrypted.
- hsh := base64Encode(cipherData[:maxCryptedHashSize])
- return hsh, nil
-}
-
-func expensiveBlowfishSetup(key []byte, cost uint32, salt []byte) (*blowfish.Cipher, error) {
- csalt, err := base64Decode(salt)
- if err != nil {
- return nil, err
- }
-
- // Bug compatibility with C bcrypt implementations. They use the trailing
- // NULL in the key string during expansion.
- // We copy the key to prevent changing the underlying array.
- ckey := append(key[:len(key):len(key)], 0)
-
- c, err := blowfish.NewSaltedCipher(ckey, csalt)
- if err != nil {
- return nil, err
- }
-
- var i, rounds uint64
- rounds = 1 << cost
- for i = 0; i < rounds; i++ {
- blowfish.ExpandKey(ckey, c)
- blowfish.ExpandKey(csalt, c)
- }
-
- return c, nil
-}
-
-func (p *hashed) Hash() []byte {
- arr := make([]byte, 60)
- arr[0] = '$'
- arr[1] = p.major
- n := 2
- if p.minor != 0 {
- arr[2] = p.minor
- n = 3
- }
- arr[n] = '$'
- n++
- copy(arr[n:], []byte(fmt.Sprintf("%02d", p.cost)))
- n += 2
- arr[n] = '$'
- n++
- copy(arr[n:], p.salt)
- n += encodedSaltSize
- copy(arr[n:], p.hash)
- n += encodedHashSize
- return arr[:n]
-}
-
-func (p *hashed) decodeVersion(sbytes []byte) (int, error) {
- if sbytes[0] != '$' {
- return -1, InvalidHashPrefixError(sbytes[0])
- }
- if sbytes[1] > majorVersion {
- return -1, HashVersionTooNewError(sbytes[1])
- }
- p.major = sbytes[1]
- n := 3
- if sbytes[2] != '$' {
- p.minor = sbytes[2]
- n++
- }
- return n, nil
-}
-
-// sbytes should begin where decodeVersion left off.
-func (p *hashed) decodeCost(sbytes []byte) (int, error) {
- cost, err := strconv.Atoi(string(sbytes[0:2]))
- if err != nil {
- return -1, err
- }
- err = checkCost(cost)
- if err != nil {
- return -1, err
- }
- p.cost = cost
- return 3, nil
-}
-
-func (p *hashed) String() string {
- return fmt.Sprintf("&{hash: %#v, salt: %#v, cost: %d, major: %c, minor: %c}", string(p.hash), p.salt, p.cost, p.major, p.minor)
-}
-
-func checkCost(cost int) error {
- if cost < MinCost || cost > MaxCost {
- return InvalidCostError(cost)
- }
- return nil
-}
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b.go b/vendor/golang.org/x/crypto/blake2b/blake2b.go
deleted file mode 100644
index d2e98d4..0000000
--- a/vendor/golang.org/x/crypto/blake2b/blake2b.go
+++ /dev/null
@@ -1,291 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package blake2b implements the BLAKE2b hash algorithm defined by RFC 7693
-// and the extendable output function (XOF) BLAKE2Xb.
-//
-// BLAKE2b is optimized for 64-bit platforms—including NEON-enabled ARMs—and
-// produces digests of any size between 1 and 64 bytes.
-// For a detailed specification of BLAKE2b see https://blake2.net/blake2.pdf
-// and for BLAKE2Xb see https://blake2.net/blake2x.pdf
-//
-// If you aren't sure which function you need, use BLAKE2b (Sum512 or New512).
-// If you need a secret-key MAC (message authentication code), use the New512
-// function with a non-nil key.
-//
-// BLAKE2X is a construction to compute hash values larger than 64 bytes. It
-// can produce hash values between 0 and 4 GiB.
-package blake2b
-
-import (
- "encoding/binary"
- "errors"
- "hash"
-)
-
-const (
- // The blocksize of BLAKE2b in bytes.
- BlockSize = 128
- // The hash size of BLAKE2b-512 in bytes.
- Size = 64
- // The hash size of BLAKE2b-384 in bytes.
- Size384 = 48
- // The hash size of BLAKE2b-256 in bytes.
- Size256 = 32
-)
-
-var (
- useAVX2 bool
- useAVX bool
- useSSE4 bool
-)
-
-var (
- errKeySize = errors.New("blake2b: invalid key size")
- errHashSize = errors.New("blake2b: invalid hash size")
-)
-
-var iv = [8]uint64{
- 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
- 0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
-}
-
-// Sum512 returns the BLAKE2b-512 checksum of the data.
-func Sum512(data []byte) [Size]byte {
- var sum [Size]byte
- checkSum(&sum, Size, data)
- return sum
-}
-
-// Sum384 returns the BLAKE2b-384 checksum of the data.
-func Sum384(data []byte) [Size384]byte {
- var sum [Size]byte
- var sum384 [Size384]byte
- checkSum(&sum, Size384, data)
- copy(sum384[:], sum[:Size384])
- return sum384
-}
-
-// Sum256 returns the BLAKE2b-256 checksum of the data.
-func Sum256(data []byte) [Size256]byte {
- var sum [Size]byte
- var sum256 [Size256]byte
- checkSum(&sum, Size256, data)
- copy(sum256[:], sum[:Size256])
- return sum256
-}
-
-// New512 returns a new hash.Hash computing the BLAKE2b-512 checksum. A non-nil
-// key turns the hash into a MAC. The key must be between zero and 64 bytes long.
-func New512(key []byte) (hash.Hash, error) { return newDigest(Size, key) }
-
-// New384 returns a new hash.Hash computing the BLAKE2b-384 checksum. A non-nil
-// key turns the hash into a MAC. The key must be between zero and 64 bytes long.
-func New384(key []byte) (hash.Hash, error) { return newDigest(Size384, key) }
-
-// New256 returns a new hash.Hash computing the BLAKE2b-256 checksum. A non-nil
-// key turns the hash into a MAC. The key must be between zero and 64 bytes long.
-func New256(key []byte) (hash.Hash, error) { return newDigest(Size256, key) }
-
-// New returns a new hash.Hash computing the BLAKE2b checksum with a custom length.
-// A non-nil key turns the hash into a MAC. The key must be between zero and 64 bytes long.
-// The hash size can be a value between 1 and 64 but it is highly recommended to use
-// values equal or greater than:
-// - 32 if BLAKE2b is used as a hash function (The key is zero bytes long).
-// - 16 if BLAKE2b is used as a MAC function (The key is at least 16 bytes long).
-// When the key is nil, the returned hash.Hash implements BinaryMarshaler
-// and BinaryUnmarshaler for state (de)serialization as documented by hash.Hash.
-func New(size int, key []byte) (hash.Hash, error) { return newDigest(size, key) }
-
-func newDigest(hashSize int, key []byte) (*digest, error) {
- if hashSize < 1 || hashSize > Size {
- return nil, errHashSize
- }
- if len(key) > Size {
- return nil, errKeySize
- }
- d := &digest{
- size: hashSize,
- keyLen: len(key),
- }
- copy(d.key[:], key)
- d.Reset()
- return d, nil
-}
-
-func checkSum(sum *[Size]byte, hashSize int, data []byte) {
- h := iv
- h[0] ^= uint64(hashSize) | (1 << 16) | (1 << 24)
- var c [2]uint64
-
- if length := len(data); length > BlockSize {
- n := length &^ (BlockSize - 1)
- if length == n {
- n -= BlockSize
- }
- hashBlocks(&h, &c, 0, data[:n])
- data = data[n:]
- }
-
- var block [BlockSize]byte
- offset := copy(block[:], data)
- remaining := uint64(BlockSize - offset)
- if c[0] < remaining {
- c[1]--
- }
- c[0] -= remaining
-
- hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:])
-
- for i, v := range h[:(hashSize+7)/8] {
- binary.LittleEndian.PutUint64(sum[8*i:], v)
- }
-}
-
-type digest struct {
- h [8]uint64
- c [2]uint64
- size int
- block [BlockSize]byte
- offset int
-
- key [BlockSize]byte
- keyLen int
-}
-
-const (
- magic = "b2b"
- marshaledSize = len(magic) + 8*8 + 2*8 + 1 + BlockSize + 1
-)
-
-func (d *digest) MarshalBinary() ([]byte, error) {
- if d.keyLen != 0 {
- return nil, errors.New("crypto/blake2b: cannot marshal MACs")
- }
- b := make([]byte, 0, marshaledSize)
- b = append(b, magic...)
- for i := 0; i < 8; i++ {
- b = appendUint64(b, d.h[i])
- }
- b = appendUint64(b, d.c[0])
- b = appendUint64(b, d.c[1])
- // Maximum value for size is 64
- b = append(b, byte(d.size))
- b = append(b, d.block[:]...)
- b = append(b, byte(d.offset))
- return b, nil
-}
-
-func (d *digest) UnmarshalBinary(b []byte) error {
- if len(b) < len(magic) || string(b[:len(magic)]) != magic {
- return errors.New("crypto/blake2b: invalid hash state identifier")
- }
- if len(b) != marshaledSize {
- return errors.New("crypto/blake2b: invalid hash state size")
- }
- b = b[len(magic):]
- for i := 0; i < 8; i++ {
- b, d.h[i] = consumeUint64(b)
- }
- b, d.c[0] = consumeUint64(b)
- b, d.c[1] = consumeUint64(b)
- d.size = int(b[0])
- b = b[1:]
- copy(d.block[:], b[:BlockSize])
- b = b[BlockSize:]
- d.offset = int(b[0])
- return nil
-}
-
-func (d *digest) BlockSize() int { return BlockSize }
-
-func (d *digest) Size() int { return d.size }
-
-func (d *digest) Reset() {
- d.h = iv
- d.h[0] ^= uint64(d.size) | (uint64(d.keyLen) << 8) | (1 << 16) | (1 << 24)
- d.offset, d.c[0], d.c[1] = 0, 0, 0
- if d.keyLen > 0 {
- d.block = d.key
- d.offset = BlockSize
- }
-}
-
-func (d *digest) Write(p []byte) (n int, err error) {
- n = len(p)
-
- if d.offset > 0 {
- remaining := BlockSize - d.offset
- if n <= remaining {
- d.offset += copy(d.block[d.offset:], p)
- return
- }
- copy(d.block[d.offset:], p[:remaining])
- hashBlocks(&d.h, &d.c, 0, d.block[:])
- d.offset = 0
- p = p[remaining:]
- }
-
- if length := len(p); length > BlockSize {
- nn := length &^ (BlockSize - 1)
- if length == nn {
- nn -= BlockSize
- }
- hashBlocks(&d.h, &d.c, 0, p[:nn])
- p = p[nn:]
- }
-
- if len(p) > 0 {
- d.offset += copy(d.block[:], p)
- }
-
- return
-}
-
-func (d *digest) Sum(sum []byte) []byte {
- var hash [Size]byte
- d.finalize(&hash)
- return append(sum, hash[:d.size]...)
-}
-
-func (d *digest) finalize(hash *[Size]byte) {
- var block [BlockSize]byte
- copy(block[:], d.block[:d.offset])
- remaining := uint64(BlockSize - d.offset)
-
- c := d.c
- if c[0] < remaining {
- c[1]--
- }
- c[0] -= remaining
-
- h := d.h
- hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:])
-
- for i, v := range h {
- binary.LittleEndian.PutUint64(hash[8*i:], v)
- }
-}
-
-func appendUint64(b []byte, x uint64) []byte {
- var a [8]byte
- binary.BigEndian.PutUint64(a[:], x)
- return append(b, a[:]...)
-}
-
-func appendUint32(b []byte, x uint32) []byte {
- var a [4]byte
- binary.BigEndian.PutUint32(a[:], x)
- return append(b, a[:]...)
-}
-
-func consumeUint64(b []byte) ([]byte, uint64) {
- x := binary.BigEndian.Uint64(b)
- return b[8:], x
-}
-
-func consumeUint32(b []byte) ([]byte, uint32) {
- x := binary.BigEndian.Uint32(b)
- return b[4:], x
-}
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go
deleted file mode 100644
index 56bfaaa..0000000
--- a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build go1.7 && amd64 && gc && !purego
-// +build go1.7,amd64,gc,!purego
-
-package blake2b
-
-import "golang.org/x/sys/cpu"
-
-func init() {
- useAVX2 = cpu.X86.HasAVX2
- useAVX = cpu.X86.HasAVX
- useSSE4 = cpu.X86.HasSSE41
-}
-
-//go:noescape
-func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-
-//go:noescape
-func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-
-//go:noescape
-func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-
-func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
- switch {
- case useAVX2:
- hashBlocksAVX2(h, c, flag, blocks)
- case useAVX:
- hashBlocksAVX(h, c, flag, blocks)
- case useSSE4:
- hashBlocksSSE4(h, c, flag, blocks)
- default:
- hashBlocksGeneric(h, c, flag, blocks)
- }
-}
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s
deleted file mode 100644
index 4b9daa1..0000000
--- a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s
+++ /dev/null
@@ -1,745 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build go1.7 && amd64 && gc && !purego
-// +build go1.7,amd64,gc,!purego
-
-#include "textflag.h"
-
-DATA ·AVX2_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
-DATA ·AVX2_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
-DATA ·AVX2_iv0<>+0x10(SB)/8, $0x3c6ef372fe94f82b
-DATA ·AVX2_iv0<>+0x18(SB)/8, $0xa54ff53a5f1d36f1
-GLOBL ·AVX2_iv0<>(SB), (NOPTR+RODATA), $32
-
-DATA ·AVX2_iv1<>+0x00(SB)/8, $0x510e527fade682d1
-DATA ·AVX2_iv1<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
-DATA ·AVX2_iv1<>+0x10(SB)/8, $0x1f83d9abfb41bd6b
-DATA ·AVX2_iv1<>+0x18(SB)/8, $0x5be0cd19137e2179
-GLOBL ·AVX2_iv1<>(SB), (NOPTR+RODATA), $32
-
-DATA ·AVX2_c40<>+0x00(SB)/8, $0x0201000706050403
-DATA ·AVX2_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
-DATA ·AVX2_c40<>+0x10(SB)/8, $0x0201000706050403
-DATA ·AVX2_c40<>+0x18(SB)/8, $0x0a09080f0e0d0c0b
-GLOBL ·AVX2_c40<>(SB), (NOPTR+RODATA), $32
-
-DATA ·AVX2_c48<>+0x00(SB)/8, $0x0100070605040302
-DATA ·AVX2_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
-DATA ·AVX2_c48<>+0x10(SB)/8, $0x0100070605040302
-DATA ·AVX2_c48<>+0x18(SB)/8, $0x09080f0e0d0c0b0a
-GLOBL ·AVX2_c48<>(SB), (NOPTR+RODATA), $32
-
-DATA ·AVX_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
-DATA ·AVX_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
-GLOBL ·AVX_iv0<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
-DATA ·AVX_iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
-GLOBL ·AVX_iv1<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_iv2<>+0x00(SB)/8, $0x510e527fade682d1
-DATA ·AVX_iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
-GLOBL ·AVX_iv2<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
-DATA ·AVX_iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
-GLOBL ·AVX_iv3<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_c40<>+0x00(SB)/8, $0x0201000706050403
-DATA ·AVX_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
-GLOBL ·AVX_c40<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_c48<>+0x00(SB)/8, $0x0100070605040302
-DATA ·AVX_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
-GLOBL ·AVX_c48<>(SB), (NOPTR+RODATA), $16
-
-#define VPERMQ_0x39_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x39
-#define VPERMQ_0x93_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x93
-#define VPERMQ_0x4E_Y2_Y2 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e
-#define VPERMQ_0x93_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x93
-#define VPERMQ_0x39_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x39
-
-#define ROUND_AVX2(m0, m1, m2, m3, t, c40, c48) \
- VPADDQ m0, Y0, Y0; \
- VPADDQ Y1, Y0, Y0; \
- VPXOR Y0, Y3, Y3; \
- VPSHUFD $-79, Y3, Y3; \
- VPADDQ Y3, Y2, Y2; \
- VPXOR Y2, Y1, Y1; \
- VPSHUFB c40, Y1, Y1; \
- VPADDQ m1, Y0, Y0; \
- VPADDQ Y1, Y0, Y0; \
- VPXOR Y0, Y3, Y3; \
- VPSHUFB c48, Y3, Y3; \
- VPADDQ Y3, Y2, Y2; \
- VPXOR Y2, Y1, Y1; \
- VPADDQ Y1, Y1, t; \
- VPSRLQ $63, Y1, Y1; \
- VPXOR t, Y1, Y1; \
- VPERMQ_0x39_Y1_Y1; \
- VPERMQ_0x4E_Y2_Y2; \
- VPERMQ_0x93_Y3_Y3; \
- VPADDQ m2, Y0, Y0; \
- VPADDQ Y1, Y0, Y0; \
- VPXOR Y0, Y3, Y3; \
- VPSHUFD $-79, Y3, Y3; \
- VPADDQ Y3, Y2, Y2; \
- VPXOR Y2, Y1, Y1; \
- VPSHUFB c40, Y1, Y1; \
- VPADDQ m3, Y0, Y0; \
- VPADDQ Y1, Y0, Y0; \
- VPXOR Y0, Y3, Y3; \
- VPSHUFB c48, Y3, Y3; \
- VPADDQ Y3, Y2, Y2; \
- VPXOR Y2, Y1, Y1; \
- VPADDQ Y1, Y1, t; \
- VPSRLQ $63, Y1, Y1; \
- VPXOR t, Y1, Y1; \
- VPERMQ_0x39_Y3_Y3; \
- VPERMQ_0x4E_Y2_Y2; \
- VPERMQ_0x93_Y1_Y1
-
-#define VMOVQ_SI_X11_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x1E
-#define VMOVQ_SI_X12_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x26
-#define VMOVQ_SI_X13_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x2E
-#define VMOVQ_SI_X14_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x36
-#define VMOVQ_SI_X15_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x3E
-
-#define VMOVQ_SI_X11(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x5E; BYTE $n
-#define VMOVQ_SI_X12(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x66; BYTE $n
-#define VMOVQ_SI_X13(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x6E; BYTE $n
-#define VMOVQ_SI_X14(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x76; BYTE $n
-#define VMOVQ_SI_X15(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x7E; BYTE $n
-
-#define VPINSRQ_1_SI_X11_0 BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x1E; BYTE $0x01
-#define VPINSRQ_1_SI_X12_0 BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x26; BYTE $0x01
-#define VPINSRQ_1_SI_X13_0 BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x2E; BYTE $0x01
-#define VPINSRQ_1_SI_X14_0 BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x36; BYTE $0x01
-#define VPINSRQ_1_SI_X15_0 BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x3E; BYTE $0x01
-
-#define VPINSRQ_1_SI_X11(n) BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x5E; BYTE $n; BYTE $0x01
-#define VPINSRQ_1_SI_X12(n) BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x66; BYTE $n; BYTE $0x01
-#define VPINSRQ_1_SI_X13(n) BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x6E; BYTE $n; BYTE $0x01
-#define VPINSRQ_1_SI_X14(n) BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x76; BYTE $n; BYTE $0x01
-#define VPINSRQ_1_SI_X15(n) BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x7E; BYTE $n; BYTE $0x01
-
-#define VMOVQ_R8_X15 BYTE $0xC4; BYTE $0x41; BYTE $0xF9; BYTE $0x6E; BYTE $0xF8
-#define VPINSRQ_1_R9_X15 BYTE $0xC4; BYTE $0x43; BYTE $0x81; BYTE $0x22; BYTE $0xF9; BYTE $0x01
-
-// load msg: Y12 = (i0, i1, i2, i3)
-// i0, i1, i2, i3 must not be 0
-#define LOAD_MSG_AVX2_Y12(i0, i1, i2, i3) \
- VMOVQ_SI_X12(i0*8); \
- VMOVQ_SI_X11(i2*8); \
- VPINSRQ_1_SI_X12(i1*8); \
- VPINSRQ_1_SI_X11(i3*8); \
- VINSERTI128 $1, X11, Y12, Y12
-
-// load msg: Y13 = (i0, i1, i2, i3)
-// i0, i1, i2, i3 must not be 0
-#define LOAD_MSG_AVX2_Y13(i0, i1, i2, i3) \
- VMOVQ_SI_X13(i0*8); \
- VMOVQ_SI_X11(i2*8); \
- VPINSRQ_1_SI_X13(i1*8); \
- VPINSRQ_1_SI_X11(i3*8); \
- VINSERTI128 $1, X11, Y13, Y13
-
-// load msg: Y14 = (i0, i1, i2, i3)
-// i0, i1, i2, i3 must not be 0
-#define LOAD_MSG_AVX2_Y14(i0, i1, i2, i3) \
- VMOVQ_SI_X14(i0*8); \
- VMOVQ_SI_X11(i2*8); \
- VPINSRQ_1_SI_X14(i1*8); \
- VPINSRQ_1_SI_X11(i3*8); \
- VINSERTI128 $1, X11, Y14, Y14
-
-// load msg: Y15 = (i0, i1, i2, i3)
-// i0, i1, i2, i3 must not be 0
-#define LOAD_MSG_AVX2_Y15(i0, i1, i2, i3) \
- VMOVQ_SI_X15(i0*8); \
- VMOVQ_SI_X11(i2*8); \
- VPINSRQ_1_SI_X15(i1*8); \
- VPINSRQ_1_SI_X11(i3*8); \
- VINSERTI128 $1, X11, Y15, Y15
-
-#define LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() \
- VMOVQ_SI_X12_0; \
- VMOVQ_SI_X11(4*8); \
- VPINSRQ_1_SI_X12(2*8); \
- VPINSRQ_1_SI_X11(6*8); \
- VINSERTI128 $1, X11, Y12, Y12; \
- LOAD_MSG_AVX2_Y13(1, 3, 5, 7); \
- LOAD_MSG_AVX2_Y14(8, 10, 12, 14); \
- LOAD_MSG_AVX2_Y15(9, 11, 13, 15)
-
-#define LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() \
- LOAD_MSG_AVX2_Y12(14, 4, 9, 13); \
- LOAD_MSG_AVX2_Y13(10, 8, 15, 6); \
- VMOVQ_SI_X11(11*8); \
- VPSHUFD $0x4E, 0*8(SI), X14; \
- VPINSRQ_1_SI_X11(5*8); \
- VINSERTI128 $1, X11, Y14, Y14; \
- LOAD_MSG_AVX2_Y15(12, 2, 7, 3)
-
-#define LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() \
- VMOVQ_SI_X11(5*8); \
- VMOVDQU 11*8(SI), X12; \
- VPINSRQ_1_SI_X11(15*8); \
- VINSERTI128 $1, X11, Y12, Y12; \
- VMOVQ_SI_X13(8*8); \
- VMOVQ_SI_X11(2*8); \
- VPINSRQ_1_SI_X13_0; \
- VPINSRQ_1_SI_X11(13*8); \
- VINSERTI128 $1, X11, Y13, Y13; \
- LOAD_MSG_AVX2_Y14(10, 3, 7, 9); \
- LOAD_MSG_AVX2_Y15(14, 6, 1, 4)
-
-#define LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() \
- LOAD_MSG_AVX2_Y12(7, 3, 13, 11); \
- LOAD_MSG_AVX2_Y13(9, 1, 12, 14); \
- LOAD_MSG_AVX2_Y14(2, 5, 4, 15); \
- VMOVQ_SI_X15(6*8); \
- VMOVQ_SI_X11_0; \
- VPINSRQ_1_SI_X15(10*8); \
- VPINSRQ_1_SI_X11(8*8); \
- VINSERTI128 $1, X11, Y15, Y15
-
-#define LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() \
- LOAD_MSG_AVX2_Y12(9, 5, 2, 10); \
- VMOVQ_SI_X13_0; \
- VMOVQ_SI_X11(4*8); \
- VPINSRQ_1_SI_X13(7*8); \
- VPINSRQ_1_SI_X11(15*8); \
- VINSERTI128 $1, X11, Y13, Y13; \
- LOAD_MSG_AVX2_Y14(14, 11, 6, 3); \
- LOAD_MSG_AVX2_Y15(1, 12, 8, 13)
-
-#define LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() \
- VMOVQ_SI_X12(2*8); \
- VMOVQ_SI_X11_0; \
- VPINSRQ_1_SI_X12(6*8); \
- VPINSRQ_1_SI_X11(8*8); \
- VINSERTI128 $1, X11, Y12, Y12; \
- LOAD_MSG_AVX2_Y13(12, 10, 11, 3); \
- LOAD_MSG_AVX2_Y14(4, 7, 15, 1); \
- LOAD_MSG_AVX2_Y15(13, 5, 14, 9)
-
-#define LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() \
- LOAD_MSG_AVX2_Y12(12, 1, 14, 4); \
- LOAD_MSG_AVX2_Y13(5, 15, 13, 10); \
- VMOVQ_SI_X14_0; \
- VPSHUFD $0x4E, 8*8(SI), X11; \
- VPINSRQ_1_SI_X14(6*8); \
- VINSERTI128 $1, X11, Y14, Y14; \
- LOAD_MSG_AVX2_Y15(7, 3, 2, 11)
-
-#define LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() \
- LOAD_MSG_AVX2_Y12(13, 7, 12, 3); \
- LOAD_MSG_AVX2_Y13(11, 14, 1, 9); \
- LOAD_MSG_AVX2_Y14(5, 15, 8, 2); \
- VMOVQ_SI_X15_0; \
- VMOVQ_SI_X11(6*8); \
- VPINSRQ_1_SI_X15(4*8); \
- VPINSRQ_1_SI_X11(10*8); \
- VINSERTI128 $1, X11, Y15, Y15
-
-#define LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() \
- VMOVQ_SI_X12(6*8); \
- VMOVQ_SI_X11(11*8); \
- VPINSRQ_1_SI_X12(14*8); \
- VPINSRQ_1_SI_X11_0; \
- VINSERTI128 $1, X11, Y12, Y12; \
- LOAD_MSG_AVX2_Y13(15, 9, 3, 8); \
- VMOVQ_SI_X11(1*8); \
- VMOVDQU 12*8(SI), X14; \
- VPINSRQ_1_SI_X11(10*8); \
- VINSERTI128 $1, X11, Y14, Y14; \
- VMOVQ_SI_X15(2*8); \
- VMOVDQU 4*8(SI), X11; \
- VPINSRQ_1_SI_X15(7*8); \
- VINSERTI128 $1, X11, Y15, Y15
-
-#define LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() \
- LOAD_MSG_AVX2_Y12(10, 8, 7, 1); \
- VMOVQ_SI_X13(2*8); \
- VPSHUFD $0x4E, 5*8(SI), X11; \
- VPINSRQ_1_SI_X13(4*8); \
- VINSERTI128 $1, X11, Y13, Y13; \
- LOAD_MSG_AVX2_Y14(15, 9, 3, 13); \
- VMOVQ_SI_X15(11*8); \
- VMOVQ_SI_X11(12*8); \
- VPINSRQ_1_SI_X15(14*8); \
- VPINSRQ_1_SI_X11_0; \
- VINSERTI128 $1, X11, Y15, Y15
-
-// func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment
- MOVQ h+0(FP), AX
- MOVQ c+8(FP), BX
- MOVQ flag+16(FP), CX
- MOVQ blocks_base+24(FP), SI
- MOVQ blocks_len+32(FP), DI
-
- MOVQ SP, DX
- ADDQ $31, DX
- ANDQ $~31, DX
-
- MOVQ CX, 16(DX)
- XORQ CX, CX
- MOVQ CX, 24(DX)
-
- VMOVDQU ·AVX2_c40<>(SB), Y4
- VMOVDQU ·AVX2_c48<>(SB), Y5
-
- VMOVDQU 0(AX), Y8
- VMOVDQU 32(AX), Y9
- VMOVDQU ·AVX2_iv0<>(SB), Y6
- VMOVDQU ·AVX2_iv1<>(SB), Y7
-
- MOVQ 0(BX), R8
- MOVQ 8(BX), R9
- MOVQ R9, 8(DX)
-
-loop:
- ADDQ $128, R8
- MOVQ R8, 0(DX)
- CMPQ R8, $128
- JGE noinc
- INCQ R9
- MOVQ R9, 8(DX)
-
-noinc:
- VMOVDQA Y8, Y0
- VMOVDQA Y9, Y1
- VMOVDQA Y6, Y2
- VPXOR 0(DX), Y7, Y3
-
- LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15()
- VMOVDQA Y12, 32(DX)
- VMOVDQA Y13, 64(DX)
- VMOVDQA Y14, 96(DX)
- VMOVDQA Y15, 128(DX)
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3()
- VMOVDQA Y12, 160(DX)
- VMOVDQA Y13, 192(DX)
- VMOVDQA Y14, 224(DX)
- VMOVDQA Y15, 256(DX)
-
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
-
- ROUND_AVX2(32(DX), 64(DX), 96(DX), 128(DX), Y10, Y4, Y5)
- ROUND_AVX2(160(DX), 192(DX), 224(DX), 256(DX), Y10, Y4, Y5)
-
- VPXOR Y0, Y8, Y8
- VPXOR Y1, Y9, Y9
- VPXOR Y2, Y8, Y8
- VPXOR Y3, Y9, Y9
-
- LEAQ 128(SI), SI
- SUBQ $128, DI
- JNE loop
-
- MOVQ R8, 0(BX)
- MOVQ R9, 8(BX)
-
- VMOVDQU Y8, 0(AX)
- VMOVDQU Y9, 32(AX)
- VZEROUPPER
-
- RET
-
-#define VPUNPCKLQDQ_X2_X2_X15 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xFA
-#define VPUNPCKLQDQ_X3_X3_X15 BYTE $0xC5; BYTE $0x61; BYTE $0x6C; BYTE $0xFB
-#define VPUNPCKLQDQ_X7_X7_X15 BYTE $0xC5; BYTE $0x41; BYTE $0x6C; BYTE $0xFF
-#define VPUNPCKLQDQ_X13_X13_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x11; BYTE $0x6C; BYTE $0xFD
-#define VPUNPCKLQDQ_X14_X14_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x09; BYTE $0x6C; BYTE $0xFE
-
-#define VPUNPCKHQDQ_X15_X2_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x69; BYTE $0x6D; BYTE $0xD7
-#define VPUNPCKHQDQ_X15_X3_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xDF
-#define VPUNPCKHQDQ_X15_X6_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x49; BYTE $0x6D; BYTE $0xF7
-#define VPUNPCKHQDQ_X15_X7_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xFF
-#define VPUNPCKHQDQ_X15_X3_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xD7
-#define VPUNPCKHQDQ_X15_X7_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xF7
-#define VPUNPCKHQDQ_X15_X13_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xDF
-#define VPUNPCKHQDQ_X15_X13_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xFF
-
-#define SHUFFLE_AVX() \
- VMOVDQA X6, X13; \
- VMOVDQA X2, X14; \
- VMOVDQA X4, X6; \
- VPUNPCKLQDQ_X13_X13_X15; \
- VMOVDQA X5, X4; \
- VMOVDQA X6, X5; \
- VPUNPCKHQDQ_X15_X7_X6; \
- VPUNPCKLQDQ_X7_X7_X15; \
- VPUNPCKHQDQ_X15_X13_X7; \
- VPUNPCKLQDQ_X3_X3_X15; \
- VPUNPCKHQDQ_X15_X2_X2; \
- VPUNPCKLQDQ_X14_X14_X15; \
- VPUNPCKHQDQ_X15_X3_X3; \
-
-#define SHUFFLE_AVX_INV() \
- VMOVDQA X2, X13; \
- VMOVDQA X4, X14; \
- VPUNPCKLQDQ_X2_X2_X15; \
- VMOVDQA X5, X4; \
- VPUNPCKHQDQ_X15_X3_X2; \
- VMOVDQA X14, X5; \
- VPUNPCKLQDQ_X3_X3_X15; \
- VMOVDQA X6, X14; \
- VPUNPCKHQDQ_X15_X13_X3; \
- VPUNPCKLQDQ_X7_X7_X15; \
- VPUNPCKHQDQ_X15_X6_X6; \
- VPUNPCKLQDQ_X14_X14_X15; \
- VPUNPCKHQDQ_X15_X7_X7; \
-
-#define HALF_ROUND_AVX(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
- VPADDQ m0, v0, v0; \
- VPADDQ v2, v0, v0; \
- VPADDQ m1, v1, v1; \
- VPADDQ v3, v1, v1; \
- VPXOR v0, v6, v6; \
- VPXOR v1, v7, v7; \
- VPSHUFD $-79, v6, v6; \
- VPSHUFD $-79, v7, v7; \
- VPADDQ v6, v4, v4; \
- VPADDQ v7, v5, v5; \
- VPXOR v4, v2, v2; \
- VPXOR v5, v3, v3; \
- VPSHUFB c40, v2, v2; \
- VPSHUFB c40, v3, v3; \
- VPADDQ m2, v0, v0; \
- VPADDQ v2, v0, v0; \
- VPADDQ m3, v1, v1; \
- VPADDQ v3, v1, v1; \
- VPXOR v0, v6, v6; \
- VPXOR v1, v7, v7; \
- VPSHUFB c48, v6, v6; \
- VPSHUFB c48, v7, v7; \
- VPADDQ v6, v4, v4; \
- VPADDQ v7, v5, v5; \
- VPXOR v4, v2, v2; \
- VPXOR v5, v3, v3; \
- VPADDQ v2, v2, t0; \
- VPSRLQ $63, v2, v2; \
- VPXOR t0, v2, v2; \
- VPADDQ v3, v3, t0; \
- VPSRLQ $63, v3, v3; \
- VPXOR t0, v3, v3
-
-// load msg: X12 = (i0, i1), X13 = (i2, i3), X14 = (i4, i5), X15 = (i6, i7)
-// i0, i1, i2, i3, i4, i5, i6, i7 must not be 0
-#define LOAD_MSG_AVX(i0, i1, i2, i3, i4, i5, i6, i7) \
- VMOVQ_SI_X12(i0*8); \
- VMOVQ_SI_X13(i2*8); \
- VMOVQ_SI_X14(i4*8); \
- VMOVQ_SI_X15(i6*8); \
- VPINSRQ_1_SI_X12(i1*8); \
- VPINSRQ_1_SI_X13(i3*8); \
- VPINSRQ_1_SI_X14(i5*8); \
- VPINSRQ_1_SI_X15(i7*8)
-
-// load msg: X12 = (0, 2), X13 = (4, 6), X14 = (1, 3), X15 = (5, 7)
-#define LOAD_MSG_AVX_0_2_4_6_1_3_5_7() \
- VMOVQ_SI_X12_0; \
- VMOVQ_SI_X13(4*8); \
- VMOVQ_SI_X14(1*8); \
- VMOVQ_SI_X15(5*8); \
- VPINSRQ_1_SI_X12(2*8); \
- VPINSRQ_1_SI_X13(6*8); \
- VPINSRQ_1_SI_X14(3*8); \
- VPINSRQ_1_SI_X15(7*8)
-
-// load msg: X12 = (1, 0), X13 = (11, 5), X14 = (12, 2), X15 = (7, 3)
-#define LOAD_MSG_AVX_1_0_11_5_12_2_7_3() \
- VPSHUFD $0x4E, 0*8(SI), X12; \
- VMOVQ_SI_X13(11*8); \
- VMOVQ_SI_X14(12*8); \
- VMOVQ_SI_X15(7*8); \
- VPINSRQ_1_SI_X13(5*8); \
- VPINSRQ_1_SI_X14(2*8); \
- VPINSRQ_1_SI_X15(3*8)
-
-// load msg: X12 = (11, 12), X13 = (5, 15), X14 = (8, 0), X15 = (2, 13)
-#define LOAD_MSG_AVX_11_12_5_15_8_0_2_13() \
- VMOVDQU 11*8(SI), X12; \
- VMOVQ_SI_X13(5*8); \
- VMOVQ_SI_X14(8*8); \
- VMOVQ_SI_X15(2*8); \
- VPINSRQ_1_SI_X13(15*8); \
- VPINSRQ_1_SI_X14_0; \
- VPINSRQ_1_SI_X15(13*8)
-
-// load msg: X12 = (2, 5), X13 = (4, 15), X14 = (6, 10), X15 = (0, 8)
-#define LOAD_MSG_AVX_2_5_4_15_6_10_0_8() \
- VMOVQ_SI_X12(2*8); \
- VMOVQ_SI_X13(4*8); \
- VMOVQ_SI_X14(6*8); \
- VMOVQ_SI_X15_0; \
- VPINSRQ_1_SI_X12(5*8); \
- VPINSRQ_1_SI_X13(15*8); \
- VPINSRQ_1_SI_X14(10*8); \
- VPINSRQ_1_SI_X15(8*8)
-
-// load msg: X12 = (9, 5), X13 = (2, 10), X14 = (0, 7), X15 = (4, 15)
-#define LOAD_MSG_AVX_9_5_2_10_0_7_4_15() \
- VMOVQ_SI_X12(9*8); \
- VMOVQ_SI_X13(2*8); \
- VMOVQ_SI_X14_0; \
- VMOVQ_SI_X15(4*8); \
- VPINSRQ_1_SI_X12(5*8); \
- VPINSRQ_1_SI_X13(10*8); \
- VPINSRQ_1_SI_X14(7*8); \
- VPINSRQ_1_SI_X15(15*8)
-
-// load msg: X12 = (2, 6), X13 = (0, 8), X14 = (12, 10), X15 = (11, 3)
-#define LOAD_MSG_AVX_2_6_0_8_12_10_11_3() \
- VMOVQ_SI_X12(2*8); \
- VMOVQ_SI_X13_0; \
- VMOVQ_SI_X14(12*8); \
- VMOVQ_SI_X15(11*8); \
- VPINSRQ_1_SI_X12(6*8); \
- VPINSRQ_1_SI_X13(8*8); \
- VPINSRQ_1_SI_X14(10*8); \
- VPINSRQ_1_SI_X15(3*8)
-
-// load msg: X12 = (0, 6), X13 = (9, 8), X14 = (7, 3), X15 = (2, 11)
-#define LOAD_MSG_AVX_0_6_9_8_7_3_2_11() \
- MOVQ 0*8(SI), X12; \
- VPSHUFD $0x4E, 8*8(SI), X13; \
- MOVQ 7*8(SI), X14; \
- MOVQ 2*8(SI), X15; \
- VPINSRQ_1_SI_X12(6*8); \
- VPINSRQ_1_SI_X14(3*8); \
- VPINSRQ_1_SI_X15(11*8)
-
-// load msg: X12 = (6, 14), X13 = (11, 0), X14 = (15, 9), X15 = (3, 8)
-#define LOAD_MSG_AVX_6_14_11_0_15_9_3_8() \
- MOVQ 6*8(SI), X12; \
- MOVQ 11*8(SI), X13; \
- MOVQ 15*8(SI), X14; \
- MOVQ 3*8(SI), X15; \
- VPINSRQ_1_SI_X12(14*8); \
- VPINSRQ_1_SI_X13_0; \
- VPINSRQ_1_SI_X14(9*8); \
- VPINSRQ_1_SI_X15(8*8)
-
-// load msg: X12 = (5, 15), X13 = (8, 2), X14 = (0, 4), X15 = (6, 10)
-#define LOAD_MSG_AVX_5_15_8_2_0_4_6_10() \
- MOVQ 5*8(SI), X12; \
- MOVQ 8*8(SI), X13; \
- MOVQ 0*8(SI), X14; \
- MOVQ 6*8(SI), X15; \
- VPINSRQ_1_SI_X12(15*8); \
- VPINSRQ_1_SI_X13(2*8); \
- VPINSRQ_1_SI_X14(4*8); \
- VPINSRQ_1_SI_X15(10*8)
-
-// load msg: X12 = (12, 13), X13 = (1, 10), X14 = (2, 7), X15 = (4, 5)
-#define LOAD_MSG_AVX_12_13_1_10_2_7_4_5() \
- VMOVDQU 12*8(SI), X12; \
- MOVQ 1*8(SI), X13; \
- MOVQ 2*8(SI), X14; \
- VPINSRQ_1_SI_X13(10*8); \
- VPINSRQ_1_SI_X14(7*8); \
- VMOVDQU 4*8(SI), X15
-
-// load msg: X12 = (15, 9), X13 = (3, 13), X14 = (11, 14), X15 = (12, 0)
-#define LOAD_MSG_AVX_15_9_3_13_11_14_12_0() \
- MOVQ 15*8(SI), X12; \
- MOVQ 3*8(SI), X13; \
- MOVQ 11*8(SI), X14; \
- MOVQ 12*8(SI), X15; \
- VPINSRQ_1_SI_X12(9*8); \
- VPINSRQ_1_SI_X13(13*8); \
- VPINSRQ_1_SI_X14(14*8); \
- VPINSRQ_1_SI_X15_0
-
-// func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
- MOVQ h+0(FP), AX
- MOVQ c+8(FP), BX
- MOVQ flag+16(FP), CX
- MOVQ blocks_base+24(FP), SI
- MOVQ blocks_len+32(FP), DI
-
- MOVQ SP, R10
- ADDQ $15, R10
- ANDQ $~15, R10
-
- VMOVDQU ·AVX_c40<>(SB), X0
- VMOVDQU ·AVX_c48<>(SB), X1
- VMOVDQA X0, X8
- VMOVDQA X1, X9
-
- VMOVDQU ·AVX_iv3<>(SB), X0
- VMOVDQA X0, 0(R10)
- XORQ CX, 0(R10) // 0(R10) = ·AVX_iv3 ^ (CX || 0)
-
- VMOVDQU 0(AX), X10
- VMOVDQU 16(AX), X11
- VMOVDQU 32(AX), X2
- VMOVDQU 48(AX), X3
-
- MOVQ 0(BX), R8
- MOVQ 8(BX), R9
-
-loop:
- ADDQ $128, R8
- CMPQ R8, $128
- JGE noinc
- INCQ R9
-
-noinc:
- VMOVQ_R8_X15
- VPINSRQ_1_R9_X15
-
- VMOVDQA X10, X0
- VMOVDQA X11, X1
- VMOVDQU ·AVX_iv0<>(SB), X4
- VMOVDQU ·AVX_iv1<>(SB), X5
- VMOVDQU ·AVX_iv2<>(SB), X6
-
- VPXOR X15, X6, X6
- VMOVDQA 0(R10), X7
-
- LOAD_MSG_AVX_0_2_4_6_1_3_5_7()
- VMOVDQA X12, 16(R10)
- VMOVDQA X13, 32(R10)
- VMOVDQA X14, 48(R10)
- VMOVDQA X15, 64(R10)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX(8, 10, 12, 14, 9, 11, 13, 15)
- VMOVDQA X12, 80(R10)
- VMOVDQA X13, 96(R10)
- VMOVDQA X14, 112(R10)
- VMOVDQA X15, 128(R10)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX(14, 4, 9, 13, 10, 8, 15, 6)
- VMOVDQA X12, 144(R10)
- VMOVDQA X13, 160(R10)
- VMOVDQA X14, 176(R10)
- VMOVDQA X15, 192(R10)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_1_0_11_5_12_2_7_3()
- VMOVDQA X12, 208(R10)
- VMOVDQA X13, 224(R10)
- VMOVDQA X14, 240(R10)
- VMOVDQA X15, 256(R10)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX_11_12_5_15_8_0_2_13()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX(10, 3, 7, 9, 14, 6, 1, 4)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX(7, 3, 13, 11, 9, 1, 12, 14)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_2_5_4_15_6_10_0_8()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX_9_5_2_10_0_7_4_15()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX(14, 11, 6, 3, 1, 12, 8, 13)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX_2_6_0_8_12_10_11_3()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX(4, 7, 15, 1, 13, 5, 14, 9)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX(12, 1, 14, 4, 5, 15, 13, 10)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_0_6_9_8_7_3_2_11()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX(13, 7, 12, 3, 11, 14, 1, 9)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_5_15_8_2_0_4_6_10()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX_6_14_11_0_15_9_3_8()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_12_13_1_10_2_7_4_5()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX(10, 8, 7, 1, 2, 4, 6, 5)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_15_9_3_13_11_14_12_0()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X15, X8, X9)
- SHUFFLE_AVX()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X15, X8, X9)
- SHUFFLE_AVX()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- VMOVDQU 32(AX), X14
- VMOVDQU 48(AX), X15
- VPXOR X0, X10, X10
- VPXOR X1, X11, X11
- VPXOR X2, X14, X14
- VPXOR X3, X15, X15
- VPXOR X4, X10, X10
- VPXOR X5, X11, X11
- VPXOR X6, X14, X2
- VPXOR X7, X15, X3
- VMOVDQU X2, 32(AX)
- VMOVDQU X3, 48(AX)
-
- LEAQ 128(SI), SI
- SUBQ $128, DI
- JNE loop
-
- VMOVDQU X10, 0(AX)
- VMOVDQU X11, 16(AX)
-
- MOVQ R8, 0(BX)
- MOVQ R9, 8(BX)
- VZEROUPPER
-
- RET
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go
deleted file mode 100644
index 5fa1b32..0000000
--- a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !go1.7 && amd64 && gc && !purego
-// +build !go1.7,amd64,gc,!purego
-
-package blake2b
-
-import "golang.org/x/sys/cpu"
-
-func init() {
- useSSE4 = cpu.X86.HasSSE41
-}
-
-//go:noescape
-func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-
-func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
- if useSSE4 {
- hashBlocksSSE4(h, c, flag, blocks)
- } else {
- hashBlocksGeneric(h, c, flag, blocks)
- }
-}
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s
deleted file mode 100644
index ae75eb9..0000000
--- a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s
+++ /dev/null
@@ -1,279 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build amd64 && gc && !purego
-// +build amd64,gc,!purego
-
-#include "textflag.h"
-
-DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
-DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
-GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16
-
-DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
-DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
-GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16
-
-DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1
-DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
-GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16
-
-DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
-DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
-GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16
-
-DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
-DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
-GLOBL ·c40<>(SB), (NOPTR+RODATA), $16
-
-DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
-DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
-GLOBL ·c48<>(SB), (NOPTR+RODATA), $16
-
-#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
- MOVO v4, t1; \
- MOVO v5, v4; \
- MOVO t1, v5; \
- MOVO v6, t1; \
- PUNPCKLQDQ v6, t2; \
- PUNPCKHQDQ v7, v6; \
- PUNPCKHQDQ t2, v6; \
- PUNPCKLQDQ v7, t2; \
- MOVO t1, v7; \
- MOVO v2, t1; \
- PUNPCKHQDQ t2, v7; \
- PUNPCKLQDQ v3, t2; \
- PUNPCKHQDQ t2, v2; \
- PUNPCKLQDQ t1, t2; \
- PUNPCKHQDQ t2, v3
-
-#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
- MOVO v4, t1; \
- MOVO v5, v4; \
- MOVO t1, v5; \
- MOVO v2, t1; \
- PUNPCKLQDQ v2, t2; \
- PUNPCKHQDQ v3, v2; \
- PUNPCKHQDQ t2, v2; \
- PUNPCKLQDQ v3, t2; \
- MOVO t1, v3; \
- MOVO v6, t1; \
- PUNPCKHQDQ t2, v3; \
- PUNPCKLQDQ v7, t2; \
- PUNPCKHQDQ t2, v6; \
- PUNPCKLQDQ t1, t2; \
- PUNPCKHQDQ t2, v7
-
-#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
- PADDQ m0, v0; \
- PADDQ m1, v1; \
- PADDQ v2, v0; \
- PADDQ v3, v1; \
- PXOR v0, v6; \
- PXOR v1, v7; \
- PSHUFD $0xB1, v6, v6; \
- PSHUFD $0xB1, v7, v7; \
- PADDQ v6, v4; \
- PADDQ v7, v5; \
- PXOR v4, v2; \
- PXOR v5, v3; \
- PSHUFB c40, v2; \
- PSHUFB c40, v3; \
- PADDQ m2, v0; \
- PADDQ m3, v1; \
- PADDQ v2, v0; \
- PADDQ v3, v1; \
- PXOR v0, v6; \
- PXOR v1, v7; \
- PSHUFB c48, v6; \
- PSHUFB c48, v7; \
- PADDQ v6, v4; \
- PADDQ v7, v5; \
- PXOR v4, v2; \
- PXOR v5, v3; \
- MOVOU v2, t0; \
- PADDQ v2, t0; \
- PSRLQ $63, v2; \
- PXOR t0, v2; \
- MOVOU v3, t0; \
- PADDQ v3, t0; \
- PSRLQ $63, v3; \
- PXOR t0, v3
-
-#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \
- MOVQ i0*8(src), m0; \
- PINSRQ $1, i1*8(src), m0; \
- MOVQ i2*8(src), m1; \
- PINSRQ $1, i3*8(src), m1; \
- MOVQ i4*8(src), m2; \
- PINSRQ $1, i5*8(src), m2; \
- MOVQ i6*8(src), m3; \
- PINSRQ $1, i7*8(src), m3
-
-// func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
- MOVQ h+0(FP), AX
- MOVQ c+8(FP), BX
- MOVQ flag+16(FP), CX
- MOVQ blocks_base+24(FP), SI
- MOVQ blocks_len+32(FP), DI
-
- MOVQ SP, R10
- ADDQ $15, R10
- ANDQ $~15, R10
-
- MOVOU ·iv3<>(SB), X0
- MOVO X0, 0(R10)
- XORQ CX, 0(R10) // 0(R10) = ·iv3 ^ (CX || 0)
-
- MOVOU ·c40<>(SB), X13
- MOVOU ·c48<>(SB), X14
-
- MOVOU 0(AX), X12
- MOVOU 16(AX), X15
-
- MOVQ 0(BX), R8
- MOVQ 8(BX), R9
-
-loop:
- ADDQ $128, R8
- CMPQ R8, $128
- JGE noinc
- INCQ R9
-
-noinc:
- MOVQ R8, X8
- PINSRQ $1, R9, X8
-
- MOVO X12, X0
- MOVO X15, X1
- MOVOU 32(AX), X2
- MOVOU 48(AX), X3
- MOVOU ·iv0<>(SB), X4
- MOVOU ·iv1<>(SB), X5
- MOVOU ·iv2<>(SB), X6
-
- PXOR X8, X6
- MOVO 0(R10), X7
-
- LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7)
- MOVO X8, 16(R10)
- MOVO X9, 32(R10)
- MOVO X10, 48(R10)
- MOVO X11, 64(R10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15)
- MOVO X8, 80(R10)
- MOVO X9, 96(R10)
- MOVO X10, 112(R10)
- MOVO X11, 128(R10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6)
- MOVO X8, 144(R10)
- MOVO X9, 160(R10)
- MOVO X10, 176(R10)
- MOVO X11, 192(R10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3)
- MOVO X8, 208(R10)
- MOVO X9, 224(R10)
- MOVO X10, 240(R10)
- MOVO X11, 256(R10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- MOVOU 32(AX), X10
- MOVOU 48(AX), X11
- PXOR X0, X12
- PXOR X1, X15
- PXOR X2, X10
- PXOR X3, X11
- PXOR X4, X12
- PXOR X5, X15
- PXOR X6, X10
- PXOR X7, X11
- MOVOU X10, 32(AX)
- MOVOU X11, 48(AX)
-
- LEAQ 128(SI), SI
- SUBQ $128, DI
- JNE loop
-
- MOVOU X12, 0(AX)
- MOVOU X15, 16(AX)
-
- MOVQ R8, 0(BX)
- MOVQ R9, 8(BX)
-
- RET
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go b/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go
deleted file mode 100644
index 3168a8a..0000000
--- a/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go
+++ /dev/null
@@ -1,182 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package blake2b
-
-import (
- "encoding/binary"
- "math/bits"
-)
-
-// the precomputed values for BLAKE2b
-// there are 12 16-byte arrays - one for each round
-// the entries are calculated from the sigma constants.
-var precomputed = [12][16]byte{
- {0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
- {14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
- {11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4},
- {7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8},
- {9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13},
- {2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9},
- {12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11},
- {13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10},
- {6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5},
- {10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0},
- {0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}, // equal to the first
- {14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}, // equal to the second
-}
-
-func hashBlocksGeneric(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
- var m [16]uint64
- c0, c1 := c[0], c[1]
-
- for i := 0; i < len(blocks); {
- c0 += BlockSize
- if c0 < BlockSize {
- c1++
- }
-
- v0, v1, v2, v3, v4, v5, v6, v7 := h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7]
- v8, v9, v10, v11, v12, v13, v14, v15 := iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7]
- v12 ^= c0
- v13 ^= c1
- v14 ^= flag
-
- for j := range m {
- m[j] = binary.LittleEndian.Uint64(blocks[i:])
- i += 8
- }
-
- for j := range precomputed {
- s := &(precomputed[j])
-
- v0 += m[s[0]]
- v0 += v4
- v12 ^= v0
- v12 = bits.RotateLeft64(v12, -32)
- v8 += v12
- v4 ^= v8
- v4 = bits.RotateLeft64(v4, -24)
- v1 += m[s[1]]
- v1 += v5
- v13 ^= v1
- v13 = bits.RotateLeft64(v13, -32)
- v9 += v13
- v5 ^= v9
- v5 = bits.RotateLeft64(v5, -24)
- v2 += m[s[2]]
- v2 += v6
- v14 ^= v2
- v14 = bits.RotateLeft64(v14, -32)
- v10 += v14
- v6 ^= v10
- v6 = bits.RotateLeft64(v6, -24)
- v3 += m[s[3]]
- v3 += v7
- v15 ^= v3
- v15 = bits.RotateLeft64(v15, -32)
- v11 += v15
- v7 ^= v11
- v7 = bits.RotateLeft64(v7, -24)
-
- v0 += m[s[4]]
- v0 += v4
- v12 ^= v0
- v12 = bits.RotateLeft64(v12, -16)
- v8 += v12
- v4 ^= v8
- v4 = bits.RotateLeft64(v4, -63)
- v1 += m[s[5]]
- v1 += v5
- v13 ^= v1
- v13 = bits.RotateLeft64(v13, -16)
- v9 += v13
- v5 ^= v9
- v5 = bits.RotateLeft64(v5, -63)
- v2 += m[s[6]]
- v2 += v6
- v14 ^= v2
- v14 = bits.RotateLeft64(v14, -16)
- v10 += v14
- v6 ^= v10
- v6 = bits.RotateLeft64(v6, -63)
- v3 += m[s[7]]
- v3 += v7
- v15 ^= v3
- v15 = bits.RotateLeft64(v15, -16)
- v11 += v15
- v7 ^= v11
- v7 = bits.RotateLeft64(v7, -63)
-
- v0 += m[s[8]]
- v0 += v5
- v15 ^= v0
- v15 = bits.RotateLeft64(v15, -32)
- v10 += v15
- v5 ^= v10
- v5 = bits.RotateLeft64(v5, -24)
- v1 += m[s[9]]
- v1 += v6
- v12 ^= v1
- v12 = bits.RotateLeft64(v12, -32)
- v11 += v12
- v6 ^= v11
- v6 = bits.RotateLeft64(v6, -24)
- v2 += m[s[10]]
- v2 += v7
- v13 ^= v2
- v13 = bits.RotateLeft64(v13, -32)
- v8 += v13
- v7 ^= v8
- v7 = bits.RotateLeft64(v7, -24)
- v3 += m[s[11]]
- v3 += v4
- v14 ^= v3
- v14 = bits.RotateLeft64(v14, -32)
- v9 += v14
- v4 ^= v9
- v4 = bits.RotateLeft64(v4, -24)
-
- v0 += m[s[12]]
- v0 += v5
- v15 ^= v0
- v15 = bits.RotateLeft64(v15, -16)
- v10 += v15
- v5 ^= v10
- v5 = bits.RotateLeft64(v5, -63)
- v1 += m[s[13]]
- v1 += v6
- v12 ^= v1
- v12 = bits.RotateLeft64(v12, -16)
- v11 += v12
- v6 ^= v11
- v6 = bits.RotateLeft64(v6, -63)
- v2 += m[s[14]]
- v2 += v7
- v13 ^= v2
- v13 = bits.RotateLeft64(v13, -16)
- v8 += v13
- v7 ^= v8
- v7 = bits.RotateLeft64(v7, -63)
- v3 += m[s[15]]
- v3 += v4
- v14 ^= v3
- v14 = bits.RotateLeft64(v14, -16)
- v9 += v14
- v4 ^= v9
- v4 = bits.RotateLeft64(v4, -63)
-
- }
-
- h[0] ^= v0 ^ v8
- h[1] ^= v1 ^ v9
- h[2] ^= v2 ^ v10
- h[3] ^= v3 ^ v11
- h[4] ^= v4 ^ v12
- h[5] ^= v5 ^ v13
- h[6] ^= v6 ^ v14
- h[7] ^= v7 ^ v15
- }
- c[0], c[1] = c0, c1
-}
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go b/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go
deleted file mode 100644
index b0137cd..0000000
--- a/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !amd64 || purego || !gc
-// +build !amd64 purego !gc
-
-package blake2b
-
-func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
- hashBlocksGeneric(h, c, flag, blocks)
-}
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2x.go b/vendor/golang.org/x/crypto/blake2b/blake2x.go
deleted file mode 100644
index 52c414d..0000000
--- a/vendor/golang.org/x/crypto/blake2b/blake2x.go
+++ /dev/null
@@ -1,177 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package blake2b
-
-import (
- "encoding/binary"
- "errors"
- "io"
-)
-
-// XOF defines the interface to hash functions that
-// support arbitrary-length output.
-type XOF interface {
- // Write absorbs more data into the hash's state. It panics if called
- // after Read.
- io.Writer
-
- // Read reads more output from the hash. It returns io.EOF if the limit
- // has been reached.
- io.Reader
-
- // Clone returns a copy of the XOF in its current state.
- Clone() XOF
-
- // Reset resets the XOF to its initial state.
- Reset()
-}
-
-// OutputLengthUnknown can be used as the size argument to NewXOF to indicate
-// the length of the output is not known in advance.
-const OutputLengthUnknown = 0
-
-// magicUnknownOutputLength is a magic value for the output size that indicates
-// an unknown number of output bytes.
-const magicUnknownOutputLength = (1 << 32) - 1
-
-// maxOutputLength is the absolute maximum number of bytes to produce when the
-// number of output bytes is unknown.
-const maxOutputLength = (1 << 32) * 64
-
-// NewXOF creates a new variable-output-length hash. The hash either produce a
-// known number of bytes (1 <= size < 2**32-1), or an unknown number of bytes
-// (size == OutputLengthUnknown). In the latter case, an absolute limit of
-// 256GiB applies.
-//
-// A non-nil key turns the hash into a MAC. The key must between
-// zero and 32 bytes long.
-func NewXOF(size uint32, key []byte) (XOF, error) {
- if len(key) > Size {
- return nil, errKeySize
- }
- if size == magicUnknownOutputLength {
- // 2^32-1 indicates an unknown number of bytes and thus isn't a
- // valid length.
- return nil, errors.New("blake2b: XOF length too large")
- }
- if size == OutputLengthUnknown {
- size = magicUnknownOutputLength
- }
- x := &xof{
- d: digest{
- size: Size,
- keyLen: len(key),
- },
- length: size,
- }
- copy(x.d.key[:], key)
- x.Reset()
- return x, nil
-}
-
-type xof struct {
- d digest
- length uint32
- remaining uint64
- cfg, root, block [Size]byte
- offset int
- nodeOffset uint32
- readMode bool
-}
-
-func (x *xof) Write(p []byte) (n int, err error) {
- if x.readMode {
- panic("blake2b: write to XOF after read")
- }
- return x.d.Write(p)
-}
-
-func (x *xof) Clone() XOF {
- clone := *x
- return &clone
-}
-
-func (x *xof) Reset() {
- x.cfg[0] = byte(Size)
- binary.LittleEndian.PutUint32(x.cfg[4:], uint32(Size)) // leaf length
- binary.LittleEndian.PutUint32(x.cfg[12:], x.length) // XOF length
- x.cfg[17] = byte(Size) // inner hash size
-
- x.d.Reset()
- x.d.h[1] ^= uint64(x.length) << 32
-
- x.remaining = uint64(x.length)
- if x.remaining == magicUnknownOutputLength {
- x.remaining = maxOutputLength
- }
- x.offset, x.nodeOffset = 0, 0
- x.readMode = false
-}
-
-func (x *xof) Read(p []byte) (n int, err error) {
- if !x.readMode {
- x.d.finalize(&x.root)
- x.readMode = true
- }
-
- if x.remaining == 0 {
- return 0, io.EOF
- }
-
- n = len(p)
- if uint64(n) > x.remaining {
- n = int(x.remaining)
- p = p[:n]
- }
-
- if x.offset > 0 {
- blockRemaining := Size - x.offset
- if n < blockRemaining {
- x.offset += copy(p, x.block[x.offset:])
- x.remaining -= uint64(n)
- return
- }
- copy(p, x.block[x.offset:])
- p = p[blockRemaining:]
- x.offset = 0
- x.remaining -= uint64(blockRemaining)
- }
-
- for len(p) >= Size {
- binary.LittleEndian.PutUint32(x.cfg[8:], x.nodeOffset)
- x.nodeOffset++
-
- x.d.initConfig(&x.cfg)
- x.d.Write(x.root[:])
- x.d.finalize(&x.block)
-
- copy(p, x.block[:])
- p = p[Size:]
- x.remaining -= uint64(Size)
- }
-
- if todo := len(p); todo > 0 {
- if x.remaining < uint64(Size) {
- x.cfg[0] = byte(x.remaining)
- }
- binary.LittleEndian.PutUint32(x.cfg[8:], x.nodeOffset)
- x.nodeOffset++
-
- x.d.initConfig(&x.cfg)
- x.d.Write(x.root[:])
- x.d.finalize(&x.block)
-
- x.offset = copy(p, x.block[:todo])
- x.remaining -= uint64(todo)
- }
- return
-}
-
-func (d *digest) initConfig(cfg *[Size]byte) {
- d.offset, d.c[0], d.c[1] = 0, 0, 0
- for i := range d.h {
- d.h[i] = iv[i] ^ binary.LittleEndian.Uint64(cfg[i*8:])
- }
-}
diff --git a/vendor/golang.org/x/crypto/blake2b/register.go b/vendor/golang.org/x/crypto/blake2b/register.go
deleted file mode 100644
index 9d86339..0000000
--- a/vendor/golang.org/x/crypto/blake2b/register.go
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build go1.9
-// +build go1.9
-
-package blake2b
-
-import (
- "crypto"
- "hash"
-)
-
-func init() {
- newHash256 := func() hash.Hash {
- h, _ := New256(nil)
- return h
- }
- newHash384 := func() hash.Hash {
- h, _ := New384(nil)
- return h
- }
-
- newHash512 := func() hash.Hash {
- h, _ := New512(nil)
- return h
- }
-
- crypto.RegisterHash(crypto.BLAKE2b_256, newHash256)
- crypto.RegisterHash(crypto.BLAKE2b_384, newHash384)
- crypto.RegisterHash(crypto.BLAKE2b_512, newHash512)
-}
diff --git a/vendor/golang.org/x/crypto/blowfish/block.go b/vendor/golang.org/x/crypto/blowfish/block.go
deleted file mode 100644
index 9d80f19..0000000
--- a/vendor/golang.org/x/crypto/blowfish/block.go
+++ /dev/null
@@ -1,159 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package blowfish
-
-// getNextWord returns the next big-endian uint32 value from the byte slice
-// at the given position in a circular manner, updating the position.
-func getNextWord(b []byte, pos *int) uint32 {
- var w uint32
- j := *pos
- for i := 0; i < 4; i++ {
- w = w<<8 | uint32(b[j])
- j++
- if j >= len(b) {
- j = 0
- }
- }
- *pos = j
- return w
-}
-
-// ExpandKey performs a key expansion on the given *Cipher. Specifically, it
-// performs the Blowfish algorithm's key schedule which sets up the *Cipher's
-// pi and substitution tables for calls to Encrypt. This is used, primarily,
-// by the bcrypt package to reuse the Blowfish key schedule during its
-// set up. It's unlikely that you need to use this directly.
-func ExpandKey(key []byte, c *Cipher) {
- j := 0
- for i := 0; i < 18; i++ {
- // Using inlined getNextWord for performance.
- var d uint32
- for k := 0; k < 4; k++ {
- d = d<<8 | uint32(key[j])
- j++
- if j >= len(key) {
- j = 0
- }
- }
- c.p[i] ^= d
- }
-
- var l, r uint32
- for i := 0; i < 18; i += 2 {
- l, r = encryptBlock(l, r, c)
- c.p[i], c.p[i+1] = l, r
- }
-
- for i := 0; i < 256; i += 2 {
- l, r = encryptBlock(l, r, c)
- c.s0[i], c.s0[i+1] = l, r
- }
- for i := 0; i < 256; i += 2 {
- l, r = encryptBlock(l, r, c)
- c.s1[i], c.s1[i+1] = l, r
- }
- for i := 0; i < 256; i += 2 {
- l, r = encryptBlock(l, r, c)
- c.s2[i], c.s2[i+1] = l, r
- }
- for i := 0; i < 256; i += 2 {
- l, r = encryptBlock(l, r, c)
- c.s3[i], c.s3[i+1] = l, r
- }
-}
-
-// This is similar to ExpandKey, but folds the salt during the key
-// schedule. While ExpandKey is essentially expandKeyWithSalt with an all-zero
-// salt passed in, reusing ExpandKey turns out to be a place of inefficiency
-// and specializing it here is useful.
-func expandKeyWithSalt(key []byte, salt []byte, c *Cipher) {
- j := 0
- for i := 0; i < 18; i++ {
- c.p[i] ^= getNextWord(key, &j)
- }
-
- j = 0
- var l, r uint32
- for i := 0; i < 18; i += 2 {
- l ^= getNextWord(salt, &j)
- r ^= getNextWord(salt, &j)
- l, r = encryptBlock(l, r, c)
- c.p[i], c.p[i+1] = l, r
- }
-
- for i := 0; i < 256; i += 2 {
- l ^= getNextWord(salt, &j)
- r ^= getNextWord(salt, &j)
- l, r = encryptBlock(l, r, c)
- c.s0[i], c.s0[i+1] = l, r
- }
-
- for i := 0; i < 256; i += 2 {
- l ^= getNextWord(salt, &j)
- r ^= getNextWord(salt, &j)
- l, r = encryptBlock(l, r, c)
- c.s1[i], c.s1[i+1] = l, r
- }
-
- for i := 0; i < 256; i += 2 {
- l ^= getNextWord(salt, &j)
- r ^= getNextWord(salt, &j)
- l, r = encryptBlock(l, r, c)
- c.s2[i], c.s2[i+1] = l, r
- }
-
- for i := 0; i < 256; i += 2 {
- l ^= getNextWord(salt, &j)
- r ^= getNextWord(salt, &j)
- l, r = encryptBlock(l, r, c)
- c.s3[i], c.s3[i+1] = l, r
- }
-}
-
-func encryptBlock(l, r uint32, c *Cipher) (uint32, uint32) {
- xl, xr := l, r
- xl ^= c.p[0]
- xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[1]
- xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[2]
- xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[3]
- xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[4]
- xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[5]
- xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[6]
- xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[7]
- xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[8]
- xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[9]
- xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[10]
- xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[11]
- xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[12]
- xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[13]
- xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[14]
- xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[15]
- xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[16]
- xr ^= c.p[17]
- return xr, xl
-}
-
-func decryptBlock(l, r uint32, c *Cipher) (uint32, uint32) {
- xl, xr := l, r
- xl ^= c.p[17]
- xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[16]
- xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[15]
- xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[14]
- xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[13]
- xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[12]
- xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[11]
- xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[10]
- xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[9]
- xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[8]
- xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[7]
- xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[6]
- xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[5]
- xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[4]
- xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[3]
- xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[2]
- xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[1]
- xr ^= c.p[0]
- return xr, xl
-}
diff --git a/vendor/golang.org/x/crypto/blowfish/cipher.go b/vendor/golang.org/x/crypto/blowfish/cipher.go
deleted file mode 100644
index 213bf20..0000000
--- a/vendor/golang.org/x/crypto/blowfish/cipher.go
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package blowfish implements Bruce Schneier's Blowfish encryption algorithm.
-//
-// Blowfish is a legacy cipher and its short block size makes it vulnerable to
-// birthday bound attacks (see https://sweet32.info). It should only be used
-// where compatibility with legacy systems, not security, is the goal.
-//
-// Deprecated: any new system should use AES (from crypto/aes, if necessary in
-// an AEAD mode like crypto/cipher.NewGCM) or XChaCha20-Poly1305 (from
-// golang.org/x/crypto/chacha20poly1305).
-package blowfish // import "golang.org/x/crypto/blowfish"
-
-// The code is a port of Bruce Schneier's C implementation.
-// See https://www.schneier.com/blowfish.html.
-
-import "strconv"
-
-// The Blowfish block size in bytes.
-const BlockSize = 8
-
-// A Cipher is an instance of Blowfish encryption using a particular key.
-type Cipher struct {
- p [18]uint32
- s0, s1, s2, s3 [256]uint32
-}
-
-type KeySizeError int
-
-func (k KeySizeError) Error() string {
- return "crypto/blowfish: invalid key size " + strconv.Itoa(int(k))
-}
-
-// NewCipher creates and returns a Cipher.
-// The key argument should be the Blowfish key, from 1 to 56 bytes.
-func NewCipher(key []byte) (*Cipher, error) {
- var result Cipher
- if k := len(key); k < 1 || k > 56 {
- return nil, KeySizeError(k)
- }
- initCipher(&result)
- ExpandKey(key, &result)
- return &result, nil
-}
-
-// NewSaltedCipher creates a returns a Cipher that folds a salt into its key
-// schedule. For most purposes, NewCipher, instead of NewSaltedCipher, is
-// sufficient and desirable. For bcrypt compatibility, the key can be over 56
-// bytes.
-func NewSaltedCipher(key, salt []byte) (*Cipher, error) {
- if len(salt) == 0 {
- return NewCipher(key)
- }
- var result Cipher
- if k := len(key); k < 1 {
- return nil, KeySizeError(k)
- }
- initCipher(&result)
- expandKeyWithSalt(key, salt, &result)
- return &result, nil
-}
-
-// BlockSize returns the Blowfish block size, 8 bytes.
-// It is necessary to satisfy the Block interface in the
-// package "crypto/cipher".
-func (c *Cipher) BlockSize() int { return BlockSize }
-
-// Encrypt encrypts the 8-byte buffer src using the key k
-// and stores the result in dst.
-// Note that for amounts of data larger than a block,
-// it is not safe to just call Encrypt on successive blocks;
-// instead, use an encryption mode like CBC (see crypto/cipher/cbc.go).
-func (c *Cipher) Encrypt(dst, src []byte) {
- l := uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
- r := uint32(src[4])<<24 | uint32(src[5])<<16 | uint32(src[6])<<8 | uint32(src[7])
- l, r = encryptBlock(l, r, c)
- dst[0], dst[1], dst[2], dst[3] = byte(l>>24), byte(l>>16), byte(l>>8), byte(l)
- dst[4], dst[5], dst[6], dst[7] = byte(r>>24), byte(r>>16), byte(r>>8), byte(r)
-}
-
-// Decrypt decrypts the 8-byte buffer src using the key k
-// and stores the result in dst.
-func (c *Cipher) Decrypt(dst, src []byte) {
- l := uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
- r := uint32(src[4])<<24 | uint32(src[5])<<16 | uint32(src[6])<<8 | uint32(src[7])
- l, r = decryptBlock(l, r, c)
- dst[0], dst[1], dst[2], dst[3] = byte(l>>24), byte(l>>16), byte(l>>8), byte(l)
- dst[4], dst[5], dst[6], dst[7] = byte(r>>24), byte(r>>16), byte(r>>8), byte(r)
-}
-
-func initCipher(c *Cipher) {
- copy(c.p[0:], p[0:])
- copy(c.s0[0:], s0[0:])
- copy(c.s1[0:], s1[0:])
- copy(c.s2[0:], s2[0:])
- copy(c.s3[0:], s3[0:])
-}
diff --git a/vendor/golang.org/x/crypto/blowfish/const.go b/vendor/golang.org/x/crypto/blowfish/const.go
deleted file mode 100644
index d040775..0000000
--- a/vendor/golang.org/x/crypto/blowfish/const.go
+++ /dev/null
@@ -1,199 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// The startup permutation array and substitution boxes.
-// They are the hexadecimal digits of PI; see:
-// https://www.schneier.com/code/constants.txt.
-
-package blowfish
-
-var s0 = [256]uint32{
- 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7, 0xb8e1afed, 0x6a267e96,
- 0xba7c9045, 0xf12c7f99, 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16,
- 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e, 0x0d95748f, 0x728eb658,
- 0x718bcd58, 0x82154aee, 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013,
- 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef, 0x8e79dcb0, 0x603a180e,
- 0x6c9e0e8b, 0xb01e8a3e, 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60,
- 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440, 0x55ca396a, 0x2aab10b6,
- 0xb4cc5c34, 0x1141e8ce, 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a,
- 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e, 0xafd6ba33, 0x6c24cf5c,
- 0x7a325381, 0x28958677, 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193,
- 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032, 0xef845d5d, 0xe98575b1,
- 0xdc262302, 0xeb651b88, 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239,
- 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e, 0x21c66842, 0xf6e96c9a,
- 0x670c9c61, 0xabd388f0, 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3,
- 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98, 0xa1f1651d, 0x39af0176,
- 0x66ca593e, 0x82430e88, 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe,
- 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6, 0x4ed3aa62, 0x363f7706,
- 0x1bfedf72, 0x429b023d, 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b,
- 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7, 0xe3fe501a, 0xb6794c3b,
- 0x976ce0bd, 0x04c006ba, 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463,
- 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f, 0x6dfc511f, 0x9b30952c,
- 0xcc814544, 0xaf5ebd09, 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3,
- 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb, 0x5579c0bd, 0x1a60320a,
- 0xd6a100c6, 0x402c7279, 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8,
- 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab, 0x323db5fa, 0xfd238760,
- 0x53317b48, 0x3e00df82, 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db,
- 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573, 0x695b27b0, 0xbbca58c8,
- 0xe1ffa35d, 0xb8f011a0, 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b,
- 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790, 0xe1ddf2da, 0xa4cb7e33,
- 0x62fb1341, 0xcee4c6e8, 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4,
- 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0, 0xd08ed1d0, 0xafc725e0,
- 0x8e3c5b2f, 0x8e7594b7, 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c,
- 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad, 0x2f2f2218, 0xbe0e1777,
- 0xea752dfe, 0x8b021fa1, 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299,
- 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9, 0x165fa266, 0x80957705,
- 0x93cc7314, 0x211a1477, 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf,
- 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49, 0x00250e2d, 0x2071b35e,
- 0x226800bb, 0x57b8e0af, 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa,
- 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5, 0x83260376, 0x6295cfa9,
- 0x11c81968, 0x4e734a41, 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915,
- 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400, 0x08ba6fb5, 0x571be91f,
- 0xf296ec6b, 0x2a0dd915, 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664,
- 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a,
-}
-
-var s1 = [256]uint32{
- 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623, 0xad6ea6b0, 0x49a7df7d,
- 0x9cee60b8, 0x8fedb266, 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1,
- 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e, 0x3f54989a, 0x5b429d65,
- 0x6b8fe4d6, 0x99f73fd6, 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1,
- 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e, 0x09686b3f, 0x3ebaefc9,
- 0x3c971814, 0x6b6a70a1, 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737,
- 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8, 0xb03ada37, 0xf0500c0d,
- 0xf01c1f04, 0x0200b3ff, 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd,
- 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701, 0x3ae5e581, 0x37c2dadc,
- 0xc8b57634, 0x9af3dda7, 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41,
- 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331, 0x4e548b38, 0x4f6db908,
- 0x6f420d03, 0xf60a04bf, 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af,
- 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e, 0x5512721f, 0x2e6b7124,
- 0x501adde6, 0x9f84cd87, 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c,
- 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2, 0xef1c1847, 0x3215d908,
- 0xdd433b37, 0x24c2ba16, 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd,
- 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b, 0x043556f1, 0xd7a3c76b,
- 0x3c11183b, 0x5924a509, 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e,
- 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3, 0x771fe71c, 0x4e3d06fa,
- 0x2965dcb9, 0x99e71d0f, 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a,
- 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4, 0xf2f74ea7, 0x361d2b3d,
- 0x1939260f, 0x19c27960, 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66,
- 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28, 0xc332ddef, 0xbe6c5aa5,
- 0x65582185, 0x68ab9802, 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84,
- 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510, 0x13cca830, 0xeb61bd96,
- 0x0334fe1e, 0xaa0363cf, 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14,
- 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e, 0x648b1eaf, 0x19bdf0ca,
- 0xa02369b9, 0x655abb50, 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7,
- 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8, 0xf837889a, 0x97e32d77,
- 0x11ed935f, 0x16681281, 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99,
- 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696, 0xcdb30aeb, 0x532e3054,
- 0x8fd948e4, 0x6dbc3128, 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73,
- 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0, 0x45eee2b6, 0xa3aaabea,
- 0xdb6c4f15, 0xfacb4fd0, 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105,
- 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250, 0xcf62a1f2, 0x5b8d2646,
- 0xfc8883a0, 0xc1c7b6a3, 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285,
- 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00, 0x58428d2a, 0x0c55f5ea,
- 0x1dadf43e, 0x233f7061, 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb,
- 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e, 0xa6078084, 0x19f8509e,
- 0xe8efd855, 0x61d99735, 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc,
- 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9, 0xdb73dbd3, 0x105588cd,
- 0x675fda79, 0xe3674340, 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20,
- 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7,
-}
-
-var s2 = [256]uint32{
- 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934, 0x411520f7, 0x7602d4f7,
- 0xbcf46b2e, 0xd4a20068, 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af,
- 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840, 0x4d95fc1d, 0x96b591af,
- 0x70f4ddd3, 0x66a02f45, 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504,
- 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a, 0x28507825, 0x530429f4,
- 0x0a2c86da, 0xe9b66dfb, 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee,
- 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6, 0xaace1e7c, 0xd3375fec,
- 0xce78a399, 0x406b2a42, 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b,
- 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2, 0x3a6efa74, 0xdd5b4332,
- 0x6841e7f7, 0xca7820fb, 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527,
- 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b, 0x55a867bc, 0xa1159a58,
- 0xcca92963, 0x99e1db33, 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c,
- 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3, 0x95c11548, 0xe4c66d22,
- 0x48c1133f, 0xc70f86dc, 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17,
- 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564, 0x257b7834, 0x602a9c60,
- 0xdff8e8a3, 0x1f636c1b, 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115,
- 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922, 0x85b2a20e, 0xe6ba0d99,
- 0xde720c8c, 0x2da2f728, 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0,
- 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e, 0x0a476341, 0x992eff74,
- 0x3a6f6eab, 0xf4f8fd37, 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d,
- 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804, 0xf1290dc7, 0xcc00ffa3,
- 0xb5390f92, 0x690fed0b, 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3,
- 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb, 0x37392eb3, 0xcc115979,
- 0x8026e297, 0xf42e312d, 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c,
- 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350, 0x1a6b1018, 0x11caedfa,
- 0x3d25bdd8, 0xe2e1c3c9, 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a,
- 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe, 0x9dbc8057, 0xf0f7c086,
- 0x60787bf8, 0x6003604d, 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc,
- 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f, 0x77a057be, 0xbde8ae24,
- 0x55464299, 0xbf582e61, 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2,
- 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9, 0x7aeb2661, 0x8b1ddf84,
- 0x846a0e79, 0x915f95e2, 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c,
- 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e, 0xb77f19b6, 0xe0a9dc09,
- 0x662d09a1, 0xc4324633, 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10,
- 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169, 0xdcb7da83, 0x573906fe,
- 0xa1e2ce9b, 0x4fcd7f52, 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027,
- 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5, 0xf0177a28, 0xc0f586e0,
- 0x006058aa, 0x30dc7d62, 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634,
- 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76, 0x6f05e409, 0x4b7c0188,
- 0x39720a3d, 0x7c927c24, 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc,
- 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4, 0x1e50ef5e, 0xb161e6f8,
- 0xa28514d9, 0x6c51133c, 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837,
- 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0,
-}
-
-var s3 = [256]uint32{
- 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b, 0x5cb0679e, 0x4fa33742,
- 0xd3822740, 0x99bc9bbe, 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b,
- 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4, 0x5748ab2f, 0xbc946e79,
- 0xc6a376d2, 0x6549c2c8, 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6,
- 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304, 0xa1fad5f0, 0x6a2d519a,
- 0x63ef8ce2, 0x9a86ee22, 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4,
- 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6, 0x2826a2f9, 0xa73a3ae1,
- 0x4ba99586, 0xef5562e9, 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59,
- 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593, 0xe990fd5a, 0x9e34d797,
- 0x2cf0b7d9, 0x022b8b51, 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28,
- 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c, 0xe029ac71, 0xe019a5e6,
- 0x47b0acfd, 0xed93fa9b, 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28,
- 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c, 0x15056dd4, 0x88f46dba,
- 0x03a16125, 0x0564f0bd, 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a,
- 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319, 0x7533d928, 0xb155fdf5,
- 0x03563482, 0x8aba3cbb, 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f,
- 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991, 0xea7a90c2, 0xfb3e7bce,
- 0x5121ce64, 0x774fbe32, 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680,
- 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166, 0xb39a460a, 0x6445c0dd,
- 0x586cdecf, 0x1c20c8ae, 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb,
- 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5, 0x72eacea8, 0xfa6484bb,
- 0x8d6612ae, 0xbf3c6f47, 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370,
- 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d, 0x4040cb08, 0x4eb4e2cc,
- 0x34d2466a, 0x0115af84, 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048,
- 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8, 0x611560b1, 0xe7933fdc,
- 0xbb3a792b, 0x344525bd, 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9,
- 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7, 0x1a908749, 0xd44fbd9a,
- 0xd0dadecb, 0xd50ada38, 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f,
- 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c, 0xbf97222c, 0x15e6fc2a,
- 0x0f91fc71, 0x9b941525, 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1,
- 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442, 0xe0ec6e0e, 0x1698db3b,
- 0x4c98a0be, 0x3278e964, 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e,
- 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8, 0xdf359f8d, 0x9b992f2e,
- 0xe60b6f47, 0x0fe3f11d, 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f,
- 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299, 0xf523f357, 0xa6327623,
- 0x93a83531, 0x56cccd02, 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc,
- 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614, 0xe6c6c7bd, 0x327a140a,
- 0x45e1d006, 0xc3f27b9a, 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6,
- 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b, 0x53113ec0, 0x1640e3d3,
- 0x38abbd60, 0x2547adf0, 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060,
- 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e, 0x1948c25c, 0x02fb8a8c,
- 0x01c36ae4, 0xd6ebe1f9, 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f,
- 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6,
-}
-
-var p = [18]uint32{
- 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, 0xa4093822, 0x299f31d0,
- 0x082efa98, 0xec4e6c89, 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,
- 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917, 0x9216d5d9, 0x8979fb1b,
-}
diff --git a/vendor/golang.org/x/crypto/cast5/cast5.go b/vendor/golang.org/x/crypto/cast5/cast5.go
deleted file mode 100644
index ddcbeb6..0000000
--- a/vendor/golang.org/x/crypto/cast5/cast5.go
+++ /dev/null
@@ -1,533 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package cast5 implements CAST5, as defined in RFC 2144.
-//
-// CAST5 is a legacy cipher and its short block size makes it vulnerable to
-// birthday bound attacks (see https://sweet32.info). It should only be used
-// where compatibility with legacy systems, not security, is the goal.
-//
-// Deprecated: any new system should use AES (from crypto/aes, if necessary in
-// an AEAD mode like crypto/cipher.NewGCM) or XChaCha20-Poly1305 (from
-// golang.org/x/crypto/chacha20poly1305).
-package cast5 // import "golang.org/x/crypto/cast5"
-
-import "errors"
-
-const BlockSize = 8
-const KeySize = 16
-
-type Cipher struct {
- masking [16]uint32
- rotate [16]uint8
-}
-
-func NewCipher(key []byte) (c *Cipher, err error) {
- if len(key) != KeySize {
- return nil, errors.New("CAST5: keys must be 16 bytes")
- }
-
- c = new(Cipher)
- c.keySchedule(key)
- return
-}
-
-func (c *Cipher) BlockSize() int {
- return BlockSize
-}
-
-func (c *Cipher) Encrypt(dst, src []byte) {
- l := uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
- r := uint32(src[4])<<24 | uint32(src[5])<<16 | uint32(src[6])<<8 | uint32(src[7])
-
- l, r = r, l^f1(r, c.masking[0], c.rotate[0])
- l, r = r, l^f2(r, c.masking[1], c.rotate[1])
- l, r = r, l^f3(r, c.masking[2], c.rotate[2])
- l, r = r, l^f1(r, c.masking[3], c.rotate[3])
-
- l, r = r, l^f2(r, c.masking[4], c.rotate[4])
- l, r = r, l^f3(r, c.masking[5], c.rotate[5])
- l, r = r, l^f1(r, c.masking[6], c.rotate[6])
- l, r = r, l^f2(r, c.masking[7], c.rotate[7])
-
- l, r = r, l^f3(r, c.masking[8], c.rotate[8])
- l, r = r, l^f1(r, c.masking[9], c.rotate[9])
- l, r = r, l^f2(r, c.masking[10], c.rotate[10])
- l, r = r, l^f3(r, c.masking[11], c.rotate[11])
-
- l, r = r, l^f1(r, c.masking[12], c.rotate[12])
- l, r = r, l^f2(r, c.masking[13], c.rotate[13])
- l, r = r, l^f3(r, c.masking[14], c.rotate[14])
- l, r = r, l^f1(r, c.masking[15], c.rotate[15])
-
- dst[0] = uint8(r >> 24)
- dst[1] = uint8(r >> 16)
- dst[2] = uint8(r >> 8)
- dst[3] = uint8(r)
- dst[4] = uint8(l >> 24)
- dst[5] = uint8(l >> 16)
- dst[6] = uint8(l >> 8)
- dst[7] = uint8(l)
-}
-
-func (c *Cipher) Decrypt(dst, src []byte) {
- l := uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
- r := uint32(src[4])<<24 | uint32(src[5])<<16 | uint32(src[6])<<8 | uint32(src[7])
-
- l, r = r, l^f1(r, c.masking[15], c.rotate[15])
- l, r = r, l^f3(r, c.masking[14], c.rotate[14])
- l, r = r, l^f2(r, c.masking[13], c.rotate[13])
- l, r = r, l^f1(r, c.masking[12], c.rotate[12])
-
- l, r = r, l^f3(r, c.masking[11], c.rotate[11])
- l, r = r, l^f2(r, c.masking[10], c.rotate[10])
- l, r = r, l^f1(r, c.masking[9], c.rotate[9])
- l, r = r, l^f3(r, c.masking[8], c.rotate[8])
-
- l, r = r, l^f2(r, c.masking[7], c.rotate[7])
- l, r = r, l^f1(r, c.masking[6], c.rotate[6])
- l, r = r, l^f3(r, c.masking[5], c.rotate[5])
- l, r = r, l^f2(r, c.masking[4], c.rotate[4])
-
- l, r = r, l^f1(r, c.masking[3], c.rotate[3])
- l, r = r, l^f3(r, c.masking[2], c.rotate[2])
- l, r = r, l^f2(r, c.masking[1], c.rotate[1])
- l, r = r, l^f1(r, c.masking[0], c.rotate[0])
-
- dst[0] = uint8(r >> 24)
- dst[1] = uint8(r >> 16)
- dst[2] = uint8(r >> 8)
- dst[3] = uint8(r)
- dst[4] = uint8(l >> 24)
- dst[5] = uint8(l >> 16)
- dst[6] = uint8(l >> 8)
- dst[7] = uint8(l)
-}
-
-type keyScheduleA [4][7]uint8
-type keyScheduleB [4][5]uint8
-
-// keyScheduleRound contains the magic values for a round of the key schedule.
-// The keyScheduleA deals with the lines like:
-// z0z1z2z3 = x0x1x2x3 ^ S5[xD] ^ S6[xF] ^ S7[xC] ^ S8[xE] ^ S7[x8]
-// Conceptually, both x and z are in the same array, x first. The first
-// element describes which word of this array gets written to and the
-// second, which word gets read. So, for the line above, it's "4, 0", because
-// it's writing to the first word of z, which, being after x, is word 4, and
-// reading from the first word of x: word 0.
-//
-// Next are the indexes into the S-boxes. Now the array is treated as bytes. So
-// "xD" is 0xd. The first byte of z is written as "16 + 0", just to be clear
-// that it's z that we're indexing.
-//
-// keyScheduleB deals with lines like:
-// K1 = S5[z8] ^ S6[z9] ^ S7[z7] ^ S8[z6] ^ S5[z2]
-// "K1" is ignored because key words are always written in order. So the five
-// elements are the S-box indexes. They use the same form as in keyScheduleA,
-// above.
-
-type keyScheduleRound struct{}
-type keySchedule []keyScheduleRound
-
-var schedule = []struct {
- a keyScheduleA
- b keyScheduleB
-}{
- {
- keyScheduleA{
- {4, 0, 0xd, 0xf, 0xc, 0xe, 0x8},
- {5, 2, 16 + 0, 16 + 2, 16 + 1, 16 + 3, 0xa},
- {6, 3, 16 + 7, 16 + 6, 16 + 5, 16 + 4, 9},
- {7, 1, 16 + 0xa, 16 + 9, 16 + 0xb, 16 + 8, 0xb},
- },
- keyScheduleB{
- {16 + 8, 16 + 9, 16 + 7, 16 + 6, 16 + 2},
- {16 + 0xa, 16 + 0xb, 16 + 5, 16 + 4, 16 + 6},
- {16 + 0xc, 16 + 0xd, 16 + 3, 16 + 2, 16 + 9},
- {16 + 0xe, 16 + 0xf, 16 + 1, 16 + 0, 16 + 0xc},
- },
- },
- {
- keyScheduleA{
- {0, 6, 16 + 5, 16 + 7, 16 + 4, 16 + 6, 16 + 0},
- {1, 4, 0, 2, 1, 3, 16 + 2},
- {2, 5, 7, 6, 5, 4, 16 + 1},
- {3, 7, 0xa, 9, 0xb, 8, 16 + 3},
- },
- keyScheduleB{
- {3, 2, 0xc, 0xd, 8},
- {1, 0, 0xe, 0xf, 0xd},
- {7, 6, 8, 9, 3},
- {5, 4, 0xa, 0xb, 7},
- },
- },
- {
- keyScheduleA{
- {4, 0, 0xd, 0xf, 0xc, 0xe, 8},
- {5, 2, 16 + 0, 16 + 2, 16 + 1, 16 + 3, 0xa},
- {6, 3, 16 + 7, 16 + 6, 16 + 5, 16 + 4, 9},
- {7, 1, 16 + 0xa, 16 + 9, 16 + 0xb, 16 + 8, 0xb},
- },
- keyScheduleB{
- {16 + 3, 16 + 2, 16 + 0xc, 16 + 0xd, 16 + 9},
- {16 + 1, 16 + 0, 16 + 0xe, 16 + 0xf, 16 + 0xc},
- {16 + 7, 16 + 6, 16 + 8, 16 + 9, 16 + 2},
- {16 + 5, 16 + 4, 16 + 0xa, 16 + 0xb, 16 + 6},
- },
- },
- {
- keyScheduleA{
- {0, 6, 16 + 5, 16 + 7, 16 + 4, 16 + 6, 16 + 0},
- {1, 4, 0, 2, 1, 3, 16 + 2},
- {2, 5, 7, 6, 5, 4, 16 + 1},
- {3, 7, 0xa, 9, 0xb, 8, 16 + 3},
- },
- keyScheduleB{
- {8, 9, 7, 6, 3},
- {0xa, 0xb, 5, 4, 7},
- {0xc, 0xd, 3, 2, 8},
- {0xe, 0xf, 1, 0, 0xd},
- },
- },
-}
-
-func (c *Cipher) keySchedule(in []byte) {
- var t [8]uint32
- var k [32]uint32
-
- for i := 0; i < 4; i++ {
- j := i * 4
- t[i] = uint32(in[j])<<24 | uint32(in[j+1])<<16 | uint32(in[j+2])<<8 | uint32(in[j+3])
- }
-
- x := []byte{6, 7, 4, 5}
- ki := 0
-
- for half := 0; half < 2; half++ {
- for _, round := range schedule {
- for j := 0; j < 4; j++ {
- var a [7]uint8
- copy(a[:], round.a[j][:])
- w := t[a[1]]
- w ^= sBox[4][(t[a[2]>>2]>>(24-8*(a[2]&3)))&0xff]
- w ^= sBox[5][(t[a[3]>>2]>>(24-8*(a[3]&3)))&0xff]
- w ^= sBox[6][(t[a[4]>>2]>>(24-8*(a[4]&3)))&0xff]
- w ^= sBox[7][(t[a[5]>>2]>>(24-8*(a[5]&3)))&0xff]
- w ^= sBox[x[j]][(t[a[6]>>2]>>(24-8*(a[6]&3)))&0xff]
- t[a[0]] = w
- }
-
- for j := 0; j < 4; j++ {
- var b [5]uint8
- copy(b[:], round.b[j][:])
- w := sBox[4][(t[b[0]>>2]>>(24-8*(b[0]&3)))&0xff]
- w ^= sBox[5][(t[b[1]>>2]>>(24-8*(b[1]&3)))&0xff]
- w ^= sBox[6][(t[b[2]>>2]>>(24-8*(b[2]&3)))&0xff]
- w ^= sBox[7][(t[b[3]>>2]>>(24-8*(b[3]&3)))&0xff]
- w ^= sBox[4+j][(t[b[4]>>2]>>(24-8*(b[4]&3)))&0xff]
- k[ki] = w
- ki++
- }
- }
- }
-
- for i := 0; i < 16; i++ {
- c.masking[i] = k[i]
- c.rotate[i] = uint8(k[16+i] & 0x1f)
- }
-}
-
-// These are the three 'f' functions. See RFC 2144, section 2.2.
-func f1(d, m uint32, r uint8) uint32 {
- t := m + d
- I := (t << r) | (t >> (32 - r))
- return ((sBox[0][I>>24] ^ sBox[1][(I>>16)&0xff]) - sBox[2][(I>>8)&0xff]) + sBox[3][I&0xff]
-}
-
-func f2(d, m uint32, r uint8) uint32 {
- t := m ^ d
- I := (t << r) | (t >> (32 - r))
- return ((sBox[0][I>>24] - sBox[1][(I>>16)&0xff]) + sBox[2][(I>>8)&0xff]) ^ sBox[3][I&0xff]
-}
-
-func f3(d, m uint32, r uint8) uint32 {
- t := m - d
- I := (t << r) | (t >> (32 - r))
- return ((sBox[0][I>>24] + sBox[1][(I>>16)&0xff]) ^ sBox[2][(I>>8)&0xff]) - sBox[3][I&0xff]
-}
-
-var sBox = [8][256]uint32{
- {
- 0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, 0x1e213f2f, 0x9c004dd3, 0x6003e540, 0xcf9fc949,
- 0xbfd4af27, 0x88bbbdb5, 0xe2034090, 0x98d09675, 0x6e63a0e0, 0x15c361d2, 0xc2e7661d, 0x22d4ff8e,
- 0x28683b6f, 0xc07fd059, 0xff2379c8, 0x775f50e2, 0x43c340d3, 0xdf2f8656, 0x887ca41a, 0xa2d2bd2d,
- 0xa1c9e0d6, 0x346c4819, 0x61b76d87, 0x22540f2f, 0x2abe32e1, 0xaa54166b, 0x22568e3a, 0xa2d341d0,
- 0x66db40c8, 0xa784392f, 0x004dff2f, 0x2db9d2de, 0x97943fac, 0x4a97c1d8, 0x527644b7, 0xb5f437a7,
- 0xb82cbaef, 0xd751d159, 0x6ff7f0ed, 0x5a097a1f, 0x827b68d0, 0x90ecf52e, 0x22b0c054, 0xbc8e5935,
- 0x4b6d2f7f, 0x50bb64a2, 0xd2664910, 0xbee5812d, 0xb7332290, 0xe93b159f, 0xb48ee411, 0x4bff345d,
- 0xfd45c240, 0xad31973f, 0xc4f6d02e, 0x55fc8165, 0xd5b1caad, 0xa1ac2dae, 0xa2d4b76d, 0xc19b0c50,
- 0x882240f2, 0x0c6e4f38, 0xa4e4bfd7, 0x4f5ba272, 0x564c1d2f, 0xc59c5319, 0xb949e354, 0xb04669fe,
- 0xb1b6ab8a, 0xc71358dd, 0x6385c545, 0x110f935d, 0x57538ad5, 0x6a390493, 0xe63d37e0, 0x2a54f6b3,
- 0x3a787d5f, 0x6276a0b5, 0x19a6fcdf, 0x7a42206a, 0x29f9d4d5, 0xf61b1891, 0xbb72275e, 0xaa508167,
- 0x38901091, 0xc6b505eb, 0x84c7cb8c, 0x2ad75a0f, 0x874a1427, 0xa2d1936b, 0x2ad286af, 0xaa56d291,
- 0xd7894360, 0x425c750d, 0x93b39e26, 0x187184c9, 0x6c00b32d, 0x73e2bb14, 0xa0bebc3c, 0x54623779,
- 0x64459eab, 0x3f328b82, 0x7718cf82, 0x59a2cea6, 0x04ee002e, 0x89fe78e6, 0x3fab0950, 0x325ff6c2,
- 0x81383f05, 0x6963c5c8, 0x76cb5ad6, 0xd49974c9, 0xca180dcf, 0x380782d5, 0xc7fa5cf6, 0x8ac31511,
- 0x35e79e13, 0x47da91d0, 0xf40f9086, 0xa7e2419e, 0x31366241, 0x051ef495, 0xaa573b04, 0x4a805d8d,
- 0x548300d0, 0x00322a3c, 0xbf64cddf, 0xba57a68e, 0x75c6372b, 0x50afd341, 0xa7c13275, 0x915a0bf5,
- 0x6b54bfab, 0x2b0b1426, 0xab4cc9d7, 0x449ccd82, 0xf7fbf265, 0xab85c5f3, 0x1b55db94, 0xaad4e324,
- 0xcfa4bd3f, 0x2deaa3e2, 0x9e204d02, 0xc8bd25ac, 0xeadf55b3, 0xd5bd9e98, 0xe31231b2, 0x2ad5ad6c,
- 0x954329de, 0xadbe4528, 0xd8710f69, 0xaa51c90f, 0xaa786bf6, 0x22513f1e, 0xaa51a79b, 0x2ad344cc,
- 0x7b5a41f0, 0xd37cfbad, 0x1b069505, 0x41ece491, 0xb4c332e6, 0x032268d4, 0xc9600acc, 0xce387e6d,
- 0xbf6bb16c, 0x6a70fb78, 0x0d03d9c9, 0xd4df39de, 0xe01063da, 0x4736f464, 0x5ad328d8, 0xb347cc96,
- 0x75bb0fc3, 0x98511bfb, 0x4ffbcc35, 0xb58bcf6a, 0xe11f0abc, 0xbfc5fe4a, 0xa70aec10, 0xac39570a,
- 0x3f04442f, 0x6188b153, 0xe0397a2e, 0x5727cb79, 0x9ceb418f, 0x1cacd68d, 0x2ad37c96, 0x0175cb9d,
- 0xc69dff09, 0xc75b65f0, 0xd9db40d8, 0xec0e7779, 0x4744ead4, 0xb11c3274, 0xdd24cb9e, 0x7e1c54bd,
- 0xf01144f9, 0xd2240eb1, 0x9675b3fd, 0xa3ac3755, 0xd47c27af, 0x51c85f4d, 0x56907596, 0xa5bb15e6,
- 0x580304f0, 0xca042cf1, 0x011a37ea, 0x8dbfaadb, 0x35ba3e4a, 0x3526ffa0, 0xc37b4d09, 0xbc306ed9,
- 0x98a52666, 0x5648f725, 0xff5e569d, 0x0ced63d0, 0x7c63b2cf, 0x700b45e1, 0xd5ea50f1, 0x85a92872,
- 0xaf1fbda7, 0xd4234870, 0xa7870bf3, 0x2d3b4d79, 0x42e04198, 0x0cd0ede7, 0x26470db8, 0xf881814c,
- 0x474d6ad7, 0x7c0c5e5c, 0xd1231959, 0x381b7298, 0xf5d2f4db, 0xab838653, 0x6e2f1e23, 0x83719c9e,
- 0xbd91e046, 0x9a56456e, 0xdc39200c, 0x20c8c571, 0x962bda1c, 0xe1e696ff, 0xb141ab08, 0x7cca89b9,
- 0x1a69e783, 0x02cc4843, 0xa2f7c579, 0x429ef47d, 0x427b169c, 0x5ac9f049, 0xdd8f0f00, 0x5c8165bf,
- },
- {
- 0x1f201094, 0xef0ba75b, 0x69e3cf7e, 0x393f4380, 0xfe61cf7a, 0xeec5207a, 0x55889c94, 0x72fc0651,
- 0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, 0x99c430ef, 0x5f0c0794, 0x18dcdb7d, 0xa1d6eff3,
- 0xa0b52f7b, 0x59e83605, 0xee15b094, 0xe9ffd909, 0xdc440086, 0xef944459, 0xba83ccb3, 0xe0c3cdfb,
- 0xd1da4181, 0x3b092ab1, 0xf997f1c1, 0xa5e6cf7b, 0x01420ddb, 0xe4e7ef5b, 0x25a1ff41, 0xe180f806,
- 0x1fc41080, 0x179bee7a, 0xd37ac6a9, 0xfe5830a4, 0x98de8b7f, 0x77e83f4e, 0x79929269, 0x24fa9f7b,
- 0xe113c85b, 0xacc40083, 0xd7503525, 0xf7ea615f, 0x62143154, 0x0d554b63, 0x5d681121, 0xc866c359,
- 0x3d63cf73, 0xcee234c0, 0xd4d87e87, 0x5c672b21, 0x071f6181, 0x39f7627f, 0x361e3084, 0xe4eb573b,
- 0x602f64a4, 0xd63acd9c, 0x1bbc4635, 0x9e81032d, 0x2701f50c, 0x99847ab4, 0xa0e3df79, 0xba6cf38c,
- 0x10843094, 0x2537a95e, 0xf46f6ffe, 0xa1ff3b1f, 0x208cfb6a, 0x8f458c74, 0xd9e0a227, 0x4ec73a34,
- 0xfc884f69, 0x3e4de8df, 0xef0e0088, 0x3559648d, 0x8a45388c, 0x1d804366, 0x721d9bfd, 0xa58684bb,
- 0xe8256333, 0x844e8212, 0x128d8098, 0xfed33fb4, 0xce280ae1, 0x27e19ba5, 0xd5a6c252, 0xe49754bd,
- 0xc5d655dd, 0xeb667064, 0x77840b4d, 0xa1b6a801, 0x84db26a9, 0xe0b56714, 0x21f043b7, 0xe5d05860,
- 0x54f03084, 0x066ff472, 0xa31aa153, 0xdadc4755, 0xb5625dbf, 0x68561be6, 0x83ca6b94, 0x2d6ed23b,
- 0xeccf01db, 0xa6d3d0ba, 0xb6803d5c, 0xaf77a709, 0x33b4a34c, 0x397bc8d6, 0x5ee22b95, 0x5f0e5304,
- 0x81ed6f61, 0x20e74364, 0xb45e1378, 0xde18639b, 0x881ca122, 0xb96726d1, 0x8049a7e8, 0x22b7da7b,
- 0x5e552d25, 0x5272d237, 0x79d2951c, 0xc60d894c, 0x488cb402, 0x1ba4fe5b, 0xa4b09f6b, 0x1ca815cf,
- 0xa20c3005, 0x8871df63, 0xb9de2fcb, 0x0cc6c9e9, 0x0beeff53, 0xe3214517, 0xb4542835, 0x9f63293c,
- 0xee41e729, 0x6e1d2d7c, 0x50045286, 0x1e6685f3, 0xf33401c6, 0x30a22c95, 0x31a70850, 0x60930f13,
- 0x73f98417, 0xa1269859, 0xec645c44, 0x52c877a9, 0xcdff33a6, 0xa02b1741, 0x7cbad9a2, 0x2180036f,
- 0x50d99c08, 0xcb3f4861, 0xc26bd765, 0x64a3f6ab, 0x80342676, 0x25a75e7b, 0xe4e6d1fc, 0x20c710e6,
- 0xcdf0b680, 0x17844d3b, 0x31eef84d, 0x7e0824e4, 0x2ccb49eb, 0x846a3bae, 0x8ff77888, 0xee5d60f6,
- 0x7af75673, 0x2fdd5cdb, 0xa11631c1, 0x30f66f43, 0xb3faec54, 0x157fd7fa, 0xef8579cc, 0xd152de58,
- 0xdb2ffd5e, 0x8f32ce19, 0x306af97a, 0x02f03ef8, 0x99319ad5, 0xc242fa0f, 0xa7e3ebb0, 0xc68e4906,
- 0xb8da230c, 0x80823028, 0xdcdef3c8, 0xd35fb171, 0x088a1bc8, 0xbec0c560, 0x61a3c9e8, 0xbca8f54d,
- 0xc72feffa, 0x22822e99, 0x82c570b4, 0xd8d94e89, 0x8b1c34bc, 0x301e16e6, 0x273be979, 0xb0ffeaa6,
- 0x61d9b8c6, 0x00b24869, 0xb7ffce3f, 0x08dc283b, 0x43daf65a, 0xf7e19798, 0x7619b72f, 0x8f1c9ba4,
- 0xdc8637a0, 0x16a7d3b1, 0x9fc393b7, 0xa7136eeb, 0xc6bcc63e, 0x1a513742, 0xef6828bc, 0x520365d6,
- 0x2d6a77ab, 0x3527ed4b, 0x821fd216, 0x095c6e2e, 0xdb92f2fb, 0x5eea29cb, 0x145892f5, 0x91584f7f,
- 0x5483697b, 0x2667a8cc, 0x85196048, 0x8c4bacea, 0x833860d4, 0x0d23e0f9, 0x6c387e8a, 0x0ae6d249,
- 0xb284600c, 0xd835731d, 0xdcb1c647, 0xac4c56ea, 0x3ebd81b3, 0x230eabb0, 0x6438bc87, 0xf0b5b1fa,
- 0x8f5ea2b3, 0xfc184642, 0x0a036b7a, 0x4fb089bd, 0x649da589, 0xa345415e, 0x5c038323, 0x3e5d3bb9,
- 0x43d79572, 0x7e6dd07c, 0x06dfdf1e, 0x6c6cc4ef, 0x7160a539, 0x73bfbe70, 0x83877605, 0x4523ecf1,
- },
- {
- 0x8defc240, 0x25fa5d9f, 0xeb903dbf, 0xe810c907, 0x47607fff, 0x369fe44b, 0x8c1fc644, 0xaececa90,
- 0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, 0x920e8806, 0xf0ad0548, 0xe13c8d83, 0x927010d5,
- 0x11107d9f, 0x07647db9, 0xb2e3e4d4, 0x3d4f285e, 0xb9afa820, 0xfade82e0, 0xa067268b, 0x8272792e,
- 0x553fb2c0, 0x489ae22b, 0xd4ef9794, 0x125e3fbc, 0x21fffcee, 0x825b1bfd, 0x9255c5ed, 0x1257a240,
- 0x4e1a8302, 0xbae07fff, 0x528246e7, 0x8e57140e, 0x3373f7bf, 0x8c9f8188, 0xa6fc4ee8, 0xc982b5a5,
- 0xa8c01db7, 0x579fc264, 0x67094f31, 0xf2bd3f5f, 0x40fff7c1, 0x1fb78dfc, 0x8e6bd2c1, 0x437be59b,
- 0x99b03dbf, 0xb5dbc64b, 0x638dc0e6, 0x55819d99, 0xa197c81c, 0x4a012d6e, 0xc5884a28, 0xccc36f71,
- 0xb843c213, 0x6c0743f1, 0x8309893c, 0x0feddd5f, 0x2f7fe850, 0xd7c07f7e, 0x02507fbf, 0x5afb9a04,
- 0xa747d2d0, 0x1651192e, 0xaf70bf3e, 0x58c31380, 0x5f98302e, 0x727cc3c4, 0x0a0fb402, 0x0f7fef82,
- 0x8c96fdad, 0x5d2c2aae, 0x8ee99a49, 0x50da88b8, 0x8427f4a0, 0x1eac5790, 0x796fb449, 0x8252dc15,
- 0xefbd7d9b, 0xa672597d, 0xada840d8, 0x45f54504, 0xfa5d7403, 0xe83ec305, 0x4f91751a, 0x925669c2,
- 0x23efe941, 0xa903f12e, 0x60270df2, 0x0276e4b6, 0x94fd6574, 0x927985b2, 0x8276dbcb, 0x02778176,
- 0xf8af918d, 0x4e48f79e, 0x8f616ddf, 0xe29d840e, 0x842f7d83, 0x340ce5c8, 0x96bbb682, 0x93b4b148,
- 0xef303cab, 0x984faf28, 0x779faf9b, 0x92dc560d, 0x224d1e20, 0x8437aa88, 0x7d29dc96, 0x2756d3dc,
- 0x8b907cee, 0xb51fd240, 0xe7c07ce3, 0xe566b4a1, 0xc3e9615e, 0x3cf8209d, 0x6094d1e3, 0xcd9ca341,
- 0x5c76460e, 0x00ea983b, 0xd4d67881, 0xfd47572c, 0xf76cedd9, 0xbda8229c, 0x127dadaa, 0x438a074e,
- 0x1f97c090, 0x081bdb8a, 0x93a07ebe, 0xb938ca15, 0x97b03cff, 0x3dc2c0f8, 0x8d1ab2ec, 0x64380e51,
- 0x68cc7bfb, 0xd90f2788, 0x12490181, 0x5de5ffd4, 0xdd7ef86a, 0x76a2e214, 0xb9a40368, 0x925d958f,
- 0x4b39fffa, 0xba39aee9, 0xa4ffd30b, 0xfaf7933b, 0x6d498623, 0x193cbcfa, 0x27627545, 0x825cf47a,
- 0x61bd8ba0, 0xd11e42d1, 0xcead04f4, 0x127ea392, 0x10428db7, 0x8272a972, 0x9270c4a8, 0x127de50b,
- 0x285ba1c8, 0x3c62f44f, 0x35c0eaa5, 0xe805d231, 0x428929fb, 0xb4fcdf82, 0x4fb66a53, 0x0e7dc15b,
- 0x1f081fab, 0x108618ae, 0xfcfd086d, 0xf9ff2889, 0x694bcc11, 0x236a5cae, 0x12deca4d, 0x2c3f8cc5,
- 0xd2d02dfe, 0xf8ef5896, 0xe4cf52da, 0x95155b67, 0x494a488c, 0xb9b6a80c, 0x5c8f82bc, 0x89d36b45,
- 0x3a609437, 0xec00c9a9, 0x44715253, 0x0a874b49, 0xd773bc40, 0x7c34671c, 0x02717ef6, 0x4feb5536,
- 0xa2d02fff, 0xd2bf60c4, 0xd43f03c0, 0x50b4ef6d, 0x07478cd1, 0x006e1888, 0xa2e53f55, 0xb9e6d4bc,
- 0xa2048016, 0x97573833, 0xd7207d67, 0xde0f8f3d, 0x72f87b33, 0xabcc4f33, 0x7688c55d, 0x7b00a6b0,
- 0x947b0001, 0x570075d2, 0xf9bb88f8, 0x8942019e, 0x4264a5ff, 0x856302e0, 0x72dbd92b, 0xee971b69,
- 0x6ea22fde, 0x5f08ae2b, 0xaf7a616d, 0xe5c98767, 0xcf1febd2, 0x61efc8c2, 0xf1ac2571, 0xcc8239c2,
- 0x67214cb8, 0xb1e583d1, 0xb7dc3e62, 0x7f10bdce, 0xf90a5c38, 0x0ff0443d, 0x606e6dc6, 0x60543a49,
- 0x5727c148, 0x2be98a1d, 0x8ab41738, 0x20e1be24, 0xaf96da0f, 0x68458425, 0x99833be5, 0x600d457d,
- 0x282f9350, 0x8334b362, 0xd91d1120, 0x2b6d8da0, 0x642b1e31, 0x9c305a00, 0x52bce688, 0x1b03588a,
- 0xf7baefd5, 0x4142ed9c, 0xa4315c11, 0x83323ec5, 0xdfef4636, 0xa133c501, 0xe9d3531c, 0xee353783,
- },
- {
- 0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, 0x4a4f7bdb, 0x64ad8c57, 0x85510443, 0xfa020ed1,
- 0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, 0xfd059d43, 0x6497b7b1, 0xf3641f63, 0x241e4adf,
- 0x28147f5f, 0x4fa2b8cd, 0xc9430040, 0x0cc32220, 0xfdd30b30, 0xc0a5374f, 0x1d2d00d9, 0x24147b15,
- 0xee4d111a, 0x0fca5167, 0x71ff904c, 0x2d195ffe, 0x1a05645f, 0x0c13fefe, 0x081b08ca, 0x05170121,
- 0x80530100, 0xe83e5efe, 0xac9af4f8, 0x7fe72701, 0xd2b8ee5f, 0x06df4261, 0xbb9e9b8a, 0x7293ea25,
- 0xce84ffdf, 0xf5718801, 0x3dd64b04, 0xa26f263b, 0x7ed48400, 0x547eebe6, 0x446d4ca0, 0x6cf3d6f5,
- 0x2649abdf, 0xaea0c7f5, 0x36338cc1, 0x503f7e93, 0xd3772061, 0x11b638e1, 0x72500e03, 0xf80eb2bb,
- 0xabe0502e, 0xec8d77de, 0x57971e81, 0xe14f6746, 0xc9335400, 0x6920318f, 0x081dbb99, 0xffc304a5,
- 0x4d351805, 0x7f3d5ce3, 0xa6c866c6, 0x5d5bcca9, 0xdaec6fea, 0x9f926f91, 0x9f46222f, 0x3991467d,
- 0xa5bf6d8e, 0x1143c44f, 0x43958302, 0xd0214eeb, 0x022083b8, 0x3fb6180c, 0x18f8931e, 0x281658e6,
- 0x26486e3e, 0x8bd78a70, 0x7477e4c1, 0xb506e07c, 0xf32d0a25, 0x79098b02, 0xe4eabb81, 0x28123b23,
- 0x69dead38, 0x1574ca16, 0xdf871b62, 0x211c40b7, 0xa51a9ef9, 0x0014377b, 0x041e8ac8, 0x09114003,
- 0xbd59e4d2, 0xe3d156d5, 0x4fe876d5, 0x2f91a340, 0x557be8de, 0x00eae4a7, 0x0ce5c2ec, 0x4db4bba6,
- 0xe756bdff, 0xdd3369ac, 0xec17b035, 0x06572327, 0x99afc8b0, 0x56c8c391, 0x6b65811c, 0x5e146119,
- 0x6e85cb75, 0xbe07c002, 0xc2325577, 0x893ff4ec, 0x5bbfc92d, 0xd0ec3b25, 0xb7801ab7, 0x8d6d3b24,
- 0x20c763ef, 0xc366a5fc, 0x9c382880, 0x0ace3205, 0xaac9548a, 0xeca1d7c7, 0x041afa32, 0x1d16625a,
- 0x6701902c, 0x9b757a54, 0x31d477f7, 0x9126b031, 0x36cc6fdb, 0xc70b8b46, 0xd9e66a48, 0x56e55a79,
- 0x026a4ceb, 0x52437eff, 0x2f8f76b4, 0x0df980a5, 0x8674cde3, 0xedda04eb, 0x17a9be04, 0x2c18f4df,
- 0xb7747f9d, 0xab2af7b4, 0xefc34d20, 0x2e096b7c, 0x1741a254, 0xe5b6a035, 0x213d42f6, 0x2c1c7c26,
- 0x61c2f50f, 0x6552daf9, 0xd2c231f8, 0x25130f69, 0xd8167fa2, 0x0418f2c8, 0x001a96a6, 0x0d1526ab,
- 0x63315c21, 0x5e0a72ec, 0x49bafefd, 0x187908d9, 0x8d0dbd86, 0x311170a7, 0x3e9b640c, 0xcc3e10d7,
- 0xd5cad3b6, 0x0caec388, 0xf73001e1, 0x6c728aff, 0x71eae2a1, 0x1f9af36e, 0xcfcbd12f, 0xc1de8417,
- 0xac07be6b, 0xcb44a1d8, 0x8b9b0f56, 0x013988c3, 0xb1c52fca, 0xb4be31cd, 0xd8782806, 0x12a3a4e2,
- 0x6f7de532, 0x58fd7eb6, 0xd01ee900, 0x24adffc2, 0xf4990fc5, 0x9711aac5, 0x001d7b95, 0x82e5e7d2,
- 0x109873f6, 0x00613096, 0xc32d9521, 0xada121ff, 0x29908415, 0x7fbb977f, 0xaf9eb3db, 0x29c9ed2a,
- 0x5ce2a465, 0xa730f32c, 0xd0aa3fe8, 0x8a5cc091, 0xd49e2ce7, 0x0ce454a9, 0xd60acd86, 0x015f1919,
- 0x77079103, 0xdea03af6, 0x78a8565e, 0xdee356df, 0x21f05cbe, 0x8b75e387, 0xb3c50651, 0xb8a5c3ef,
- 0xd8eeb6d2, 0xe523be77, 0xc2154529, 0x2f69efdf, 0xafe67afb, 0xf470c4b2, 0xf3e0eb5b, 0xd6cc9876,
- 0x39e4460c, 0x1fda8538, 0x1987832f, 0xca007367, 0xa99144f8, 0x296b299e, 0x492fc295, 0x9266beab,
- 0xb5676e69, 0x9bd3ddda, 0xdf7e052f, 0xdb25701c, 0x1b5e51ee, 0xf65324e6, 0x6afce36c, 0x0316cc04,
- 0x8644213e, 0xb7dc59d0, 0x7965291f, 0xccd6fd43, 0x41823979, 0x932bcdf6, 0xb657c34d, 0x4edfd282,
- 0x7ae5290c, 0x3cb9536b, 0x851e20fe, 0x9833557e, 0x13ecf0b0, 0xd3ffb372, 0x3f85c5c1, 0x0aef7ed2,
- },
- {
- 0x7ec90c04, 0x2c6e74b9, 0x9b0e66df, 0xa6337911, 0xb86a7fff, 0x1dd358f5, 0x44dd9d44, 0x1731167f,
- 0x08fbf1fa, 0xe7f511cc, 0xd2051b00, 0x735aba00, 0x2ab722d8, 0x386381cb, 0xacf6243a, 0x69befd7a,
- 0xe6a2e77f, 0xf0c720cd, 0xc4494816, 0xccf5c180, 0x38851640, 0x15b0a848, 0xe68b18cb, 0x4caadeff,
- 0x5f480a01, 0x0412b2aa, 0x259814fc, 0x41d0efe2, 0x4e40b48d, 0x248eb6fb, 0x8dba1cfe, 0x41a99b02,
- 0x1a550a04, 0xba8f65cb, 0x7251f4e7, 0x95a51725, 0xc106ecd7, 0x97a5980a, 0xc539b9aa, 0x4d79fe6a,
- 0xf2f3f763, 0x68af8040, 0xed0c9e56, 0x11b4958b, 0xe1eb5a88, 0x8709e6b0, 0xd7e07156, 0x4e29fea7,
- 0x6366e52d, 0x02d1c000, 0xc4ac8e05, 0x9377f571, 0x0c05372a, 0x578535f2, 0x2261be02, 0xd642a0c9,
- 0xdf13a280, 0x74b55bd2, 0x682199c0, 0xd421e5ec, 0x53fb3ce8, 0xc8adedb3, 0x28a87fc9, 0x3d959981,
- 0x5c1ff900, 0xfe38d399, 0x0c4eff0b, 0x062407ea, 0xaa2f4fb1, 0x4fb96976, 0x90c79505, 0xb0a8a774,
- 0xef55a1ff, 0xe59ca2c2, 0xa6b62d27, 0xe66a4263, 0xdf65001f, 0x0ec50966, 0xdfdd55bc, 0x29de0655,
- 0x911e739a, 0x17af8975, 0x32c7911c, 0x89f89468, 0x0d01e980, 0x524755f4, 0x03b63cc9, 0x0cc844b2,
- 0xbcf3f0aa, 0x87ac36e9, 0xe53a7426, 0x01b3d82b, 0x1a9e7449, 0x64ee2d7e, 0xcddbb1da, 0x01c94910,
- 0xb868bf80, 0x0d26f3fd, 0x9342ede7, 0x04a5c284, 0x636737b6, 0x50f5b616, 0xf24766e3, 0x8eca36c1,
- 0x136e05db, 0xfef18391, 0xfb887a37, 0xd6e7f7d4, 0xc7fb7dc9, 0x3063fcdf, 0xb6f589de, 0xec2941da,
- 0x26e46695, 0xb7566419, 0xf654efc5, 0xd08d58b7, 0x48925401, 0xc1bacb7f, 0xe5ff550f, 0xb6083049,
- 0x5bb5d0e8, 0x87d72e5a, 0xab6a6ee1, 0x223a66ce, 0xc62bf3cd, 0x9e0885f9, 0x68cb3e47, 0x086c010f,
- 0xa21de820, 0xd18b69de, 0xf3f65777, 0xfa02c3f6, 0x407edac3, 0xcbb3d550, 0x1793084d, 0xb0d70eba,
- 0x0ab378d5, 0xd951fb0c, 0xded7da56, 0x4124bbe4, 0x94ca0b56, 0x0f5755d1, 0xe0e1e56e, 0x6184b5be,
- 0x580a249f, 0x94f74bc0, 0xe327888e, 0x9f7b5561, 0xc3dc0280, 0x05687715, 0x646c6bd7, 0x44904db3,
- 0x66b4f0a3, 0xc0f1648a, 0x697ed5af, 0x49e92ff6, 0x309e374f, 0x2cb6356a, 0x85808573, 0x4991f840,
- 0x76f0ae02, 0x083be84d, 0x28421c9a, 0x44489406, 0x736e4cb8, 0xc1092910, 0x8bc95fc6, 0x7d869cf4,
- 0x134f616f, 0x2e77118d, 0xb31b2be1, 0xaa90b472, 0x3ca5d717, 0x7d161bba, 0x9cad9010, 0xaf462ba2,
- 0x9fe459d2, 0x45d34559, 0xd9f2da13, 0xdbc65487, 0xf3e4f94e, 0x176d486f, 0x097c13ea, 0x631da5c7,
- 0x445f7382, 0x175683f4, 0xcdc66a97, 0x70be0288, 0xb3cdcf72, 0x6e5dd2f3, 0x20936079, 0x459b80a5,
- 0xbe60e2db, 0xa9c23101, 0xeba5315c, 0x224e42f2, 0x1c5c1572, 0xf6721b2c, 0x1ad2fff3, 0x8c25404e,
- 0x324ed72f, 0x4067b7fd, 0x0523138e, 0x5ca3bc78, 0xdc0fd66e, 0x75922283, 0x784d6b17, 0x58ebb16e,
- 0x44094f85, 0x3f481d87, 0xfcfeae7b, 0x77b5ff76, 0x8c2302bf, 0xaaf47556, 0x5f46b02a, 0x2b092801,
- 0x3d38f5f7, 0x0ca81f36, 0x52af4a8a, 0x66d5e7c0, 0xdf3b0874, 0x95055110, 0x1b5ad7a8, 0xf61ed5ad,
- 0x6cf6e479, 0x20758184, 0xd0cefa65, 0x88f7be58, 0x4a046826, 0x0ff6f8f3, 0xa09c7f70, 0x5346aba0,
- 0x5ce96c28, 0xe176eda3, 0x6bac307f, 0x376829d2, 0x85360fa9, 0x17e3fe2a, 0x24b79767, 0xf5a96b20,
- 0xd6cd2595, 0x68ff1ebf, 0x7555442c, 0xf19f06be, 0xf9e0659a, 0xeeb9491d, 0x34010718, 0xbb30cab8,
- 0xe822fe15, 0x88570983, 0x750e6249, 0xda627e55, 0x5e76ffa8, 0xb1534546, 0x6d47de08, 0xefe9e7d4,
- },
- {
- 0xf6fa8f9d, 0x2cac6ce1, 0x4ca34867, 0xe2337f7c, 0x95db08e7, 0x016843b4, 0xeced5cbc, 0x325553ac,
- 0xbf9f0960, 0xdfa1e2ed, 0x83f0579d, 0x63ed86b9, 0x1ab6a6b8, 0xde5ebe39, 0xf38ff732, 0x8989b138,
- 0x33f14961, 0xc01937bd, 0xf506c6da, 0xe4625e7e, 0xa308ea99, 0x4e23e33c, 0x79cbd7cc, 0x48a14367,
- 0xa3149619, 0xfec94bd5, 0xa114174a, 0xeaa01866, 0xa084db2d, 0x09a8486f, 0xa888614a, 0x2900af98,
- 0x01665991, 0xe1992863, 0xc8f30c60, 0x2e78ef3c, 0xd0d51932, 0xcf0fec14, 0xf7ca07d2, 0xd0a82072,
- 0xfd41197e, 0x9305a6b0, 0xe86be3da, 0x74bed3cd, 0x372da53c, 0x4c7f4448, 0xdab5d440, 0x6dba0ec3,
- 0x083919a7, 0x9fbaeed9, 0x49dbcfb0, 0x4e670c53, 0x5c3d9c01, 0x64bdb941, 0x2c0e636a, 0xba7dd9cd,
- 0xea6f7388, 0xe70bc762, 0x35f29adb, 0x5c4cdd8d, 0xf0d48d8c, 0xb88153e2, 0x08a19866, 0x1ae2eac8,
- 0x284caf89, 0xaa928223, 0x9334be53, 0x3b3a21bf, 0x16434be3, 0x9aea3906, 0xefe8c36e, 0xf890cdd9,
- 0x80226dae, 0xc340a4a3, 0xdf7e9c09, 0xa694a807, 0x5b7c5ecc, 0x221db3a6, 0x9a69a02f, 0x68818a54,
- 0xceb2296f, 0x53c0843a, 0xfe893655, 0x25bfe68a, 0xb4628abc, 0xcf222ebf, 0x25ac6f48, 0xa9a99387,
- 0x53bddb65, 0xe76ffbe7, 0xe967fd78, 0x0ba93563, 0x8e342bc1, 0xe8a11be9, 0x4980740d, 0xc8087dfc,
- 0x8de4bf99, 0xa11101a0, 0x7fd37975, 0xda5a26c0, 0xe81f994f, 0x9528cd89, 0xfd339fed, 0xb87834bf,
- 0x5f04456d, 0x22258698, 0xc9c4c83b, 0x2dc156be, 0x4f628daa, 0x57f55ec5, 0xe2220abe, 0xd2916ebf,
- 0x4ec75b95, 0x24f2c3c0, 0x42d15d99, 0xcd0d7fa0, 0x7b6e27ff, 0xa8dc8af0, 0x7345c106, 0xf41e232f,
- 0x35162386, 0xe6ea8926, 0x3333b094, 0x157ec6f2, 0x372b74af, 0x692573e4, 0xe9a9d848, 0xf3160289,
- 0x3a62ef1d, 0xa787e238, 0xf3a5f676, 0x74364853, 0x20951063, 0x4576698d, 0xb6fad407, 0x592af950,
- 0x36f73523, 0x4cfb6e87, 0x7da4cec0, 0x6c152daa, 0xcb0396a8, 0xc50dfe5d, 0xfcd707ab, 0x0921c42f,
- 0x89dff0bb, 0x5fe2be78, 0x448f4f33, 0x754613c9, 0x2b05d08d, 0x48b9d585, 0xdc049441, 0xc8098f9b,
- 0x7dede786, 0xc39a3373, 0x42410005, 0x6a091751, 0x0ef3c8a6, 0x890072d6, 0x28207682, 0xa9a9f7be,
- 0xbf32679d, 0xd45b5b75, 0xb353fd00, 0xcbb0e358, 0x830f220a, 0x1f8fb214, 0xd372cf08, 0xcc3c4a13,
- 0x8cf63166, 0x061c87be, 0x88c98f88, 0x6062e397, 0x47cf8e7a, 0xb6c85283, 0x3cc2acfb, 0x3fc06976,
- 0x4e8f0252, 0x64d8314d, 0xda3870e3, 0x1e665459, 0xc10908f0, 0x513021a5, 0x6c5b68b7, 0x822f8aa0,
- 0x3007cd3e, 0x74719eef, 0xdc872681, 0x073340d4, 0x7e432fd9, 0x0c5ec241, 0x8809286c, 0xf592d891,
- 0x08a930f6, 0x957ef305, 0xb7fbffbd, 0xc266e96f, 0x6fe4ac98, 0xb173ecc0, 0xbc60b42a, 0x953498da,
- 0xfba1ae12, 0x2d4bd736, 0x0f25faab, 0xa4f3fceb, 0xe2969123, 0x257f0c3d, 0x9348af49, 0x361400bc,
- 0xe8816f4a, 0x3814f200, 0xa3f94043, 0x9c7a54c2, 0xbc704f57, 0xda41e7f9, 0xc25ad33a, 0x54f4a084,
- 0xb17f5505, 0x59357cbe, 0xedbd15c8, 0x7f97c5ab, 0xba5ac7b5, 0xb6f6deaf, 0x3a479c3a, 0x5302da25,
- 0x653d7e6a, 0x54268d49, 0x51a477ea, 0x5017d55b, 0xd7d25d88, 0x44136c76, 0x0404a8c8, 0xb8e5a121,
- 0xb81a928a, 0x60ed5869, 0x97c55b96, 0xeaec991b, 0x29935913, 0x01fdb7f1, 0x088e8dfa, 0x9ab6f6f5,
- 0x3b4cbf9f, 0x4a5de3ab, 0xe6051d35, 0xa0e1d855, 0xd36b4cf1, 0xf544edeb, 0xb0e93524, 0xbebb8fbd,
- 0xa2d762cf, 0x49c92f54, 0x38b5f331, 0x7128a454, 0x48392905, 0xa65b1db8, 0x851c97bd, 0xd675cf2f,
- },
- {
- 0x85e04019, 0x332bf567, 0x662dbfff, 0xcfc65693, 0x2a8d7f6f, 0xab9bc912, 0xde6008a1, 0x2028da1f,
- 0x0227bce7, 0x4d642916, 0x18fac300, 0x50f18b82, 0x2cb2cb11, 0xb232e75c, 0x4b3695f2, 0xb28707de,
- 0xa05fbcf6, 0xcd4181e9, 0xe150210c, 0xe24ef1bd, 0xb168c381, 0xfde4e789, 0x5c79b0d8, 0x1e8bfd43,
- 0x4d495001, 0x38be4341, 0x913cee1d, 0x92a79c3f, 0x089766be, 0xbaeeadf4, 0x1286becf, 0xb6eacb19,
- 0x2660c200, 0x7565bde4, 0x64241f7a, 0x8248dca9, 0xc3b3ad66, 0x28136086, 0x0bd8dfa8, 0x356d1cf2,
- 0x107789be, 0xb3b2e9ce, 0x0502aa8f, 0x0bc0351e, 0x166bf52a, 0xeb12ff82, 0xe3486911, 0xd34d7516,
- 0x4e7b3aff, 0x5f43671b, 0x9cf6e037, 0x4981ac83, 0x334266ce, 0x8c9341b7, 0xd0d854c0, 0xcb3a6c88,
- 0x47bc2829, 0x4725ba37, 0xa66ad22b, 0x7ad61f1e, 0x0c5cbafa, 0x4437f107, 0xb6e79962, 0x42d2d816,
- 0x0a961288, 0xe1a5c06e, 0x13749e67, 0x72fc081a, 0xb1d139f7, 0xf9583745, 0xcf19df58, 0xbec3f756,
- 0xc06eba30, 0x07211b24, 0x45c28829, 0xc95e317f, 0xbc8ec511, 0x38bc46e9, 0xc6e6fa14, 0xbae8584a,
- 0xad4ebc46, 0x468f508b, 0x7829435f, 0xf124183b, 0x821dba9f, 0xaff60ff4, 0xea2c4e6d, 0x16e39264,
- 0x92544a8b, 0x009b4fc3, 0xaba68ced, 0x9ac96f78, 0x06a5b79a, 0xb2856e6e, 0x1aec3ca9, 0xbe838688,
- 0x0e0804e9, 0x55f1be56, 0xe7e5363b, 0xb3a1f25d, 0xf7debb85, 0x61fe033c, 0x16746233, 0x3c034c28,
- 0xda6d0c74, 0x79aac56c, 0x3ce4e1ad, 0x51f0c802, 0x98f8f35a, 0x1626a49f, 0xeed82b29, 0x1d382fe3,
- 0x0c4fb99a, 0xbb325778, 0x3ec6d97b, 0x6e77a6a9, 0xcb658b5c, 0xd45230c7, 0x2bd1408b, 0x60c03eb7,
- 0xb9068d78, 0xa33754f4, 0xf430c87d, 0xc8a71302, 0xb96d8c32, 0xebd4e7be, 0xbe8b9d2d, 0x7979fb06,
- 0xe7225308, 0x8b75cf77, 0x11ef8da4, 0xe083c858, 0x8d6b786f, 0x5a6317a6, 0xfa5cf7a0, 0x5dda0033,
- 0xf28ebfb0, 0xf5b9c310, 0xa0eac280, 0x08b9767a, 0xa3d9d2b0, 0x79d34217, 0x021a718d, 0x9ac6336a,
- 0x2711fd60, 0x438050e3, 0x069908a8, 0x3d7fedc4, 0x826d2bef, 0x4eeb8476, 0x488dcf25, 0x36c9d566,
- 0x28e74e41, 0xc2610aca, 0x3d49a9cf, 0xbae3b9df, 0xb65f8de6, 0x92aeaf64, 0x3ac7d5e6, 0x9ea80509,
- 0xf22b017d, 0xa4173f70, 0xdd1e16c3, 0x15e0d7f9, 0x50b1b887, 0x2b9f4fd5, 0x625aba82, 0x6a017962,
- 0x2ec01b9c, 0x15488aa9, 0xd716e740, 0x40055a2c, 0x93d29a22, 0xe32dbf9a, 0x058745b9, 0x3453dc1e,
- 0xd699296e, 0x496cff6f, 0x1c9f4986, 0xdfe2ed07, 0xb87242d1, 0x19de7eae, 0x053e561a, 0x15ad6f8c,
- 0x66626c1c, 0x7154c24c, 0xea082b2a, 0x93eb2939, 0x17dcb0f0, 0x58d4f2ae, 0x9ea294fb, 0x52cf564c,
- 0x9883fe66, 0x2ec40581, 0x763953c3, 0x01d6692e, 0xd3a0c108, 0xa1e7160e, 0xe4f2dfa6, 0x693ed285,
- 0x74904698, 0x4c2b0edd, 0x4f757656, 0x5d393378, 0xa132234f, 0x3d321c5d, 0xc3f5e194, 0x4b269301,
- 0xc79f022f, 0x3c997e7e, 0x5e4f9504, 0x3ffafbbd, 0x76f7ad0e, 0x296693f4, 0x3d1fce6f, 0xc61e45be,
- 0xd3b5ab34, 0xf72bf9b7, 0x1b0434c0, 0x4e72b567, 0x5592a33d, 0xb5229301, 0xcfd2a87f, 0x60aeb767,
- 0x1814386b, 0x30bcc33d, 0x38a0c07d, 0xfd1606f2, 0xc363519b, 0x589dd390, 0x5479f8e6, 0x1cb8d647,
- 0x97fd61a9, 0xea7759f4, 0x2d57539d, 0x569a58cf, 0xe84e63ad, 0x462e1b78, 0x6580f87e, 0xf3817914,
- 0x91da55f4, 0x40a230f3, 0xd1988f35, 0xb6e318d2, 0x3ffa50bc, 0x3d40f021, 0xc3c0bdae, 0x4958c24c,
- 0x518f36b2, 0x84b1d370, 0x0fedce83, 0x878ddada, 0xf2a279c7, 0x94e01be8, 0x90716f4b, 0x954b8aa3,
- },
- {
- 0xe216300d, 0xbbddfffc, 0xa7ebdabd, 0x35648095, 0x7789f8b7, 0xe6c1121b, 0x0e241600, 0x052ce8b5,
- 0x11a9cfb0, 0xe5952f11, 0xece7990a, 0x9386d174, 0x2a42931c, 0x76e38111, 0xb12def3a, 0x37ddddfc,
- 0xde9adeb1, 0x0a0cc32c, 0xbe197029, 0x84a00940, 0xbb243a0f, 0xb4d137cf, 0xb44e79f0, 0x049eedfd,
- 0x0b15a15d, 0x480d3168, 0x8bbbde5a, 0x669ded42, 0xc7ece831, 0x3f8f95e7, 0x72df191b, 0x7580330d,
- 0x94074251, 0x5c7dcdfa, 0xabbe6d63, 0xaa402164, 0xb301d40a, 0x02e7d1ca, 0x53571dae, 0x7a3182a2,
- 0x12a8ddec, 0xfdaa335d, 0x176f43e8, 0x71fb46d4, 0x38129022, 0xce949ad4, 0xb84769ad, 0x965bd862,
- 0x82f3d055, 0x66fb9767, 0x15b80b4e, 0x1d5b47a0, 0x4cfde06f, 0xc28ec4b8, 0x57e8726e, 0x647a78fc,
- 0x99865d44, 0x608bd593, 0x6c200e03, 0x39dc5ff6, 0x5d0b00a3, 0xae63aff2, 0x7e8bd632, 0x70108c0c,
- 0xbbd35049, 0x2998df04, 0x980cf42a, 0x9b6df491, 0x9e7edd53, 0x06918548, 0x58cb7e07, 0x3b74ef2e,
- 0x522fffb1, 0xd24708cc, 0x1c7e27cd, 0xa4eb215b, 0x3cf1d2e2, 0x19b47a38, 0x424f7618, 0x35856039,
- 0x9d17dee7, 0x27eb35e6, 0xc9aff67b, 0x36baf5b8, 0x09c467cd, 0xc18910b1, 0xe11dbf7b, 0x06cd1af8,
- 0x7170c608, 0x2d5e3354, 0xd4de495a, 0x64c6d006, 0xbcc0c62c, 0x3dd00db3, 0x708f8f34, 0x77d51b42,
- 0x264f620f, 0x24b8d2bf, 0x15c1b79e, 0x46a52564, 0xf8d7e54e, 0x3e378160, 0x7895cda5, 0x859c15a5,
- 0xe6459788, 0xc37bc75f, 0xdb07ba0c, 0x0676a3ab, 0x7f229b1e, 0x31842e7b, 0x24259fd7, 0xf8bef472,
- 0x835ffcb8, 0x6df4c1f2, 0x96f5b195, 0xfd0af0fc, 0xb0fe134c, 0xe2506d3d, 0x4f9b12ea, 0xf215f225,
- 0xa223736f, 0x9fb4c428, 0x25d04979, 0x34c713f8, 0xc4618187, 0xea7a6e98, 0x7cd16efc, 0x1436876c,
- 0xf1544107, 0xbedeee14, 0x56e9af27, 0xa04aa441, 0x3cf7c899, 0x92ecbae6, 0xdd67016d, 0x151682eb,
- 0xa842eedf, 0xfdba60b4, 0xf1907b75, 0x20e3030f, 0x24d8c29e, 0xe139673b, 0xefa63fb8, 0x71873054,
- 0xb6f2cf3b, 0x9f326442, 0xcb15a4cc, 0xb01a4504, 0xf1e47d8d, 0x844a1be5, 0xbae7dfdc, 0x42cbda70,
- 0xcd7dae0a, 0x57e85b7a, 0xd53f5af6, 0x20cf4d8c, 0xcea4d428, 0x79d130a4, 0x3486ebfb, 0x33d3cddc,
- 0x77853b53, 0x37effcb5, 0xc5068778, 0xe580b3e6, 0x4e68b8f4, 0xc5c8b37e, 0x0d809ea2, 0x398feb7c,
- 0x132a4f94, 0x43b7950e, 0x2fee7d1c, 0x223613bd, 0xdd06caa2, 0x37df932b, 0xc4248289, 0xacf3ebc3,
- 0x5715f6b7, 0xef3478dd, 0xf267616f, 0xc148cbe4, 0x9052815e, 0x5e410fab, 0xb48a2465, 0x2eda7fa4,
- 0xe87b40e4, 0xe98ea084, 0x5889e9e1, 0xefd390fc, 0xdd07d35b, 0xdb485694, 0x38d7e5b2, 0x57720101,
- 0x730edebc, 0x5b643113, 0x94917e4f, 0x503c2fba, 0x646f1282, 0x7523d24a, 0xe0779695, 0xf9c17a8f,
- 0x7a5b2121, 0xd187b896, 0x29263a4d, 0xba510cdf, 0x81f47c9f, 0xad1163ed, 0xea7b5965, 0x1a00726e,
- 0x11403092, 0x00da6d77, 0x4a0cdd61, 0xad1f4603, 0x605bdfb0, 0x9eedc364, 0x22ebe6a8, 0xcee7d28a,
- 0xa0e736a0, 0x5564a6b9, 0x10853209, 0xc7eb8f37, 0x2de705ca, 0x8951570f, 0xdf09822b, 0xbd691a6c,
- 0xaa12e4f2, 0x87451c0f, 0xe0f6a27a, 0x3ada4819, 0x4cf1764f, 0x0d771c2b, 0x67cdb156, 0x350d8384,
- 0x5938fa0f, 0x42399ef3, 0x36997b07, 0x0e84093d, 0x4aa93e61, 0x8360d87b, 0x1fa98b0c, 0x1149382c,
- 0xe97625a5, 0x0614d1b7, 0x0e25244b, 0x0c768347, 0x589e8d82, 0x0d2059d1, 0xa466bb1e, 0xf8da0a82,
- 0x04f19130, 0xba6e4ec0, 0x99265164, 0x1ee7230d, 0x50b2ad80, 0xeaee6801, 0x8db2a283, 0xea8bf59e,
- },
-}
diff --git a/vendor/golang.org/x/crypto/internal/alias/alias.go b/vendor/golang.org/x/crypto/internal/alias/alias.go
deleted file mode 100644
index 69c17f8..0000000
--- a/vendor/golang.org/x/crypto/internal/alias/alias.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !purego
-// +build !purego
-
-// Package alias implements memory aliasing tests.
-package alias
-
-import "unsafe"
-
-// AnyOverlap reports whether x and y share memory at any (not necessarily
-// corresponding) index. The memory beyond the slice length is ignored.
-func AnyOverlap(x, y []byte) bool {
- return len(x) > 0 && len(y) > 0 &&
- uintptr(unsafe.Pointer(&x[0])) <= uintptr(unsafe.Pointer(&y[len(y)-1])) &&
- uintptr(unsafe.Pointer(&y[0])) <= uintptr(unsafe.Pointer(&x[len(x)-1]))
-}
-
-// InexactOverlap reports whether x and y share memory at any non-corresponding
-// index. The memory beyond the slice length is ignored. Note that x and y can
-// have different lengths and still not have any inexact overlap.
-//
-// InexactOverlap can be used to implement the requirements of the crypto/cipher
-// AEAD, Block, BlockMode and Stream interfaces.
-func InexactOverlap(x, y []byte) bool {
- if len(x) == 0 || len(y) == 0 || &x[0] == &y[0] {
- return false
- }
- return AnyOverlap(x, y)
-}
diff --git a/vendor/golang.org/x/crypto/internal/alias/alias_purego.go b/vendor/golang.org/x/crypto/internal/alias/alias_purego.go
deleted file mode 100644
index 4775b0a..0000000
--- a/vendor/golang.org/x/crypto/internal/alias/alias_purego.go
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build purego
-// +build purego
-
-// Package alias implements memory aliasing tests.
-package alias
-
-// This is the Google App Engine standard variant based on reflect
-// because the unsafe package and cgo are disallowed.
-
-import "reflect"
-
-// AnyOverlap reports whether x and y share memory at any (not necessarily
-// corresponding) index. The memory beyond the slice length is ignored.
-func AnyOverlap(x, y []byte) bool {
- return len(x) > 0 && len(y) > 0 &&
- reflect.ValueOf(&x[0]).Pointer() <= reflect.ValueOf(&y[len(y)-1]).Pointer() &&
- reflect.ValueOf(&y[0]).Pointer() <= reflect.ValueOf(&x[len(x)-1]).Pointer()
-}
-
-// InexactOverlap reports whether x and y share memory at any non-corresponding
-// index. The memory beyond the slice length is ignored. Note that x and y can
-// have different lengths and still not have any inexact overlap.
-//
-// InexactOverlap can be used to implement the requirements of the crypto/cipher
-// AEAD, Block, BlockMode and Stream interfaces.
-func InexactOverlap(x, y []byte) bool {
- if len(x) == 0 || len(y) == 0 || &x[0] == &y[0] {
- return false
- }
- return AnyOverlap(x, y)
-}
diff --git a/vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go b/vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go
deleted file mode 100644
index 904b57e..0000000
--- a/vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-/*
-Package pbkdf2 implements the key derivation function PBKDF2 as defined in RFC
-2898 / PKCS #5 v2.0.
-
-A key derivation function is useful when encrypting data based on a password
-or any other not-fully-random data. It uses a pseudorandom function to derive
-a secure encryption key based on the password.
-
-While v2.0 of the standard defines only one pseudorandom function to use,
-HMAC-SHA1, the drafted v2.1 specification allows use of all five FIPS Approved
-Hash Functions SHA-1, SHA-224, SHA-256, SHA-384 and SHA-512 for HMAC. To
-choose, you can pass the `New` functions from the different SHA packages to
-pbkdf2.Key.
-*/
-package pbkdf2 // import "golang.org/x/crypto/pbkdf2"
-
-import (
- "crypto/hmac"
- "hash"
-)
-
-// Key derives a key from the password, salt and iteration count, returning a
-// []byte of length keylen that can be used as cryptographic key. The key is
-// derived based on the method described as PBKDF2 with the HMAC variant using
-// the supplied hash function.
-//
-// For example, to use a HMAC-SHA-1 based PBKDF2 key derivation function, you
-// can get a derived key for e.g. AES-256 (which needs a 32-byte key) by
-// doing:
-//
-// dk := pbkdf2.Key([]byte("some password"), salt, 4096, 32, sha1.New)
-//
-// Remember to get a good random salt. At least 8 bytes is recommended by the
-// RFC.
-//
-// Using a higher iteration count will increase the cost of an exhaustive
-// search but will also make derivation proportionally slower.
-func Key(password, salt []byte, iter, keyLen int, h func() hash.Hash) []byte {
- prf := hmac.New(h, password)
- hashLen := prf.Size()
- numBlocks := (keyLen + hashLen - 1) / hashLen
-
- var buf [4]byte
- dk := make([]byte, 0, numBlocks*hashLen)
- U := make([]byte, hashLen)
- for block := 1; block <= numBlocks; block++ {
- // N.B.: || means concatenation, ^ means XOR
- // for each block T_i = U_1 ^ U_2 ^ ... ^ U_iter
- // U_1 = PRF(password, salt || uint(i))
- prf.Reset()
- prf.Write(salt)
- buf[0] = byte(block >> 24)
- buf[1] = byte(block >> 16)
- buf[2] = byte(block >> 8)
- buf[3] = byte(block)
- prf.Write(buf[:4])
- dk = prf.Sum(dk)
- T := dk[len(dk)-hashLen:]
- copy(U, T)
-
- // U_n = PRF(password, U_(n-1))
- for n := 2; n <= iter; n++ {
- prf.Reset()
- prf.Write(U)
- U = U[:0]
- U = prf.Sum(U)
- for x := range U {
- T[x] ^= U[x]
- }
- }
- }
- return dk[:keyLen]
-}
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go b/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go
deleted file mode 100644
index 4c96147..0000000
--- a/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go
+++ /dev/null
@@ -1,144 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package salsa provides low-level access to functions in the Salsa family.
-package salsa // import "golang.org/x/crypto/salsa20/salsa"
-
-// Sigma is the Salsa20 constant for 256-bit keys.
-var Sigma = [16]byte{'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', '2', '-', 'b', 'y', 't', 'e', ' ', 'k'}
-
-// HSalsa20 applies the HSalsa20 core function to a 16-byte input in, 32-byte
-// key k, and 16-byte constant c, and puts the result into the 32-byte array
-// out.
-func HSalsa20(out *[32]byte, in *[16]byte, k *[32]byte, c *[16]byte) {
- x0 := uint32(c[0]) | uint32(c[1])<<8 | uint32(c[2])<<16 | uint32(c[3])<<24
- x1 := uint32(k[0]) | uint32(k[1])<<8 | uint32(k[2])<<16 | uint32(k[3])<<24
- x2 := uint32(k[4]) | uint32(k[5])<<8 | uint32(k[6])<<16 | uint32(k[7])<<24
- x3 := uint32(k[8]) | uint32(k[9])<<8 | uint32(k[10])<<16 | uint32(k[11])<<24
- x4 := uint32(k[12]) | uint32(k[13])<<8 | uint32(k[14])<<16 | uint32(k[15])<<24
- x5 := uint32(c[4]) | uint32(c[5])<<8 | uint32(c[6])<<16 | uint32(c[7])<<24
- x6 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
- x7 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24
- x8 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24
- x9 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24
- x10 := uint32(c[8]) | uint32(c[9])<<8 | uint32(c[10])<<16 | uint32(c[11])<<24
- x11 := uint32(k[16]) | uint32(k[17])<<8 | uint32(k[18])<<16 | uint32(k[19])<<24
- x12 := uint32(k[20]) | uint32(k[21])<<8 | uint32(k[22])<<16 | uint32(k[23])<<24
- x13 := uint32(k[24]) | uint32(k[25])<<8 | uint32(k[26])<<16 | uint32(k[27])<<24
- x14 := uint32(k[28]) | uint32(k[29])<<8 | uint32(k[30])<<16 | uint32(k[31])<<24
- x15 := uint32(c[12]) | uint32(c[13])<<8 | uint32(c[14])<<16 | uint32(c[15])<<24
-
- for i := 0; i < 20; i += 2 {
- u := x0 + x12
- x4 ^= u<<7 | u>>(32-7)
- u = x4 + x0
- x8 ^= u<<9 | u>>(32-9)
- u = x8 + x4
- x12 ^= u<<13 | u>>(32-13)
- u = x12 + x8
- x0 ^= u<<18 | u>>(32-18)
-
- u = x5 + x1
- x9 ^= u<<7 | u>>(32-7)
- u = x9 + x5
- x13 ^= u<<9 | u>>(32-9)
- u = x13 + x9
- x1 ^= u<<13 | u>>(32-13)
- u = x1 + x13
- x5 ^= u<<18 | u>>(32-18)
-
- u = x10 + x6
- x14 ^= u<<7 | u>>(32-7)
- u = x14 + x10
- x2 ^= u<<9 | u>>(32-9)
- u = x2 + x14
- x6 ^= u<<13 | u>>(32-13)
- u = x6 + x2
- x10 ^= u<<18 | u>>(32-18)
-
- u = x15 + x11
- x3 ^= u<<7 | u>>(32-7)
- u = x3 + x15
- x7 ^= u<<9 | u>>(32-9)
- u = x7 + x3
- x11 ^= u<<13 | u>>(32-13)
- u = x11 + x7
- x15 ^= u<<18 | u>>(32-18)
-
- u = x0 + x3
- x1 ^= u<<7 | u>>(32-7)
- u = x1 + x0
- x2 ^= u<<9 | u>>(32-9)
- u = x2 + x1
- x3 ^= u<<13 | u>>(32-13)
- u = x3 + x2
- x0 ^= u<<18 | u>>(32-18)
-
- u = x5 + x4
- x6 ^= u<<7 | u>>(32-7)
- u = x6 + x5
- x7 ^= u<<9 | u>>(32-9)
- u = x7 + x6
- x4 ^= u<<13 | u>>(32-13)
- u = x4 + x7
- x5 ^= u<<18 | u>>(32-18)
-
- u = x10 + x9
- x11 ^= u<<7 | u>>(32-7)
- u = x11 + x10
- x8 ^= u<<9 | u>>(32-9)
- u = x8 + x11
- x9 ^= u<<13 | u>>(32-13)
- u = x9 + x8
- x10 ^= u<<18 | u>>(32-18)
-
- u = x15 + x14
- x12 ^= u<<7 | u>>(32-7)
- u = x12 + x15
- x13 ^= u<<9 | u>>(32-9)
- u = x13 + x12
- x14 ^= u<<13 | u>>(32-13)
- u = x14 + x13
- x15 ^= u<<18 | u>>(32-18)
- }
- out[0] = byte(x0)
- out[1] = byte(x0 >> 8)
- out[2] = byte(x0 >> 16)
- out[3] = byte(x0 >> 24)
-
- out[4] = byte(x5)
- out[5] = byte(x5 >> 8)
- out[6] = byte(x5 >> 16)
- out[7] = byte(x5 >> 24)
-
- out[8] = byte(x10)
- out[9] = byte(x10 >> 8)
- out[10] = byte(x10 >> 16)
- out[11] = byte(x10 >> 24)
-
- out[12] = byte(x15)
- out[13] = byte(x15 >> 8)
- out[14] = byte(x15 >> 16)
- out[15] = byte(x15 >> 24)
-
- out[16] = byte(x6)
- out[17] = byte(x6 >> 8)
- out[18] = byte(x6 >> 16)
- out[19] = byte(x6 >> 24)
-
- out[20] = byte(x7)
- out[21] = byte(x7 >> 8)
- out[22] = byte(x7 >> 16)
- out[23] = byte(x7 >> 24)
-
- out[24] = byte(x8)
- out[25] = byte(x8 >> 8)
- out[26] = byte(x8 >> 16)
- out[27] = byte(x8 >> 24)
-
- out[28] = byte(x9)
- out[29] = byte(x9 >> 8)
- out[30] = byte(x9 >> 16)
- out[31] = byte(x9 >> 24)
-}
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go
deleted file mode 100644
index 9bfc092..0000000
--- a/vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go
+++ /dev/null
@@ -1,199 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package salsa
-
-// Core208 applies the Salsa20/8 core function to the 64-byte array in and puts
-// the result into the 64-byte array out. The input and output may be the same array.
-func Core208(out *[64]byte, in *[64]byte) {
- j0 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
- j1 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24
- j2 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24
- j3 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24
- j4 := uint32(in[16]) | uint32(in[17])<<8 | uint32(in[18])<<16 | uint32(in[19])<<24
- j5 := uint32(in[20]) | uint32(in[21])<<8 | uint32(in[22])<<16 | uint32(in[23])<<24
- j6 := uint32(in[24]) | uint32(in[25])<<8 | uint32(in[26])<<16 | uint32(in[27])<<24
- j7 := uint32(in[28]) | uint32(in[29])<<8 | uint32(in[30])<<16 | uint32(in[31])<<24
- j8 := uint32(in[32]) | uint32(in[33])<<8 | uint32(in[34])<<16 | uint32(in[35])<<24
- j9 := uint32(in[36]) | uint32(in[37])<<8 | uint32(in[38])<<16 | uint32(in[39])<<24
- j10 := uint32(in[40]) | uint32(in[41])<<8 | uint32(in[42])<<16 | uint32(in[43])<<24
- j11 := uint32(in[44]) | uint32(in[45])<<8 | uint32(in[46])<<16 | uint32(in[47])<<24
- j12 := uint32(in[48]) | uint32(in[49])<<8 | uint32(in[50])<<16 | uint32(in[51])<<24
- j13 := uint32(in[52]) | uint32(in[53])<<8 | uint32(in[54])<<16 | uint32(in[55])<<24
- j14 := uint32(in[56]) | uint32(in[57])<<8 | uint32(in[58])<<16 | uint32(in[59])<<24
- j15 := uint32(in[60]) | uint32(in[61])<<8 | uint32(in[62])<<16 | uint32(in[63])<<24
-
- x0, x1, x2, x3, x4, x5, x6, x7, x8 := j0, j1, j2, j3, j4, j5, j6, j7, j8
- x9, x10, x11, x12, x13, x14, x15 := j9, j10, j11, j12, j13, j14, j15
-
- for i := 0; i < 8; i += 2 {
- u := x0 + x12
- x4 ^= u<<7 | u>>(32-7)
- u = x4 + x0
- x8 ^= u<<9 | u>>(32-9)
- u = x8 + x4
- x12 ^= u<<13 | u>>(32-13)
- u = x12 + x8
- x0 ^= u<<18 | u>>(32-18)
-
- u = x5 + x1
- x9 ^= u<<7 | u>>(32-7)
- u = x9 + x5
- x13 ^= u<<9 | u>>(32-9)
- u = x13 + x9
- x1 ^= u<<13 | u>>(32-13)
- u = x1 + x13
- x5 ^= u<<18 | u>>(32-18)
-
- u = x10 + x6
- x14 ^= u<<7 | u>>(32-7)
- u = x14 + x10
- x2 ^= u<<9 | u>>(32-9)
- u = x2 + x14
- x6 ^= u<<13 | u>>(32-13)
- u = x6 + x2
- x10 ^= u<<18 | u>>(32-18)
-
- u = x15 + x11
- x3 ^= u<<7 | u>>(32-7)
- u = x3 + x15
- x7 ^= u<<9 | u>>(32-9)
- u = x7 + x3
- x11 ^= u<<13 | u>>(32-13)
- u = x11 + x7
- x15 ^= u<<18 | u>>(32-18)
-
- u = x0 + x3
- x1 ^= u<<7 | u>>(32-7)
- u = x1 + x0
- x2 ^= u<<9 | u>>(32-9)
- u = x2 + x1
- x3 ^= u<<13 | u>>(32-13)
- u = x3 + x2
- x0 ^= u<<18 | u>>(32-18)
-
- u = x5 + x4
- x6 ^= u<<7 | u>>(32-7)
- u = x6 + x5
- x7 ^= u<<9 | u>>(32-9)
- u = x7 + x6
- x4 ^= u<<13 | u>>(32-13)
- u = x4 + x7
- x5 ^= u<<18 | u>>(32-18)
-
- u = x10 + x9
- x11 ^= u<<7 | u>>(32-7)
- u = x11 + x10
- x8 ^= u<<9 | u>>(32-9)
- u = x8 + x11
- x9 ^= u<<13 | u>>(32-13)
- u = x9 + x8
- x10 ^= u<<18 | u>>(32-18)
-
- u = x15 + x14
- x12 ^= u<<7 | u>>(32-7)
- u = x12 + x15
- x13 ^= u<<9 | u>>(32-9)
- u = x13 + x12
- x14 ^= u<<13 | u>>(32-13)
- u = x14 + x13
- x15 ^= u<<18 | u>>(32-18)
- }
- x0 += j0
- x1 += j1
- x2 += j2
- x3 += j3
- x4 += j4
- x5 += j5
- x6 += j6
- x7 += j7
- x8 += j8
- x9 += j9
- x10 += j10
- x11 += j11
- x12 += j12
- x13 += j13
- x14 += j14
- x15 += j15
-
- out[0] = byte(x0)
- out[1] = byte(x0 >> 8)
- out[2] = byte(x0 >> 16)
- out[3] = byte(x0 >> 24)
-
- out[4] = byte(x1)
- out[5] = byte(x1 >> 8)
- out[6] = byte(x1 >> 16)
- out[7] = byte(x1 >> 24)
-
- out[8] = byte(x2)
- out[9] = byte(x2 >> 8)
- out[10] = byte(x2 >> 16)
- out[11] = byte(x2 >> 24)
-
- out[12] = byte(x3)
- out[13] = byte(x3 >> 8)
- out[14] = byte(x3 >> 16)
- out[15] = byte(x3 >> 24)
-
- out[16] = byte(x4)
- out[17] = byte(x4 >> 8)
- out[18] = byte(x4 >> 16)
- out[19] = byte(x4 >> 24)
-
- out[20] = byte(x5)
- out[21] = byte(x5 >> 8)
- out[22] = byte(x5 >> 16)
- out[23] = byte(x5 >> 24)
-
- out[24] = byte(x6)
- out[25] = byte(x6 >> 8)
- out[26] = byte(x6 >> 16)
- out[27] = byte(x6 >> 24)
-
- out[28] = byte(x7)
- out[29] = byte(x7 >> 8)
- out[30] = byte(x7 >> 16)
- out[31] = byte(x7 >> 24)
-
- out[32] = byte(x8)
- out[33] = byte(x8 >> 8)
- out[34] = byte(x8 >> 16)
- out[35] = byte(x8 >> 24)
-
- out[36] = byte(x9)
- out[37] = byte(x9 >> 8)
- out[38] = byte(x9 >> 16)
- out[39] = byte(x9 >> 24)
-
- out[40] = byte(x10)
- out[41] = byte(x10 >> 8)
- out[42] = byte(x10 >> 16)
- out[43] = byte(x10 >> 24)
-
- out[44] = byte(x11)
- out[45] = byte(x11 >> 8)
- out[46] = byte(x11 >> 16)
- out[47] = byte(x11 >> 24)
-
- out[48] = byte(x12)
- out[49] = byte(x12 >> 8)
- out[50] = byte(x12 >> 16)
- out[51] = byte(x12 >> 24)
-
- out[52] = byte(x13)
- out[53] = byte(x13 >> 8)
- out[54] = byte(x13 >> 16)
- out[55] = byte(x13 >> 24)
-
- out[56] = byte(x14)
- out[57] = byte(x14 >> 8)
- out[58] = byte(x14 >> 16)
- out[59] = byte(x14 >> 24)
-
- out[60] = byte(x15)
- out[61] = byte(x15 >> 8)
- out[62] = byte(x15 >> 16)
- out[63] = byte(x15 >> 24)
-}
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go
deleted file mode 100644
index c400dfc..0000000
--- a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build amd64 && !purego && gc
-// +build amd64,!purego,gc
-
-package salsa
-
-//go:noescape
-
-// salsa2020XORKeyStream is implemented in salsa20_amd64.s.
-func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte)
-
-// XORKeyStream crypts bytes from in to out using the given key and counters.
-// In and out must overlap entirely or not at all. Counter
-// contains the raw salsa20 counter bytes (both nonce and block counter).
-func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
- if len(in) == 0 {
- return
- }
- _ = out[len(in)-1]
- salsa2020XORKeyStream(&out[0], &in[0], uint64(len(in)), &counter[0], &key[0])
-}
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s
deleted file mode 100644
index c089277..0000000
--- a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s
+++ /dev/null
@@ -1,881 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build amd64 && !purego && gc
-// +build amd64,!purego,gc
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-// func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte)
-// This needs up to 64 bytes at 360(R12); hence the non-obvious frame size.
-TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
- MOVQ out+0(FP),DI
- MOVQ in+8(FP),SI
- MOVQ n+16(FP),DX
- MOVQ nonce+24(FP),CX
- MOVQ key+32(FP),R8
-
- MOVQ SP,R12
- ADDQ $31, R12
- ANDQ $~31, R12
-
- MOVQ DX,R9
- MOVQ CX,DX
- MOVQ R8,R10
- CMPQ R9,$0
- JBE DONE
- START:
- MOVL 20(R10),CX
- MOVL 0(R10),R8
- MOVL 0(DX),AX
- MOVL 16(R10),R11
- MOVL CX,0(R12)
- MOVL R8, 4 (R12)
- MOVL AX, 8 (R12)
- MOVL R11, 12 (R12)
- MOVL 8(DX),CX
- MOVL 24(R10),R8
- MOVL 4(R10),AX
- MOVL 4(DX),R11
- MOVL CX,16(R12)
- MOVL R8, 20 (R12)
- MOVL AX, 24 (R12)
- MOVL R11, 28 (R12)
- MOVL 12(DX),CX
- MOVL 12(R10),DX
- MOVL 28(R10),R8
- MOVL 8(R10),AX
- MOVL DX,32(R12)
- MOVL CX, 36 (R12)
- MOVL R8, 40 (R12)
- MOVL AX, 44 (R12)
- MOVQ $1634760805,DX
- MOVQ $857760878,CX
- MOVQ $2036477234,R8
- MOVQ $1797285236,AX
- MOVL DX,48(R12)
- MOVL CX, 52 (R12)
- MOVL R8, 56 (R12)
- MOVL AX, 60 (R12)
- CMPQ R9,$256
- JB BYTESBETWEEN1AND255
- MOVOA 48(R12),X0
- PSHUFL $0X55,X0,X1
- PSHUFL $0XAA,X0,X2
- PSHUFL $0XFF,X0,X3
- PSHUFL $0X00,X0,X0
- MOVOA X1,64(R12)
- MOVOA X2,80(R12)
- MOVOA X3,96(R12)
- MOVOA X0,112(R12)
- MOVOA 0(R12),X0
- PSHUFL $0XAA,X0,X1
- PSHUFL $0XFF,X0,X2
- PSHUFL $0X00,X0,X3
- PSHUFL $0X55,X0,X0
- MOVOA X1,128(R12)
- MOVOA X2,144(R12)
- MOVOA X3,160(R12)
- MOVOA X0,176(R12)
- MOVOA 16(R12),X0
- PSHUFL $0XFF,X0,X1
- PSHUFL $0X55,X0,X2
- PSHUFL $0XAA,X0,X0
- MOVOA X1,192(R12)
- MOVOA X2,208(R12)
- MOVOA X0,224(R12)
- MOVOA 32(R12),X0
- PSHUFL $0X00,X0,X1
- PSHUFL $0XAA,X0,X2
- PSHUFL $0XFF,X0,X0
- MOVOA X1,240(R12)
- MOVOA X2,256(R12)
- MOVOA X0,272(R12)
- BYTESATLEAST256:
- MOVL 16(R12),DX
- MOVL 36 (R12),CX
- MOVL DX,288(R12)
- MOVL CX,304(R12)
- SHLQ $32,CX
- ADDQ CX,DX
- ADDQ $1,DX
- MOVQ DX,CX
- SHRQ $32,CX
- MOVL DX, 292 (R12)
- MOVL CX, 308 (R12)
- ADDQ $1,DX
- MOVQ DX,CX
- SHRQ $32,CX
- MOVL DX, 296 (R12)
- MOVL CX, 312 (R12)
- ADDQ $1,DX
- MOVQ DX,CX
- SHRQ $32,CX
- MOVL DX, 300 (R12)
- MOVL CX, 316 (R12)
- ADDQ $1,DX
- MOVQ DX,CX
- SHRQ $32,CX
- MOVL DX,16(R12)
- MOVL CX, 36 (R12)
- MOVQ R9,352(R12)
- MOVQ $20,DX
- MOVOA 64(R12),X0
- MOVOA 80(R12),X1
- MOVOA 96(R12),X2
- MOVOA 256(R12),X3
- MOVOA 272(R12),X4
- MOVOA 128(R12),X5
- MOVOA 144(R12),X6
- MOVOA 176(R12),X7
- MOVOA 192(R12),X8
- MOVOA 208(R12),X9
- MOVOA 224(R12),X10
- MOVOA 304(R12),X11
- MOVOA 112(R12),X12
- MOVOA 160(R12),X13
- MOVOA 240(R12),X14
- MOVOA 288(R12),X15
- MAINLOOP1:
- MOVOA X1,320(R12)
- MOVOA X2,336(R12)
- MOVOA X13,X1
- PADDL X12,X1
- MOVOA X1,X2
- PSLLL $7,X1
- PXOR X1,X14
- PSRLL $25,X2
- PXOR X2,X14
- MOVOA X7,X1
- PADDL X0,X1
- MOVOA X1,X2
- PSLLL $7,X1
- PXOR X1,X11
- PSRLL $25,X2
- PXOR X2,X11
- MOVOA X12,X1
- PADDL X14,X1
- MOVOA X1,X2
- PSLLL $9,X1
- PXOR X1,X15
- PSRLL $23,X2
- PXOR X2,X15
- MOVOA X0,X1
- PADDL X11,X1
- MOVOA X1,X2
- PSLLL $9,X1
- PXOR X1,X9
- PSRLL $23,X2
- PXOR X2,X9
- MOVOA X14,X1
- PADDL X15,X1
- MOVOA X1,X2
- PSLLL $13,X1
- PXOR X1,X13
- PSRLL $19,X2
- PXOR X2,X13
- MOVOA X11,X1
- PADDL X9,X1
- MOVOA X1,X2
- PSLLL $13,X1
- PXOR X1,X7
- PSRLL $19,X2
- PXOR X2,X7
- MOVOA X15,X1
- PADDL X13,X1
- MOVOA X1,X2
- PSLLL $18,X1
- PXOR X1,X12
- PSRLL $14,X2
- PXOR X2,X12
- MOVOA 320(R12),X1
- MOVOA X12,320(R12)
- MOVOA X9,X2
- PADDL X7,X2
- MOVOA X2,X12
- PSLLL $18,X2
- PXOR X2,X0
- PSRLL $14,X12
- PXOR X12,X0
- MOVOA X5,X2
- PADDL X1,X2
- MOVOA X2,X12
- PSLLL $7,X2
- PXOR X2,X3
- PSRLL $25,X12
- PXOR X12,X3
- MOVOA 336(R12),X2
- MOVOA X0,336(R12)
- MOVOA X6,X0
- PADDL X2,X0
- MOVOA X0,X12
- PSLLL $7,X0
- PXOR X0,X4
- PSRLL $25,X12
- PXOR X12,X4
- MOVOA X1,X0
- PADDL X3,X0
- MOVOA X0,X12
- PSLLL $9,X0
- PXOR X0,X10
- PSRLL $23,X12
- PXOR X12,X10
- MOVOA X2,X0
- PADDL X4,X0
- MOVOA X0,X12
- PSLLL $9,X0
- PXOR X0,X8
- PSRLL $23,X12
- PXOR X12,X8
- MOVOA X3,X0
- PADDL X10,X0
- MOVOA X0,X12
- PSLLL $13,X0
- PXOR X0,X5
- PSRLL $19,X12
- PXOR X12,X5
- MOVOA X4,X0
- PADDL X8,X0
- MOVOA X0,X12
- PSLLL $13,X0
- PXOR X0,X6
- PSRLL $19,X12
- PXOR X12,X6
- MOVOA X10,X0
- PADDL X5,X0
- MOVOA X0,X12
- PSLLL $18,X0
- PXOR X0,X1
- PSRLL $14,X12
- PXOR X12,X1
- MOVOA 320(R12),X0
- MOVOA X1,320(R12)
- MOVOA X4,X1
- PADDL X0,X1
- MOVOA X1,X12
- PSLLL $7,X1
- PXOR X1,X7
- PSRLL $25,X12
- PXOR X12,X7
- MOVOA X8,X1
- PADDL X6,X1
- MOVOA X1,X12
- PSLLL $18,X1
- PXOR X1,X2
- PSRLL $14,X12
- PXOR X12,X2
- MOVOA 336(R12),X12
- MOVOA X2,336(R12)
- MOVOA X14,X1
- PADDL X12,X1
- MOVOA X1,X2
- PSLLL $7,X1
- PXOR X1,X5
- PSRLL $25,X2
- PXOR X2,X5
- MOVOA X0,X1
- PADDL X7,X1
- MOVOA X1,X2
- PSLLL $9,X1
- PXOR X1,X10
- PSRLL $23,X2
- PXOR X2,X10
- MOVOA X12,X1
- PADDL X5,X1
- MOVOA X1,X2
- PSLLL $9,X1
- PXOR X1,X8
- PSRLL $23,X2
- PXOR X2,X8
- MOVOA X7,X1
- PADDL X10,X1
- MOVOA X1,X2
- PSLLL $13,X1
- PXOR X1,X4
- PSRLL $19,X2
- PXOR X2,X4
- MOVOA X5,X1
- PADDL X8,X1
- MOVOA X1,X2
- PSLLL $13,X1
- PXOR X1,X14
- PSRLL $19,X2
- PXOR X2,X14
- MOVOA X10,X1
- PADDL X4,X1
- MOVOA X1,X2
- PSLLL $18,X1
- PXOR X1,X0
- PSRLL $14,X2
- PXOR X2,X0
- MOVOA 320(R12),X1
- MOVOA X0,320(R12)
- MOVOA X8,X0
- PADDL X14,X0
- MOVOA X0,X2
- PSLLL $18,X0
- PXOR X0,X12
- PSRLL $14,X2
- PXOR X2,X12
- MOVOA X11,X0
- PADDL X1,X0
- MOVOA X0,X2
- PSLLL $7,X0
- PXOR X0,X6
- PSRLL $25,X2
- PXOR X2,X6
- MOVOA 336(R12),X2
- MOVOA X12,336(R12)
- MOVOA X3,X0
- PADDL X2,X0
- MOVOA X0,X12
- PSLLL $7,X0
- PXOR X0,X13
- PSRLL $25,X12
- PXOR X12,X13
- MOVOA X1,X0
- PADDL X6,X0
- MOVOA X0,X12
- PSLLL $9,X0
- PXOR X0,X15
- PSRLL $23,X12
- PXOR X12,X15
- MOVOA X2,X0
- PADDL X13,X0
- MOVOA X0,X12
- PSLLL $9,X0
- PXOR X0,X9
- PSRLL $23,X12
- PXOR X12,X9
- MOVOA X6,X0
- PADDL X15,X0
- MOVOA X0,X12
- PSLLL $13,X0
- PXOR X0,X11
- PSRLL $19,X12
- PXOR X12,X11
- MOVOA X13,X0
- PADDL X9,X0
- MOVOA X0,X12
- PSLLL $13,X0
- PXOR X0,X3
- PSRLL $19,X12
- PXOR X12,X3
- MOVOA X15,X0
- PADDL X11,X0
- MOVOA X0,X12
- PSLLL $18,X0
- PXOR X0,X1
- PSRLL $14,X12
- PXOR X12,X1
- MOVOA X9,X0
- PADDL X3,X0
- MOVOA X0,X12
- PSLLL $18,X0
- PXOR X0,X2
- PSRLL $14,X12
- PXOR X12,X2
- MOVOA 320(R12),X12
- MOVOA 336(R12),X0
- SUBQ $2,DX
- JA MAINLOOP1
- PADDL 112(R12),X12
- PADDL 176(R12),X7
- PADDL 224(R12),X10
- PADDL 272(R12),X4
- MOVD X12,DX
- MOVD X7,CX
- MOVD X10,R8
- MOVD X4,R9
- PSHUFL $0X39,X12,X12
- PSHUFL $0X39,X7,X7
- PSHUFL $0X39,X10,X10
- PSHUFL $0X39,X4,X4
- XORL 0(SI),DX
- XORL 4(SI),CX
- XORL 8(SI),R8
- XORL 12(SI),R9
- MOVL DX,0(DI)
- MOVL CX,4(DI)
- MOVL R8,8(DI)
- MOVL R9,12(DI)
- MOVD X12,DX
- MOVD X7,CX
- MOVD X10,R8
- MOVD X4,R9
- PSHUFL $0X39,X12,X12
- PSHUFL $0X39,X7,X7
- PSHUFL $0X39,X10,X10
- PSHUFL $0X39,X4,X4
- XORL 64(SI),DX
- XORL 68(SI),CX
- XORL 72(SI),R8
- XORL 76(SI),R9
- MOVL DX,64(DI)
- MOVL CX,68(DI)
- MOVL R8,72(DI)
- MOVL R9,76(DI)
- MOVD X12,DX
- MOVD X7,CX
- MOVD X10,R8
- MOVD X4,R9
- PSHUFL $0X39,X12,X12
- PSHUFL $0X39,X7,X7
- PSHUFL $0X39,X10,X10
- PSHUFL $0X39,X4,X4
- XORL 128(SI),DX
- XORL 132(SI),CX
- XORL 136(SI),R8
- XORL 140(SI),R9
- MOVL DX,128(DI)
- MOVL CX,132(DI)
- MOVL R8,136(DI)
- MOVL R9,140(DI)
- MOVD X12,DX
- MOVD X7,CX
- MOVD X10,R8
- MOVD X4,R9
- XORL 192(SI),DX
- XORL 196(SI),CX
- XORL 200(SI),R8
- XORL 204(SI),R9
- MOVL DX,192(DI)
- MOVL CX,196(DI)
- MOVL R8,200(DI)
- MOVL R9,204(DI)
- PADDL 240(R12),X14
- PADDL 64(R12),X0
- PADDL 128(R12),X5
- PADDL 192(R12),X8
- MOVD X14,DX
- MOVD X0,CX
- MOVD X5,R8
- MOVD X8,R9
- PSHUFL $0X39,X14,X14
- PSHUFL $0X39,X0,X0
- PSHUFL $0X39,X5,X5
- PSHUFL $0X39,X8,X8
- XORL 16(SI),DX
- XORL 20(SI),CX
- XORL 24(SI),R8
- XORL 28(SI),R9
- MOVL DX,16(DI)
- MOVL CX,20(DI)
- MOVL R8,24(DI)
- MOVL R9,28(DI)
- MOVD X14,DX
- MOVD X0,CX
- MOVD X5,R8
- MOVD X8,R9
- PSHUFL $0X39,X14,X14
- PSHUFL $0X39,X0,X0
- PSHUFL $0X39,X5,X5
- PSHUFL $0X39,X8,X8
- XORL 80(SI),DX
- XORL 84(SI),CX
- XORL 88(SI),R8
- XORL 92(SI),R9
- MOVL DX,80(DI)
- MOVL CX,84(DI)
- MOVL R8,88(DI)
- MOVL R9,92(DI)
- MOVD X14,DX
- MOVD X0,CX
- MOVD X5,R8
- MOVD X8,R9
- PSHUFL $0X39,X14,X14
- PSHUFL $0X39,X0,X0
- PSHUFL $0X39,X5,X5
- PSHUFL $0X39,X8,X8
- XORL 144(SI),DX
- XORL 148(SI),CX
- XORL 152(SI),R8
- XORL 156(SI),R9
- MOVL DX,144(DI)
- MOVL CX,148(DI)
- MOVL R8,152(DI)
- MOVL R9,156(DI)
- MOVD X14,DX
- MOVD X0,CX
- MOVD X5,R8
- MOVD X8,R9
- XORL 208(SI),DX
- XORL 212(SI),CX
- XORL 216(SI),R8
- XORL 220(SI),R9
- MOVL DX,208(DI)
- MOVL CX,212(DI)
- MOVL R8,216(DI)
- MOVL R9,220(DI)
- PADDL 288(R12),X15
- PADDL 304(R12),X11
- PADDL 80(R12),X1
- PADDL 144(R12),X6
- MOVD X15,DX
- MOVD X11,CX
- MOVD X1,R8
- MOVD X6,R9
- PSHUFL $0X39,X15,X15
- PSHUFL $0X39,X11,X11
- PSHUFL $0X39,X1,X1
- PSHUFL $0X39,X6,X6
- XORL 32(SI),DX
- XORL 36(SI),CX
- XORL 40(SI),R8
- XORL 44(SI),R9
- MOVL DX,32(DI)
- MOVL CX,36(DI)
- MOVL R8,40(DI)
- MOVL R9,44(DI)
- MOVD X15,DX
- MOVD X11,CX
- MOVD X1,R8
- MOVD X6,R9
- PSHUFL $0X39,X15,X15
- PSHUFL $0X39,X11,X11
- PSHUFL $0X39,X1,X1
- PSHUFL $0X39,X6,X6
- XORL 96(SI),DX
- XORL 100(SI),CX
- XORL 104(SI),R8
- XORL 108(SI),R9
- MOVL DX,96(DI)
- MOVL CX,100(DI)
- MOVL R8,104(DI)
- MOVL R9,108(DI)
- MOVD X15,DX
- MOVD X11,CX
- MOVD X1,R8
- MOVD X6,R9
- PSHUFL $0X39,X15,X15
- PSHUFL $0X39,X11,X11
- PSHUFL $0X39,X1,X1
- PSHUFL $0X39,X6,X6
- XORL 160(SI),DX
- XORL 164(SI),CX
- XORL 168(SI),R8
- XORL 172(SI),R9
- MOVL DX,160(DI)
- MOVL CX,164(DI)
- MOVL R8,168(DI)
- MOVL R9,172(DI)
- MOVD X15,DX
- MOVD X11,CX
- MOVD X1,R8
- MOVD X6,R9
- XORL 224(SI),DX
- XORL 228(SI),CX
- XORL 232(SI),R8
- XORL 236(SI),R9
- MOVL DX,224(DI)
- MOVL CX,228(DI)
- MOVL R8,232(DI)
- MOVL R9,236(DI)
- PADDL 160(R12),X13
- PADDL 208(R12),X9
- PADDL 256(R12),X3
- PADDL 96(R12),X2
- MOVD X13,DX
- MOVD X9,CX
- MOVD X3,R8
- MOVD X2,R9
- PSHUFL $0X39,X13,X13
- PSHUFL $0X39,X9,X9
- PSHUFL $0X39,X3,X3
- PSHUFL $0X39,X2,X2
- XORL 48(SI),DX
- XORL 52(SI),CX
- XORL 56(SI),R8
- XORL 60(SI),R9
- MOVL DX,48(DI)
- MOVL CX,52(DI)
- MOVL R8,56(DI)
- MOVL R9,60(DI)
- MOVD X13,DX
- MOVD X9,CX
- MOVD X3,R8
- MOVD X2,R9
- PSHUFL $0X39,X13,X13
- PSHUFL $0X39,X9,X9
- PSHUFL $0X39,X3,X3
- PSHUFL $0X39,X2,X2
- XORL 112(SI),DX
- XORL 116(SI),CX
- XORL 120(SI),R8
- XORL 124(SI),R9
- MOVL DX,112(DI)
- MOVL CX,116(DI)
- MOVL R8,120(DI)
- MOVL R9,124(DI)
- MOVD X13,DX
- MOVD X9,CX
- MOVD X3,R8
- MOVD X2,R9
- PSHUFL $0X39,X13,X13
- PSHUFL $0X39,X9,X9
- PSHUFL $0X39,X3,X3
- PSHUFL $0X39,X2,X2
- XORL 176(SI),DX
- XORL 180(SI),CX
- XORL 184(SI),R8
- XORL 188(SI),R9
- MOVL DX,176(DI)
- MOVL CX,180(DI)
- MOVL R8,184(DI)
- MOVL R9,188(DI)
- MOVD X13,DX
- MOVD X9,CX
- MOVD X3,R8
- MOVD X2,R9
- XORL 240(SI),DX
- XORL 244(SI),CX
- XORL 248(SI),R8
- XORL 252(SI),R9
- MOVL DX,240(DI)
- MOVL CX,244(DI)
- MOVL R8,248(DI)
- MOVL R9,252(DI)
- MOVQ 352(R12),R9
- SUBQ $256,R9
- ADDQ $256,SI
- ADDQ $256,DI
- CMPQ R9,$256
- JAE BYTESATLEAST256
- CMPQ R9,$0
- JBE DONE
- BYTESBETWEEN1AND255:
- CMPQ R9,$64
- JAE NOCOPY
- MOVQ DI,DX
- LEAQ 360(R12),DI
- MOVQ R9,CX
- REP; MOVSB
- LEAQ 360(R12),DI
- LEAQ 360(R12),SI
- NOCOPY:
- MOVQ R9,352(R12)
- MOVOA 48(R12),X0
- MOVOA 0(R12),X1
- MOVOA 16(R12),X2
- MOVOA 32(R12),X3
- MOVOA X1,X4
- MOVQ $20,CX
- MAINLOOP2:
- PADDL X0,X4
- MOVOA X0,X5
- MOVOA X4,X6
- PSLLL $7,X4
- PSRLL $25,X6
- PXOR X4,X3
- PXOR X6,X3
- PADDL X3,X5
- MOVOA X3,X4
- MOVOA X5,X6
- PSLLL $9,X5
- PSRLL $23,X6
- PXOR X5,X2
- PSHUFL $0X93,X3,X3
- PXOR X6,X2
- PADDL X2,X4
- MOVOA X2,X5
- MOVOA X4,X6
- PSLLL $13,X4
- PSRLL $19,X6
- PXOR X4,X1
- PSHUFL $0X4E,X2,X2
- PXOR X6,X1
- PADDL X1,X5
- MOVOA X3,X4
- MOVOA X5,X6
- PSLLL $18,X5
- PSRLL $14,X6
- PXOR X5,X0
- PSHUFL $0X39,X1,X1
- PXOR X6,X0
- PADDL X0,X4
- MOVOA X0,X5
- MOVOA X4,X6
- PSLLL $7,X4
- PSRLL $25,X6
- PXOR X4,X1
- PXOR X6,X1
- PADDL X1,X5
- MOVOA X1,X4
- MOVOA X5,X6
- PSLLL $9,X5
- PSRLL $23,X6
- PXOR X5,X2
- PSHUFL $0X93,X1,X1
- PXOR X6,X2
- PADDL X2,X4
- MOVOA X2,X5
- MOVOA X4,X6
- PSLLL $13,X4
- PSRLL $19,X6
- PXOR X4,X3
- PSHUFL $0X4E,X2,X2
- PXOR X6,X3
- PADDL X3,X5
- MOVOA X1,X4
- MOVOA X5,X6
- PSLLL $18,X5
- PSRLL $14,X6
- PXOR X5,X0
- PSHUFL $0X39,X3,X3
- PXOR X6,X0
- PADDL X0,X4
- MOVOA X0,X5
- MOVOA X4,X6
- PSLLL $7,X4
- PSRLL $25,X6
- PXOR X4,X3
- PXOR X6,X3
- PADDL X3,X5
- MOVOA X3,X4
- MOVOA X5,X6
- PSLLL $9,X5
- PSRLL $23,X6
- PXOR X5,X2
- PSHUFL $0X93,X3,X3
- PXOR X6,X2
- PADDL X2,X4
- MOVOA X2,X5
- MOVOA X4,X6
- PSLLL $13,X4
- PSRLL $19,X6
- PXOR X4,X1
- PSHUFL $0X4E,X2,X2
- PXOR X6,X1
- PADDL X1,X5
- MOVOA X3,X4
- MOVOA X5,X6
- PSLLL $18,X5
- PSRLL $14,X6
- PXOR X5,X0
- PSHUFL $0X39,X1,X1
- PXOR X6,X0
- PADDL X0,X4
- MOVOA X0,X5
- MOVOA X4,X6
- PSLLL $7,X4
- PSRLL $25,X6
- PXOR X4,X1
- PXOR X6,X1
- PADDL X1,X5
- MOVOA X1,X4
- MOVOA X5,X6
- PSLLL $9,X5
- PSRLL $23,X6
- PXOR X5,X2
- PSHUFL $0X93,X1,X1
- PXOR X6,X2
- PADDL X2,X4
- MOVOA X2,X5
- MOVOA X4,X6
- PSLLL $13,X4
- PSRLL $19,X6
- PXOR X4,X3
- PSHUFL $0X4E,X2,X2
- PXOR X6,X3
- SUBQ $4,CX
- PADDL X3,X5
- MOVOA X1,X4
- MOVOA X5,X6
- PSLLL $18,X5
- PXOR X7,X7
- PSRLL $14,X6
- PXOR X5,X0
- PSHUFL $0X39,X3,X3
- PXOR X6,X0
- JA MAINLOOP2
- PADDL 48(R12),X0
- PADDL 0(R12),X1
- PADDL 16(R12),X2
- PADDL 32(R12),X3
- MOVD X0,CX
- MOVD X1,R8
- MOVD X2,R9
- MOVD X3,AX
- PSHUFL $0X39,X0,X0
- PSHUFL $0X39,X1,X1
- PSHUFL $0X39,X2,X2
- PSHUFL $0X39,X3,X3
- XORL 0(SI),CX
- XORL 48(SI),R8
- XORL 32(SI),R9
- XORL 16(SI),AX
- MOVL CX,0(DI)
- MOVL R8,48(DI)
- MOVL R9,32(DI)
- MOVL AX,16(DI)
- MOVD X0,CX
- MOVD X1,R8
- MOVD X2,R9
- MOVD X3,AX
- PSHUFL $0X39,X0,X0
- PSHUFL $0X39,X1,X1
- PSHUFL $0X39,X2,X2
- PSHUFL $0X39,X3,X3
- XORL 20(SI),CX
- XORL 4(SI),R8
- XORL 52(SI),R9
- XORL 36(SI),AX
- MOVL CX,20(DI)
- MOVL R8,4(DI)
- MOVL R9,52(DI)
- MOVL AX,36(DI)
- MOVD X0,CX
- MOVD X1,R8
- MOVD X2,R9
- MOVD X3,AX
- PSHUFL $0X39,X0,X0
- PSHUFL $0X39,X1,X1
- PSHUFL $0X39,X2,X2
- PSHUFL $0X39,X3,X3
- XORL 40(SI),CX
- XORL 24(SI),R8
- XORL 8(SI),R9
- XORL 56(SI),AX
- MOVL CX,40(DI)
- MOVL R8,24(DI)
- MOVL R9,8(DI)
- MOVL AX,56(DI)
- MOVD X0,CX
- MOVD X1,R8
- MOVD X2,R9
- MOVD X3,AX
- XORL 60(SI),CX
- XORL 44(SI),R8
- XORL 28(SI),R9
- XORL 12(SI),AX
- MOVL CX,60(DI)
- MOVL R8,44(DI)
- MOVL R9,28(DI)
- MOVL AX,12(DI)
- MOVQ 352(R12),R9
- MOVL 16(R12),CX
- MOVL 36 (R12),R8
- ADDQ $1,CX
- SHLQ $32,R8
- ADDQ R8,CX
- MOVQ CX,R8
- SHRQ $32,R8
- MOVL CX,16(R12)
- MOVL R8, 36 (R12)
- CMPQ R9,$64
- JA BYTESATLEAST65
- JAE BYTESATLEAST64
- MOVQ DI,SI
- MOVQ DX,DI
- MOVQ R9,CX
- REP; MOVSB
- BYTESATLEAST64:
- DONE:
- RET
- BYTESATLEAST65:
- SUBQ $64,R9
- ADDQ $64,DI
- ADDQ $64,SI
- JMP BYTESBETWEEN1AND255
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_noasm.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_noasm.go
deleted file mode 100644
index 4392cc1..0000000
--- a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_noasm.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !amd64 || purego || !gc
-// +build !amd64 purego !gc
-
-package salsa
-
-// XORKeyStream crypts bytes from in to out using the given key and counters.
-// In and out must overlap entirely or not at all. Counter
-// contains the raw salsa20 counter bytes (both nonce and block counter).
-func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
- genericXORKeyStream(out, in, counter, key)
-}
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go
deleted file mode 100644
index 68169c6..0000000
--- a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go
+++ /dev/null
@@ -1,231 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package salsa
-
-const rounds = 20
-
-// core applies the Salsa20 core function to 16-byte input in, 32-byte key k,
-// and 16-byte constant c, and puts the result into 64-byte array out.
-func core(out *[64]byte, in *[16]byte, k *[32]byte, c *[16]byte) {
- j0 := uint32(c[0]) | uint32(c[1])<<8 | uint32(c[2])<<16 | uint32(c[3])<<24
- j1 := uint32(k[0]) | uint32(k[1])<<8 | uint32(k[2])<<16 | uint32(k[3])<<24
- j2 := uint32(k[4]) | uint32(k[5])<<8 | uint32(k[6])<<16 | uint32(k[7])<<24
- j3 := uint32(k[8]) | uint32(k[9])<<8 | uint32(k[10])<<16 | uint32(k[11])<<24
- j4 := uint32(k[12]) | uint32(k[13])<<8 | uint32(k[14])<<16 | uint32(k[15])<<24
- j5 := uint32(c[4]) | uint32(c[5])<<8 | uint32(c[6])<<16 | uint32(c[7])<<24
- j6 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
- j7 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24
- j8 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24
- j9 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24
- j10 := uint32(c[8]) | uint32(c[9])<<8 | uint32(c[10])<<16 | uint32(c[11])<<24
- j11 := uint32(k[16]) | uint32(k[17])<<8 | uint32(k[18])<<16 | uint32(k[19])<<24
- j12 := uint32(k[20]) | uint32(k[21])<<8 | uint32(k[22])<<16 | uint32(k[23])<<24
- j13 := uint32(k[24]) | uint32(k[25])<<8 | uint32(k[26])<<16 | uint32(k[27])<<24
- j14 := uint32(k[28]) | uint32(k[29])<<8 | uint32(k[30])<<16 | uint32(k[31])<<24
- j15 := uint32(c[12]) | uint32(c[13])<<8 | uint32(c[14])<<16 | uint32(c[15])<<24
-
- x0, x1, x2, x3, x4, x5, x6, x7, x8 := j0, j1, j2, j3, j4, j5, j6, j7, j8
- x9, x10, x11, x12, x13, x14, x15 := j9, j10, j11, j12, j13, j14, j15
-
- for i := 0; i < rounds; i += 2 {
- u := x0 + x12
- x4 ^= u<<7 | u>>(32-7)
- u = x4 + x0
- x8 ^= u<<9 | u>>(32-9)
- u = x8 + x4
- x12 ^= u<<13 | u>>(32-13)
- u = x12 + x8
- x0 ^= u<<18 | u>>(32-18)
-
- u = x5 + x1
- x9 ^= u<<7 | u>>(32-7)
- u = x9 + x5
- x13 ^= u<<9 | u>>(32-9)
- u = x13 + x9
- x1 ^= u<<13 | u>>(32-13)
- u = x1 + x13
- x5 ^= u<<18 | u>>(32-18)
-
- u = x10 + x6
- x14 ^= u<<7 | u>>(32-7)
- u = x14 + x10
- x2 ^= u<<9 | u>>(32-9)
- u = x2 + x14
- x6 ^= u<<13 | u>>(32-13)
- u = x6 + x2
- x10 ^= u<<18 | u>>(32-18)
-
- u = x15 + x11
- x3 ^= u<<7 | u>>(32-7)
- u = x3 + x15
- x7 ^= u<<9 | u>>(32-9)
- u = x7 + x3
- x11 ^= u<<13 | u>>(32-13)
- u = x11 + x7
- x15 ^= u<<18 | u>>(32-18)
-
- u = x0 + x3
- x1 ^= u<<7 | u>>(32-7)
- u = x1 + x0
- x2 ^= u<<9 | u>>(32-9)
- u = x2 + x1
- x3 ^= u<<13 | u>>(32-13)
- u = x3 + x2
- x0 ^= u<<18 | u>>(32-18)
-
- u = x5 + x4
- x6 ^= u<<7 | u>>(32-7)
- u = x6 + x5
- x7 ^= u<<9 | u>>(32-9)
- u = x7 + x6
- x4 ^= u<<13 | u>>(32-13)
- u = x4 + x7
- x5 ^= u<<18 | u>>(32-18)
-
- u = x10 + x9
- x11 ^= u<<7 | u>>(32-7)
- u = x11 + x10
- x8 ^= u<<9 | u>>(32-9)
- u = x8 + x11
- x9 ^= u<<13 | u>>(32-13)
- u = x9 + x8
- x10 ^= u<<18 | u>>(32-18)
-
- u = x15 + x14
- x12 ^= u<<7 | u>>(32-7)
- u = x12 + x15
- x13 ^= u<<9 | u>>(32-9)
- u = x13 + x12
- x14 ^= u<<13 | u>>(32-13)
- u = x14 + x13
- x15 ^= u<<18 | u>>(32-18)
- }
- x0 += j0
- x1 += j1
- x2 += j2
- x3 += j3
- x4 += j4
- x5 += j5
- x6 += j6
- x7 += j7
- x8 += j8
- x9 += j9
- x10 += j10
- x11 += j11
- x12 += j12
- x13 += j13
- x14 += j14
- x15 += j15
-
- out[0] = byte(x0)
- out[1] = byte(x0 >> 8)
- out[2] = byte(x0 >> 16)
- out[3] = byte(x0 >> 24)
-
- out[4] = byte(x1)
- out[5] = byte(x1 >> 8)
- out[6] = byte(x1 >> 16)
- out[7] = byte(x1 >> 24)
-
- out[8] = byte(x2)
- out[9] = byte(x2 >> 8)
- out[10] = byte(x2 >> 16)
- out[11] = byte(x2 >> 24)
-
- out[12] = byte(x3)
- out[13] = byte(x3 >> 8)
- out[14] = byte(x3 >> 16)
- out[15] = byte(x3 >> 24)
-
- out[16] = byte(x4)
- out[17] = byte(x4 >> 8)
- out[18] = byte(x4 >> 16)
- out[19] = byte(x4 >> 24)
-
- out[20] = byte(x5)
- out[21] = byte(x5 >> 8)
- out[22] = byte(x5 >> 16)
- out[23] = byte(x5 >> 24)
-
- out[24] = byte(x6)
- out[25] = byte(x6 >> 8)
- out[26] = byte(x6 >> 16)
- out[27] = byte(x6 >> 24)
-
- out[28] = byte(x7)
- out[29] = byte(x7 >> 8)
- out[30] = byte(x7 >> 16)
- out[31] = byte(x7 >> 24)
-
- out[32] = byte(x8)
- out[33] = byte(x8 >> 8)
- out[34] = byte(x8 >> 16)
- out[35] = byte(x8 >> 24)
-
- out[36] = byte(x9)
- out[37] = byte(x9 >> 8)
- out[38] = byte(x9 >> 16)
- out[39] = byte(x9 >> 24)
-
- out[40] = byte(x10)
- out[41] = byte(x10 >> 8)
- out[42] = byte(x10 >> 16)
- out[43] = byte(x10 >> 24)
-
- out[44] = byte(x11)
- out[45] = byte(x11 >> 8)
- out[46] = byte(x11 >> 16)
- out[47] = byte(x11 >> 24)
-
- out[48] = byte(x12)
- out[49] = byte(x12 >> 8)
- out[50] = byte(x12 >> 16)
- out[51] = byte(x12 >> 24)
-
- out[52] = byte(x13)
- out[53] = byte(x13 >> 8)
- out[54] = byte(x13 >> 16)
- out[55] = byte(x13 >> 24)
-
- out[56] = byte(x14)
- out[57] = byte(x14 >> 8)
- out[58] = byte(x14 >> 16)
- out[59] = byte(x14 >> 24)
-
- out[60] = byte(x15)
- out[61] = byte(x15 >> 8)
- out[62] = byte(x15 >> 16)
- out[63] = byte(x15 >> 24)
-}
-
-// genericXORKeyStream is the generic implementation of XORKeyStream to be used
-// when no assembly implementation is available.
-func genericXORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
- var block [64]byte
- var counterCopy [16]byte
- copy(counterCopy[:], counter[:])
-
- for len(in) >= 64 {
- core(&block, &counterCopy, key, &Sigma)
- for i, x := range block {
- out[i] = in[i] ^ x
- }
- u := uint32(1)
- for i := 8; i < 16; i++ {
- u += uint32(counterCopy[i])
- counterCopy[i] = byte(u)
- u >>= 8
- }
- in = in[64:]
- out = out[64:]
- }
-
- if len(in) > 0 {
- core(&block, &counterCopy, key, &Sigma)
- for i, v := range in {
- out[i] = v ^ block[i]
- }
- }
-}
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa20.go b/vendor/golang.org/x/crypto/salsa20/salsa20.go
deleted file mode 100644
index 8f4f896..0000000
--- a/vendor/golang.org/x/crypto/salsa20/salsa20.go
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-/*
-Package salsa20 implements the Salsa20 stream cipher as specified in https://cr.yp.to/snuffle/spec.pdf.
-
-Salsa20 differs from many other stream ciphers in that it is message orientated
-rather than byte orientated. Keystream blocks are not preserved between calls,
-therefore each side must encrypt/decrypt data with the same segmentation.
-
-Another aspect of this difference is that part of the counter is exposed as
-a nonce in each call. Encrypting two different messages with the same (key,
-nonce) pair leads to trivial plaintext recovery. This is analogous to
-encrypting two different messages with the same key with a traditional stream
-cipher.
-
-This package also implements XSalsa20: a version of Salsa20 with a 24-byte
-nonce as specified in https://cr.yp.to/snuffle/xsalsa-20081128.pdf. Simply
-passing a 24-byte slice as the nonce triggers XSalsa20.
-*/
-package salsa20 // import "golang.org/x/crypto/salsa20"
-
-// TODO(agl): implement XORKeyStream12 and XORKeyStream8 - the reduced round variants of Salsa20.
-
-import (
- "golang.org/x/crypto/internal/alias"
- "golang.org/x/crypto/salsa20/salsa"
-)
-
-// XORKeyStream crypts bytes from in to out using the given key and nonce.
-// In and out must overlap entirely or not at all. Nonce must
-// be either 8 or 24 bytes long.
-func XORKeyStream(out, in []byte, nonce []byte, key *[32]byte) {
- if len(out) < len(in) {
- panic("salsa20: output smaller than input")
- }
- if alias.InexactOverlap(out[:len(in)], in) {
- panic("salsa20: invalid buffer overlap")
- }
-
- var subNonce [16]byte
-
- if len(nonce) == 24 {
- var subKey [32]byte
- var hNonce [16]byte
- copy(hNonce[:], nonce[:16])
- salsa.HSalsa20(&subKey, &hNonce, key, &salsa.Sigma)
- copy(subNonce[:], nonce[16:])
- key = &subKey
- } else if len(nonce) == 8 {
- copy(subNonce[:], nonce[:])
- } else {
- panic("salsa20: nonce must be 8 or 24 bytes")
- }
-
- salsa.XORKeyStream(out, in, &subNonce, key)
-}
diff --git a/vendor/golang.org/x/crypto/scrypt/scrypt.go b/vendor/golang.org/x/crypto/scrypt/scrypt.go
deleted file mode 100644
index c971a99..0000000
--- a/vendor/golang.org/x/crypto/scrypt/scrypt.go
+++ /dev/null
@@ -1,212 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package scrypt implements the scrypt key derivation function as defined in
-// Colin Percival's paper "Stronger Key Derivation via Sequential Memory-Hard
-// Functions" (https://www.tarsnap.com/scrypt/scrypt.pdf).
-package scrypt // import "golang.org/x/crypto/scrypt"
-
-import (
- "crypto/sha256"
- "encoding/binary"
- "errors"
- "math/bits"
-
- "golang.org/x/crypto/pbkdf2"
-)
-
-const maxInt = int(^uint(0) >> 1)
-
-// blockCopy copies n numbers from src into dst.
-func blockCopy(dst, src []uint32, n int) {
- copy(dst, src[:n])
-}
-
-// blockXOR XORs numbers from dst with n numbers from src.
-func blockXOR(dst, src []uint32, n int) {
- for i, v := range src[:n] {
- dst[i] ^= v
- }
-}
-
-// salsaXOR applies Salsa20/8 to the XOR of 16 numbers from tmp and in,
-// and puts the result into both tmp and out.
-func salsaXOR(tmp *[16]uint32, in, out []uint32) {
- w0 := tmp[0] ^ in[0]
- w1 := tmp[1] ^ in[1]
- w2 := tmp[2] ^ in[2]
- w3 := tmp[3] ^ in[3]
- w4 := tmp[4] ^ in[4]
- w5 := tmp[5] ^ in[5]
- w6 := tmp[6] ^ in[6]
- w7 := tmp[7] ^ in[7]
- w8 := tmp[8] ^ in[8]
- w9 := tmp[9] ^ in[9]
- w10 := tmp[10] ^ in[10]
- w11 := tmp[11] ^ in[11]
- w12 := tmp[12] ^ in[12]
- w13 := tmp[13] ^ in[13]
- w14 := tmp[14] ^ in[14]
- w15 := tmp[15] ^ in[15]
-
- x0, x1, x2, x3, x4, x5, x6, x7, x8 := w0, w1, w2, w3, w4, w5, w6, w7, w8
- x9, x10, x11, x12, x13, x14, x15 := w9, w10, w11, w12, w13, w14, w15
-
- for i := 0; i < 8; i += 2 {
- x4 ^= bits.RotateLeft32(x0+x12, 7)
- x8 ^= bits.RotateLeft32(x4+x0, 9)
- x12 ^= bits.RotateLeft32(x8+x4, 13)
- x0 ^= bits.RotateLeft32(x12+x8, 18)
-
- x9 ^= bits.RotateLeft32(x5+x1, 7)
- x13 ^= bits.RotateLeft32(x9+x5, 9)
- x1 ^= bits.RotateLeft32(x13+x9, 13)
- x5 ^= bits.RotateLeft32(x1+x13, 18)
-
- x14 ^= bits.RotateLeft32(x10+x6, 7)
- x2 ^= bits.RotateLeft32(x14+x10, 9)
- x6 ^= bits.RotateLeft32(x2+x14, 13)
- x10 ^= bits.RotateLeft32(x6+x2, 18)
-
- x3 ^= bits.RotateLeft32(x15+x11, 7)
- x7 ^= bits.RotateLeft32(x3+x15, 9)
- x11 ^= bits.RotateLeft32(x7+x3, 13)
- x15 ^= bits.RotateLeft32(x11+x7, 18)
-
- x1 ^= bits.RotateLeft32(x0+x3, 7)
- x2 ^= bits.RotateLeft32(x1+x0, 9)
- x3 ^= bits.RotateLeft32(x2+x1, 13)
- x0 ^= bits.RotateLeft32(x3+x2, 18)
-
- x6 ^= bits.RotateLeft32(x5+x4, 7)
- x7 ^= bits.RotateLeft32(x6+x5, 9)
- x4 ^= bits.RotateLeft32(x7+x6, 13)
- x5 ^= bits.RotateLeft32(x4+x7, 18)
-
- x11 ^= bits.RotateLeft32(x10+x9, 7)
- x8 ^= bits.RotateLeft32(x11+x10, 9)
- x9 ^= bits.RotateLeft32(x8+x11, 13)
- x10 ^= bits.RotateLeft32(x9+x8, 18)
-
- x12 ^= bits.RotateLeft32(x15+x14, 7)
- x13 ^= bits.RotateLeft32(x12+x15, 9)
- x14 ^= bits.RotateLeft32(x13+x12, 13)
- x15 ^= bits.RotateLeft32(x14+x13, 18)
- }
- x0 += w0
- x1 += w1
- x2 += w2
- x3 += w3
- x4 += w4
- x5 += w5
- x6 += w6
- x7 += w7
- x8 += w8
- x9 += w9
- x10 += w10
- x11 += w11
- x12 += w12
- x13 += w13
- x14 += w14
- x15 += w15
-
- out[0], tmp[0] = x0, x0
- out[1], tmp[1] = x1, x1
- out[2], tmp[2] = x2, x2
- out[3], tmp[3] = x3, x3
- out[4], tmp[4] = x4, x4
- out[5], tmp[5] = x5, x5
- out[6], tmp[6] = x6, x6
- out[7], tmp[7] = x7, x7
- out[8], tmp[8] = x8, x8
- out[9], tmp[9] = x9, x9
- out[10], tmp[10] = x10, x10
- out[11], tmp[11] = x11, x11
- out[12], tmp[12] = x12, x12
- out[13], tmp[13] = x13, x13
- out[14], tmp[14] = x14, x14
- out[15], tmp[15] = x15, x15
-}
-
-func blockMix(tmp *[16]uint32, in, out []uint32, r int) {
- blockCopy(tmp[:], in[(2*r-1)*16:], 16)
- for i := 0; i < 2*r; i += 2 {
- salsaXOR(tmp, in[i*16:], out[i*8:])
- salsaXOR(tmp, in[i*16+16:], out[i*8+r*16:])
- }
-}
-
-func integer(b []uint32, r int) uint64 {
- j := (2*r - 1) * 16
- return uint64(b[j]) | uint64(b[j+1])<<32
-}
-
-func smix(b []byte, r, N int, v, xy []uint32) {
- var tmp [16]uint32
- R := 32 * r
- x := xy
- y := xy[R:]
-
- j := 0
- for i := 0; i < R; i++ {
- x[i] = binary.LittleEndian.Uint32(b[j:])
- j += 4
- }
- for i := 0; i < N; i += 2 {
- blockCopy(v[i*R:], x, R)
- blockMix(&tmp, x, y, r)
-
- blockCopy(v[(i+1)*R:], y, R)
- blockMix(&tmp, y, x, r)
- }
- for i := 0; i < N; i += 2 {
- j := int(integer(x, r) & uint64(N-1))
- blockXOR(x, v[j*R:], R)
- blockMix(&tmp, x, y, r)
-
- j = int(integer(y, r) & uint64(N-1))
- blockXOR(y, v[j*R:], R)
- blockMix(&tmp, y, x, r)
- }
- j = 0
- for _, v := range x[:R] {
- binary.LittleEndian.PutUint32(b[j:], v)
- j += 4
- }
-}
-
-// Key derives a key from the password, salt, and cost parameters, returning
-// a byte slice of length keyLen that can be used as cryptographic key.
-//
-// N is a CPU/memory cost parameter, which must be a power of two greater than 1.
-// r and p must satisfy r * p < 2³⁰. If the parameters do not satisfy the
-// limits, the function returns a nil byte slice and an error.
-//
-// For example, you can get a derived key for e.g. AES-256 (which needs a
-// 32-byte key) by doing:
-//
-// dk, err := scrypt.Key([]byte("some password"), salt, 32768, 8, 1, 32)
-//
-// The recommended parameters for interactive logins as of 2017 are N=32768, r=8
-// and p=1. The parameters N, r, and p should be increased as memory latency and
-// CPU parallelism increases; consider setting N to the highest power of 2 you
-// can derive within 100 milliseconds. Remember to get a good random salt.
-func Key(password, salt []byte, N, r, p, keyLen int) ([]byte, error) {
- if N <= 1 || N&(N-1) != 0 {
- return nil, errors.New("scrypt: N must be > 1 and a power of 2")
- }
- if uint64(r)*uint64(p) >= 1<<30 || r > maxInt/128/p || r > maxInt/256 || N > maxInt/128/r {
- return nil, errors.New("scrypt: parameters are too large")
- }
-
- xy := make([]uint32, 64*r)
- v := make([]uint32, 32*N*r)
- b := pbkdf2.Key(password, salt, 1, p*128*r, sha256.New)
-
- for i := 0; i < p; i++ {
- smix(b[i*128*r:], r, N, v, xy)
- }
-
- return pbkdf2.Key(password, b, 1, keyLen, sha256.New), nil
-}
diff --git a/vendor/golang.org/x/crypto/sha3/doc.go b/vendor/golang.org/x/crypto/sha3/doc.go
deleted file mode 100644
index decd8cf..0000000
--- a/vendor/golang.org/x/crypto/sha3/doc.go
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package sha3 implements the SHA-3 fixed-output-length hash functions and
-// the SHAKE variable-output-length hash functions defined by FIPS-202.
-//
-// Both types of hash function use the "sponge" construction and the Keccak
-// permutation. For a detailed specification see http://keccak.noekeon.org/
-//
-// # Guidance
-//
-// If you aren't sure what function you need, use SHAKE256 with at least 64
-// bytes of output. The SHAKE instances are faster than the SHA3 instances;
-// the latter have to allocate memory to conform to the hash.Hash interface.
-//
-// If you need a secret-key MAC (message authentication code), prepend the
-// secret key to the input, hash with SHAKE256 and read at least 32 bytes of
-// output.
-//
-// # Security strengths
-//
-// The SHA3-x (x equals 224, 256, 384, or 512) functions have a security
-// strength against preimage attacks of x bits. Since they only produce "x"
-// bits of output, their collision-resistance is only "x/2" bits.
-//
-// The SHAKE-256 and -128 functions have a generic security strength of 256 and
-// 128 bits against all attacks, provided that at least 2x bits of their output
-// is used. Requesting more than 64 or 32 bytes of output, respectively, does
-// not increase the collision-resistance of the SHAKE functions.
-//
-// # The sponge construction
-//
-// A sponge builds a pseudo-random function from a public pseudo-random
-// permutation, by applying the permutation to a state of "rate + capacity"
-// bytes, but hiding "capacity" of the bytes.
-//
-// A sponge starts out with a zero state. To hash an input using a sponge, up
-// to "rate" bytes of the input are XORed into the sponge's state. The sponge
-// is then "full" and the permutation is applied to "empty" it. This process is
-// repeated until all the input has been "absorbed". The input is then padded.
-// The digest is "squeezed" from the sponge in the same way, except that output
-// is copied out instead of input being XORed in.
-//
-// A sponge is parameterized by its generic security strength, which is equal
-// to half its capacity; capacity + rate is equal to the permutation's width.
-// Since the KeccakF-1600 permutation is 1600 bits (200 bytes) wide, this means
-// that the security strength of a sponge instance is equal to (1600 - bitrate) / 2.
-//
-// # Recommendations
-//
-// The SHAKE functions are recommended for most new uses. They can produce
-// output of arbitrary length. SHAKE256, with an output length of at least
-// 64 bytes, provides 256-bit security against all attacks. The Keccak team
-// recommends it for most applications upgrading from SHA2-512. (NIST chose a
-// much stronger, but much slower, sponge instance for SHA3-512.)
-//
-// The SHA-3 functions are "drop-in" replacements for the SHA-2 functions.
-// They produce output of the same length, with the same security strengths
-// against all attacks. This means, in particular, that SHA3-256 only has
-// 128-bit collision resistance, because its output length is 32 bytes.
-package sha3 // import "golang.org/x/crypto/sha3"
diff --git a/vendor/golang.org/x/crypto/sha3/hashes.go b/vendor/golang.org/x/crypto/sha3/hashes.go
deleted file mode 100644
index 0d8043f..0000000
--- a/vendor/golang.org/x/crypto/sha3/hashes.go
+++ /dev/null
@@ -1,97 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package sha3
-
-// This file provides functions for creating instances of the SHA-3
-// and SHAKE hash functions, as well as utility functions for hashing
-// bytes.
-
-import (
- "hash"
-)
-
-// New224 creates a new SHA3-224 hash.
-// Its generic security strength is 224 bits against preimage attacks,
-// and 112 bits against collision attacks.
-func New224() hash.Hash {
- if h := new224Asm(); h != nil {
- return h
- }
- return &state{rate: 144, outputLen: 28, dsbyte: 0x06}
-}
-
-// New256 creates a new SHA3-256 hash.
-// Its generic security strength is 256 bits against preimage attacks,
-// and 128 bits against collision attacks.
-func New256() hash.Hash {
- if h := new256Asm(); h != nil {
- return h
- }
- return &state{rate: 136, outputLen: 32, dsbyte: 0x06}
-}
-
-// New384 creates a new SHA3-384 hash.
-// Its generic security strength is 384 bits against preimage attacks,
-// and 192 bits against collision attacks.
-func New384() hash.Hash {
- if h := new384Asm(); h != nil {
- return h
- }
- return &state{rate: 104, outputLen: 48, dsbyte: 0x06}
-}
-
-// New512 creates a new SHA3-512 hash.
-// Its generic security strength is 512 bits against preimage attacks,
-// and 256 bits against collision attacks.
-func New512() hash.Hash {
- if h := new512Asm(); h != nil {
- return h
- }
- return &state{rate: 72, outputLen: 64, dsbyte: 0x06}
-}
-
-// NewLegacyKeccak256 creates a new Keccak-256 hash.
-//
-// Only use this function if you require compatibility with an existing cryptosystem
-// that uses non-standard padding. All other users should use New256 instead.
-func NewLegacyKeccak256() hash.Hash { return &state{rate: 136, outputLen: 32, dsbyte: 0x01} }
-
-// NewLegacyKeccak512 creates a new Keccak-512 hash.
-//
-// Only use this function if you require compatibility with an existing cryptosystem
-// that uses non-standard padding. All other users should use New512 instead.
-func NewLegacyKeccak512() hash.Hash { return &state{rate: 72, outputLen: 64, dsbyte: 0x01} }
-
-// Sum224 returns the SHA3-224 digest of the data.
-func Sum224(data []byte) (digest [28]byte) {
- h := New224()
- h.Write(data)
- h.Sum(digest[:0])
- return
-}
-
-// Sum256 returns the SHA3-256 digest of the data.
-func Sum256(data []byte) (digest [32]byte) {
- h := New256()
- h.Write(data)
- h.Sum(digest[:0])
- return
-}
-
-// Sum384 returns the SHA3-384 digest of the data.
-func Sum384(data []byte) (digest [48]byte) {
- h := New384()
- h.Write(data)
- h.Sum(digest[:0])
- return
-}
-
-// Sum512 returns the SHA3-512 digest of the data.
-func Sum512(data []byte) (digest [64]byte) {
- h := New512()
- h.Write(data)
- h.Sum(digest[:0])
- return
-}
diff --git a/vendor/golang.org/x/crypto/sha3/hashes_generic.go b/vendor/golang.org/x/crypto/sha3/hashes_generic.go
deleted file mode 100644
index c74fc20..0000000
--- a/vendor/golang.org/x/crypto/sha3/hashes_generic.go
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !gc || purego || !s390x
-// +build !gc purego !s390x
-
-package sha3
-
-import (
- "hash"
-)
-
-// new224Asm returns an assembly implementation of SHA3-224 if available,
-// otherwise it returns nil.
-func new224Asm() hash.Hash { return nil }
-
-// new256Asm returns an assembly implementation of SHA3-256 if available,
-// otherwise it returns nil.
-func new256Asm() hash.Hash { return nil }
-
-// new384Asm returns an assembly implementation of SHA3-384 if available,
-// otherwise it returns nil.
-func new384Asm() hash.Hash { return nil }
-
-// new512Asm returns an assembly implementation of SHA3-512 if available,
-// otherwise it returns nil.
-func new512Asm() hash.Hash { return nil }
diff --git a/vendor/golang.org/x/crypto/sha3/keccakf.go b/vendor/golang.org/x/crypto/sha3/keccakf.go
deleted file mode 100644
index 0f4ae8b..0000000
--- a/vendor/golang.org/x/crypto/sha3/keccakf.go
+++ /dev/null
@@ -1,413 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !amd64 || purego || !gc
-// +build !amd64 purego !gc
-
-package sha3
-
-// rc stores the round constants for use in the ι step.
-var rc = [24]uint64{
- 0x0000000000000001,
- 0x0000000000008082,
- 0x800000000000808A,
- 0x8000000080008000,
- 0x000000000000808B,
- 0x0000000080000001,
- 0x8000000080008081,
- 0x8000000000008009,
- 0x000000000000008A,
- 0x0000000000000088,
- 0x0000000080008009,
- 0x000000008000000A,
- 0x000000008000808B,
- 0x800000000000008B,
- 0x8000000000008089,
- 0x8000000000008003,
- 0x8000000000008002,
- 0x8000000000000080,
- 0x000000000000800A,
- 0x800000008000000A,
- 0x8000000080008081,
- 0x8000000000008080,
- 0x0000000080000001,
- 0x8000000080008008,
-}
-
-// keccakF1600 applies the Keccak permutation to a 1600b-wide
-// state represented as a slice of 25 uint64s.
-func keccakF1600(a *[25]uint64) {
- // Implementation translated from Keccak-inplace.c
- // in the keccak reference code.
- var t, bc0, bc1, bc2, bc3, bc4, d0, d1, d2, d3, d4 uint64
-
- for i := 0; i < 24; i += 4 {
- // Combines the 5 steps in each round into 2 steps.
- // Unrolls 4 rounds per loop and spreads some steps across rounds.
-
- // Round 1
- bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]
- bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]
- bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]
- bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]
- bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]
- d0 = bc4 ^ (bc1<<1 | bc1>>63)
- d1 = bc0 ^ (bc2<<1 | bc2>>63)
- d2 = bc1 ^ (bc3<<1 | bc3>>63)
- d3 = bc2 ^ (bc4<<1 | bc4>>63)
- d4 = bc3 ^ (bc0<<1 | bc0>>63)
-
- bc0 = a[0] ^ d0
- t = a[6] ^ d1
- bc1 = t<<44 | t>>(64-44)
- t = a[12] ^ d2
- bc2 = t<<43 | t>>(64-43)
- t = a[18] ^ d3
- bc3 = t<<21 | t>>(64-21)
- t = a[24] ^ d4
- bc4 = t<<14 | t>>(64-14)
- a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i]
- a[6] = bc1 ^ (bc3 &^ bc2)
- a[12] = bc2 ^ (bc4 &^ bc3)
- a[18] = bc3 ^ (bc0 &^ bc4)
- a[24] = bc4 ^ (bc1 &^ bc0)
-
- t = a[10] ^ d0
- bc2 = t<<3 | t>>(64-3)
- t = a[16] ^ d1
- bc3 = t<<45 | t>>(64-45)
- t = a[22] ^ d2
- bc4 = t<<61 | t>>(64-61)
- t = a[3] ^ d3
- bc0 = t<<28 | t>>(64-28)
- t = a[9] ^ d4
- bc1 = t<<20 | t>>(64-20)
- a[10] = bc0 ^ (bc2 &^ bc1)
- a[16] = bc1 ^ (bc3 &^ bc2)
- a[22] = bc2 ^ (bc4 &^ bc3)
- a[3] = bc3 ^ (bc0 &^ bc4)
- a[9] = bc4 ^ (bc1 &^ bc0)
-
- t = a[20] ^ d0
- bc4 = t<<18 | t>>(64-18)
- t = a[1] ^ d1
- bc0 = t<<1 | t>>(64-1)
- t = a[7] ^ d2
- bc1 = t<<6 | t>>(64-6)
- t = a[13] ^ d3
- bc2 = t<<25 | t>>(64-25)
- t = a[19] ^ d4
- bc3 = t<<8 | t>>(64-8)
- a[20] = bc0 ^ (bc2 &^ bc1)
- a[1] = bc1 ^ (bc3 &^ bc2)
- a[7] = bc2 ^ (bc4 &^ bc3)
- a[13] = bc3 ^ (bc0 &^ bc4)
- a[19] = bc4 ^ (bc1 &^ bc0)
-
- t = a[5] ^ d0
- bc1 = t<<36 | t>>(64-36)
- t = a[11] ^ d1
- bc2 = t<<10 | t>>(64-10)
- t = a[17] ^ d2
- bc3 = t<<15 | t>>(64-15)
- t = a[23] ^ d3
- bc4 = t<<56 | t>>(64-56)
- t = a[4] ^ d4
- bc0 = t<<27 | t>>(64-27)
- a[5] = bc0 ^ (bc2 &^ bc1)
- a[11] = bc1 ^ (bc3 &^ bc2)
- a[17] = bc2 ^ (bc4 &^ bc3)
- a[23] = bc3 ^ (bc0 &^ bc4)
- a[4] = bc4 ^ (bc1 &^ bc0)
-
- t = a[15] ^ d0
- bc3 = t<<41 | t>>(64-41)
- t = a[21] ^ d1
- bc4 = t<<2 | t>>(64-2)
- t = a[2] ^ d2
- bc0 = t<<62 | t>>(64-62)
- t = a[8] ^ d3
- bc1 = t<<55 | t>>(64-55)
- t = a[14] ^ d4
- bc2 = t<<39 | t>>(64-39)
- a[15] = bc0 ^ (bc2 &^ bc1)
- a[21] = bc1 ^ (bc3 &^ bc2)
- a[2] = bc2 ^ (bc4 &^ bc3)
- a[8] = bc3 ^ (bc0 &^ bc4)
- a[14] = bc4 ^ (bc1 &^ bc0)
-
- // Round 2
- bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]
- bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]
- bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]
- bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]
- bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]
- d0 = bc4 ^ (bc1<<1 | bc1>>63)
- d1 = bc0 ^ (bc2<<1 | bc2>>63)
- d2 = bc1 ^ (bc3<<1 | bc3>>63)
- d3 = bc2 ^ (bc4<<1 | bc4>>63)
- d4 = bc3 ^ (bc0<<1 | bc0>>63)
-
- bc0 = a[0] ^ d0
- t = a[16] ^ d1
- bc1 = t<<44 | t>>(64-44)
- t = a[7] ^ d2
- bc2 = t<<43 | t>>(64-43)
- t = a[23] ^ d3
- bc3 = t<<21 | t>>(64-21)
- t = a[14] ^ d4
- bc4 = t<<14 | t>>(64-14)
- a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+1]
- a[16] = bc1 ^ (bc3 &^ bc2)
- a[7] = bc2 ^ (bc4 &^ bc3)
- a[23] = bc3 ^ (bc0 &^ bc4)
- a[14] = bc4 ^ (bc1 &^ bc0)
-
- t = a[20] ^ d0
- bc2 = t<<3 | t>>(64-3)
- t = a[11] ^ d1
- bc3 = t<<45 | t>>(64-45)
- t = a[2] ^ d2
- bc4 = t<<61 | t>>(64-61)
- t = a[18] ^ d3
- bc0 = t<<28 | t>>(64-28)
- t = a[9] ^ d4
- bc1 = t<<20 | t>>(64-20)
- a[20] = bc0 ^ (bc2 &^ bc1)
- a[11] = bc1 ^ (bc3 &^ bc2)
- a[2] = bc2 ^ (bc4 &^ bc3)
- a[18] = bc3 ^ (bc0 &^ bc4)
- a[9] = bc4 ^ (bc1 &^ bc0)
-
- t = a[15] ^ d0
- bc4 = t<<18 | t>>(64-18)
- t = a[6] ^ d1
- bc0 = t<<1 | t>>(64-1)
- t = a[22] ^ d2
- bc1 = t<<6 | t>>(64-6)
- t = a[13] ^ d3
- bc2 = t<<25 | t>>(64-25)
- t = a[4] ^ d4
- bc3 = t<<8 | t>>(64-8)
- a[15] = bc0 ^ (bc2 &^ bc1)
- a[6] = bc1 ^ (bc3 &^ bc2)
- a[22] = bc2 ^ (bc4 &^ bc3)
- a[13] = bc3 ^ (bc0 &^ bc4)
- a[4] = bc4 ^ (bc1 &^ bc0)
-
- t = a[10] ^ d0
- bc1 = t<<36 | t>>(64-36)
- t = a[1] ^ d1
- bc2 = t<<10 | t>>(64-10)
- t = a[17] ^ d2
- bc3 = t<<15 | t>>(64-15)
- t = a[8] ^ d3
- bc4 = t<<56 | t>>(64-56)
- t = a[24] ^ d4
- bc0 = t<<27 | t>>(64-27)
- a[10] = bc0 ^ (bc2 &^ bc1)
- a[1] = bc1 ^ (bc3 &^ bc2)
- a[17] = bc2 ^ (bc4 &^ bc3)
- a[8] = bc3 ^ (bc0 &^ bc4)
- a[24] = bc4 ^ (bc1 &^ bc0)
-
- t = a[5] ^ d0
- bc3 = t<<41 | t>>(64-41)
- t = a[21] ^ d1
- bc4 = t<<2 | t>>(64-2)
- t = a[12] ^ d2
- bc0 = t<<62 | t>>(64-62)
- t = a[3] ^ d3
- bc1 = t<<55 | t>>(64-55)
- t = a[19] ^ d4
- bc2 = t<<39 | t>>(64-39)
- a[5] = bc0 ^ (bc2 &^ bc1)
- a[21] = bc1 ^ (bc3 &^ bc2)
- a[12] = bc2 ^ (bc4 &^ bc3)
- a[3] = bc3 ^ (bc0 &^ bc4)
- a[19] = bc4 ^ (bc1 &^ bc0)
-
- // Round 3
- bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]
- bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]
- bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]
- bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]
- bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]
- d0 = bc4 ^ (bc1<<1 | bc1>>63)
- d1 = bc0 ^ (bc2<<1 | bc2>>63)
- d2 = bc1 ^ (bc3<<1 | bc3>>63)
- d3 = bc2 ^ (bc4<<1 | bc4>>63)
- d4 = bc3 ^ (bc0<<1 | bc0>>63)
-
- bc0 = a[0] ^ d0
- t = a[11] ^ d1
- bc1 = t<<44 | t>>(64-44)
- t = a[22] ^ d2
- bc2 = t<<43 | t>>(64-43)
- t = a[8] ^ d3
- bc3 = t<<21 | t>>(64-21)
- t = a[19] ^ d4
- bc4 = t<<14 | t>>(64-14)
- a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+2]
- a[11] = bc1 ^ (bc3 &^ bc2)
- a[22] = bc2 ^ (bc4 &^ bc3)
- a[8] = bc3 ^ (bc0 &^ bc4)
- a[19] = bc4 ^ (bc1 &^ bc0)
-
- t = a[15] ^ d0
- bc2 = t<<3 | t>>(64-3)
- t = a[1] ^ d1
- bc3 = t<<45 | t>>(64-45)
- t = a[12] ^ d2
- bc4 = t<<61 | t>>(64-61)
- t = a[23] ^ d3
- bc0 = t<<28 | t>>(64-28)
- t = a[9] ^ d4
- bc1 = t<<20 | t>>(64-20)
- a[15] = bc0 ^ (bc2 &^ bc1)
- a[1] = bc1 ^ (bc3 &^ bc2)
- a[12] = bc2 ^ (bc4 &^ bc3)
- a[23] = bc3 ^ (bc0 &^ bc4)
- a[9] = bc4 ^ (bc1 &^ bc0)
-
- t = a[5] ^ d0
- bc4 = t<<18 | t>>(64-18)
- t = a[16] ^ d1
- bc0 = t<<1 | t>>(64-1)
- t = a[2] ^ d2
- bc1 = t<<6 | t>>(64-6)
- t = a[13] ^ d3
- bc2 = t<<25 | t>>(64-25)
- t = a[24] ^ d4
- bc3 = t<<8 | t>>(64-8)
- a[5] = bc0 ^ (bc2 &^ bc1)
- a[16] = bc1 ^ (bc3 &^ bc2)
- a[2] = bc2 ^ (bc4 &^ bc3)
- a[13] = bc3 ^ (bc0 &^ bc4)
- a[24] = bc4 ^ (bc1 &^ bc0)
-
- t = a[20] ^ d0
- bc1 = t<<36 | t>>(64-36)
- t = a[6] ^ d1
- bc2 = t<<10 | t>>(64-10)
- t = a[17] ^ d2
- bc3 = t<<15 | t>>(64-15)
- t = a[3] ^ d3
- bc4 = t<<56 | t>>(64-56)
- t = a[14] ^ d4
- bc0 = t<<27 | t>>(64-27)
- a[20] = bc0 ^ (bc2 &^ bc1)
- a[6] = bc1 ^ (bc3 &^ bc2)
- a[17] = bc2 ^ (bc4 &^ bc3)
- a[3] = bc3 ^ (bc0 &^ bc4)
- a[14] = bc4 ^ (bc1 &^ bc0)
-
- t = a[10] ^ d0
- bc3 = t<<41 | t>>(64-41)
- t = a[21] ^ d1
- bc4 = t<<2 | t>>(64-2)
- t = a[7] ^ d2
- bc0 = t<<62 | t>>(64-62)
- t = a[18] ^ d3
- bc1 = t<<55 | t>>(64-55)
- t = a[4] ^ d4
- bc2 = t<<39 | t>>(64-39)
- a[10] = bc0 ^ (bc2 &^ bc1)
- a[21] = bc1 ^ (bc3 &^ bc2)
- a[7] = bc2 ^ (bc4 &^ bc3)
- a[18] = bc3 ^ (bc0 &^ bc4)
- a[4] = bc4 ^ (bc1 &^ bc0)
-
- // Round 4
- bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]
- bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]
- bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]
- bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]
- bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]
- d0 = bc4 ^ (bc1<<1 | bc1>>63)
- d1 = bc0 ^ (bc2<<1 | bc2>>63)
- d2 = bc1 ^ (bc3<<1 | bc3>>63)
- d3 = bc2 ^ (bc4<<1 | bc4>>63)
- d4 = bc3 ^ (bc0<<1 | bc0>>63)
-
- bc0 = a[0] ^ d0
- t = a[1] ^ d1
- bc1 = t<<44 | t>>(64-44)
- t = a[2] ^ d2
- bc2 = t<<43 | t>>(64-43)
- t = a[3] ^ d3
- bc3 = t<<21 | t>>(64-21)
- t = a[4] ^ d4
- bc4 = t<<14 | t>>(64-14)
- a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+3]
- a[1] = bc1 ^ (bc3 &^ bc2)
- a[2] = bc2 ^ (bc4 &^ bc3)
- a[3] = bc3 ^ (bc0 &^ bc4)
- a[4] = bc4 ^ (bc1 &^ bc0)
-
- t = a[5] ^ d0
- bc2 = t<<3 | t>>(64-3)
- t = a[6] ^ d1
- bc3 = t<<45 | t>>(64-45)
- t = a[7] ^ d2
- bc4 = t<<61 | t>>(64-61)
- t = a[8] ^ d3
- bc0 = t<<28 | t>>(64-28)
- t = a[9] ^ d4
- bc1 = t<<20 | t>>(64-20)
- a[5] = bc0 ^ (bc2 &^ bc1)
- a[6] = bc1 ^ (bc3 &^ bc2)
- a[7] = bc2 ^ (bc4 &^ bc3)
- a[8] = bc3 ^ (bc0 &^ bc4)
- a[9] = bc4 ^ (bc1 &^ bc0)
-
- t = a[10] ^ d0
- bc4 = t<<18 | t>>(64-18)
- t = a[11] ^ d1
- bc0 = t<<1 | t>>(64-1)
- t = a[12] ^ d2
- bc1 = t<<6 | t>>(64-6)
- t = a[13] ^ d3
- bc2 = t<<25 | t>>(64-25)
- t = a[14] ^ d4
- bc3 = t<<8 | t>>(64-8)
- a[10] = bc0 ^ (bc2 &^ bc1)
- a[11] = bc1 ^ (bc3 &^ bc2)
- a[12] = bc2 ^ (bc4 &^ bc3)
- a[13] = bc3 ^ (bc0 &^ bc4)
- a[14] = bc4 ^ (bc1 &^ bc0)
-
- t = a[15] ^ d0
- bc1 = t<<36 | t>>(64-36)
- t = a[16] ^ d1
- bc2 = t<<10 | t>>(64-10)
- t = a[17] ^ d2
- bc3 = t<<15 | t>>(64-15)
- t = a[18] ^ d3
- bc4 = t<<56 | t>>(64-56)
- t = a[19] ^ d4
- bc0 = t<<27 | t>>(64-27)
- a[15] = bc0 ^ (bc2 &^ bc1)
- a[16] = bc1 ^ (bc3 &^ bc2)
- a[17] = bc2 ^ (bc4 &^ bc3)
- a[18] = bc3 ^ (bc0 &^ bc4)
- a[19] = bc4 ^ (bc1 &^ bc0)
-
- t = a[20] ^ d0
- bc3 = t<<41 | t>>(64-41)
- t = a[21] ^ d1
- bc4 = t<<2 | t>>(64-2)
- t = a[22] ^ d2
- bc0 = t<<62 | t>>(64-62)
- t = a[23] ^ d3
- bc1 = t<<55 | t>>(64-55)
- t = a[24] ^ d4
- bc2 = t<<39 | t>>(64-39)
- a[20] = bc0 ^ (bc2 &^ bc1)
- a[21] = bc1 ^ (bc3 &^ bc2)
- a[22] = bc2 ^ (bc4 &^ bc3)
- a[23] = bc3 ^ (bc0 &^ bc4)
- a[24] = bc4 ^ (bc1 &^ bc0)
- }
-}
diff --git a/vendor/golang.org/x/crypto/sha3/keccakf_amd64.go b/vendor/golang.org/x/crypto/sha3/keccakf_amd64.go
deleted file mode 100644
index 248a382..0000000
--- a/vendor/golang.org/x/crypto/sha3/keccakf_amd64.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build amd64 && !purego && gc
-// +build amd64,!purego,gc
-
-package sha3
-
-// This function is implemented in keccakf_amd64.s.
-
-//go:noescape
-
-func keccakF1600(a *[25]uint64)
diff --git a/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s b/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s
deleted file mode 100644
index 4cfa543..0000000
--- a/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s
+++ /dev/null
@@ -1,391 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build amd64 && !purego && gc
-// +build amd64,!purego,gc
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources at https://github.com/gvanas/KeccakCodePackage
-
-// Offsets in state
-#define _ba (0*8)
-#define _be (1*8)
-#define _bi (2*8)
-#define _bo (3*8)
-#define _bu (4*8)
-#define _ga (5*8)
-#define _ge (6*8)
-#define _gi (7*8)
-#define _go (8*8)
-#define _gu (9*8)
-#define _ka (10*8)
-#define _ke (11*8)
-#define _ki (12*8)
-#define _ko (13*8)
-#define _ku (14*8)
-#define _ma (15*8)
-#define _me (16*8)
-#define _mi (17*8)
-#define _mo (18*8)
-#define _mu (19*8)
-#define _sa (20*8)
-#define _se (21*8)
-#define _si (22*8)
-#define _so (23*8)
-#define _su (24*8)
-
-// Temporary registers
-#define rT1 AX
-
-// Round vars
-#define rpState DI
-#define rpStack SP
-
-#define rDa BX
-#define rDe CX
-#define rDi DX
-#define rDo R8
-#define rDu R9
-
-#define rBa R10
-#define rBe R11
-#define rBi R12
-#define rBo R13
-#define rBu R14
-
-#define rCa SI
-#define rCe BP
-#define rCi rBi
-#define rCo rBo
-#define rCu R15
-
-#define MOVQ_RBI_RCE MOVQ rBi, rCe
-#define XORQ_RT1_RCA XORQ rT1, rCa
-#define XORQ_RT1_RCE XORQ rT1, rCe
-#define XORQ_RBA_RCU XORQ rBa, rCu
-#define XORQ_RBE_RCU XORQ rBe, rCu
-#define XORQ_RDU_RCU XORQ rDu, rCu
-#define XORQ_RDA_RCA XORQ rDa, rCa
-#define XORQ_RDE_RCE XORQ rDe, rCe
-
-#define mKeccakRound(iState, oState, rc, B_RBI_RCE, G_RT1_RCA, G_RT1_RCE, G_RBA_RCU, K_RT1_RCA, K_RT1_RCE, K_RBA_RCU, M_RT1_RCA, M_RT1_RCE, M_RBE_RCU, S_RDU_RCU, S_RDA_RCA, S_RDE_RCE) \
- /* Prepare round */ \
- MOVQ rCe, rDa; \
- ROLQ $1, rDa; \
- \
- MOVQ _bi(iState), rCi; \
- XORQ _gi(iState), rDi; \
- XORQ rCu, rDa; \
- XORQ _ki(iState), rCi; \
- XORQ _mi(iState), rDi; \
- XORQ rDi, rCi; \
- \
- MOVQ rCi, rDe; \
- ROLQ $1, rDe; \
- \
- MOVQ _bo(iState), rCo; \
- XORQ _go(iState), rDo; \
- XORQ rCa, rDe; \
- XORQ _ko(iState), rCo; \
- XORQ _mo(iState), rDo; \
- XORQ rDo, rCo; \
- \
- MOVQ rCo, rDi; \
- ROLQ $1, rDi; \
- \
- MOVQ rCu, rDo; \
- XORQ rCe, rDi; \
- ROLQ $1, rDo; \
- \
- MOVQ rCa, rDu; \
- XORQ rCi, rDo; \
- ROLQ $1, rDu; \
- \
- /* Result b */ \
- MOVQ _ba(iState), rBa; \
- MOVQ _ge(iState), rBe; \
- XORQ rCo, rDu; \
- MOVQ _ki(iState), rBi; \
- MOVQ _mo(iState), rBo; \
- MOVQ _su(iState), rBu; \
- XORQ rDe, rBe; \
- ROLQ $44, rBe; \
- XORQ rDi, rBi; \
- XORQ rDa, rBa; \
- ROLQ $43, rBi; \
- \
- MOVQ rBe, rCa; \
- MOVQ rc, rT1; \
- ORQ rBi, rCa; \
- XORQ rBa, rT1; \
- XORQ rT1, rCa; \
- MOVQ rCa, _ba(oState); \
- \
- XORQ rDu, rBu; \
- ROLQ $14, rBu; \
- MOVQ rBa, rCu; \
- ANDQ rBe, rCu; \
- XORQ rBu, rCu; \
- MOVQ rCu, _bu(oState); \
- \
- XORQ rDo, rBo; \
- ROLQ $21, rBo; \
- MOVQ rBo, rT1; \
- ANDQ rBu, rT1; \
- XORQ rBi, rT1; \
- MOVQ rT1, _bi(oState); \
- \
- NOTQ rBi; \
- ORQ rBa, rBu; \
- ORQ rBo, rBi; \
- XORQ rBo, rBu; \
- XORQ rBe, rBi; \
- MOVQ rBu, _bo(oState); \
- MOVQ rBi, _be(oState); \
- B_RBI_RCE; \
- \
- /* Result g */ \
- MOVQ _gu(iState), rBe; \
- XORQ rDu, rBe; \
- MOVQ _ka(iState), rBi; \
- ROLQ $20, rBe; \
- XORQ rDa, rBi; \
- ROLQ $3, rBi; \
- MOVQ _bo(iState), rBa; \
- MOVQ rBe, rT1; \
- ORQ rBi, rT1; \
- XORQ rDo, rBa; \
- MOVQ _me(iState), rBo; \
- MOVQ _si(iState), rBu; \
- ROLQ $28, rBa; \
- XORQ rBa, rT1; \
- MOVQ rT1, _ga(oState); \
- G_RT1_RCA; \
- \
- XORQ rDe, rBo; \
- ROLQ $45, rBo; \
- MOVQ rBi, rT1; \
- ANDQ rBo, rT1; \
- XORQ rBe, rT1; \
- MOVQ rT1, _ge(oState); \
- G_RT1_RCE; \
- \
- XORQ rDi, rBu; \
- ROLQ $61, rBu; \
- MOVQ rBu, rT1; \
- ORQ rBa, rT1; \
- XORQ rBo, rT1; \
- MOVQ rT1, _go(oState); \
- \
- ANDQ rBe, rBa; \
- XORQ rBu, rBa; \
- MOVQ rBa, _gu(oState); \
- NOTQ rBu; \
- G_RBA_RCU; \
- \
- ORQ rBu, rBo; \
- XORQ rBi, rBo; \
- MOVQ rBo, _gi(oState); \
- \
- /* Result k */ \
- MOVQ _be(iState), rBa; \
- MOVQ _gi(iState), rBe; \
- MOVQ _ko(iState), rBi; \
- MOVQ _mu(iState), rBo; \
- MOVQ _sa(iState), rBu; \
- XORQ rDi, rBe; \
- ROLQ $6, rBe; \
- XORQ rDo, rBi; \
- ROLQ $25, rBi; \
- MOVQ rBe, rT1; \
- ORQ rBi, rT1; \
- XORQ rDe, rBa; \
- ROLQ $1, rBa; \
- XORQ rBa, rT1; \
- MOVQ rT1, _ka(oState); \
- K_RT1_RCA; \
- \
- XORQ rDu, rBo; \
- ROLQ $8, rBo; \
- MOVQ rBi, rT1; \
- ANDQ rBo, rT1; \
- XORQ rBe, rT1; \
- MOVQ rT1, _ke(oState); \
- K_RT1_RCE; \
- \
- XORQ rDa, rBu; \
- ROLQ $18, rBu; \
- NOTQ rBo; \
- MOVQ rBo, rT1; \
- ANDQ rBu, rT1; \
- XORQ rBi, rT1; \
- MOVQ rT1, _ki(oState); \
- \
- MOVQ rBu, rT1; \
- ORQ rBa, rT1; \
- XORQ rBo, rT1; \
- MOVQ rT1, _ko(oState); \
- \
- ANDQ rBe, rBa; \
- XORQ rBu, rBa; \
- MOVQ rBa, _ku(oState); \
- K_RBA_RCU; \
- \
- /* Result m */ \
- MOVQ _ga(iState), rBe; \
- XORQ rDa, rBe; \
- MOVQ _ke(iState), rBi; \
- ROLQ $36, rBe; \
- XORQ rDe, rBi; \
- MOVQ _bu(iState), rBa; \
- ROLQ $10, rBi; \
- MOVQ rBe, rT1; \
- MOVQ _mi(iState), rBo; \
- ANDQ rBi, rT1; \
- XORQ rDu, rBa; \
- MOVQ _so(iState), rBu; \
- ROLQ $27, rBa; \
- XORQ rBa, rT1; \
- MOVQ rT1, _ma(oState); \
- M_RT1_RCA; \
- \
- XORQ rDi, rBo; \
- ROLQ $15, rBo; \
- MOVQ rBi, rT1; \
- ORQ rBo, rT1; \
- XORQ rBe, rT1; \
- MOVQ rT1, _me(oState); \
- M_RT1_RCE; \
- \
- XORQ rDo, rBu; \
- ROLQ $56, rBu; \
- NOTQ rBo; \
- MOVQ rBo, rT1; \
- ORQ rBu, rT1; \
- XORQ rBi, rT1; \
- MOVQ rT1, _mi(oState); \
- \
- ORQ rBa, rBe; \
- XORQ rBu, rBe; \
- MOVQ rBe, _mu(oState); \
- \
- ANDQ rBa, rBu; \
- XORQ rBo, rBu; \
- MOVQ rBu, _mo(oState); \
- M_RBE_RCU; \
- \
- /* Result s */ \
- MOVQ _bi(iState), rBa; \
- MOVQ _go(iState), rBe; \
- MOVQ _ku(iState), rBi; \
- XORQ rDi, rBa; \
- MOVQ _ma(iState), rBo; \
- ROLQ $62, rBa; \
- XORQ rDo, rBe; \
- MOVQ _se(iState), rBu; \
- ROLQ $55, rBe; \
- \
- XORQ rDu, rBi; \
- MOVQ rBa, rDu; \
- XORQ rDe, rBu; \
- ROLQ $2, rBu; \
- ANDQ rBe, rDu; \
- XORQ rBu, rDu; \
- MOVQ rDu, _su(oState); \
- \
- ROLQ $39, rBi; \
- S_RDU_RCU; \
- NOTQ rBe; \
- XORQ rDa, rBo; \
- MOVQ rBe, rDa; \
- ANDQ rBi, rDa; \
- XORQ rBa, rDa; \
- MOVQ rDa, _sa(oState); \
- S_RDA_RCA; \
- \
- ROLQ $41, rBo; \
- MOVQ rBi, rDe; \
- ORQ rBo, rDe; \
- XORQ rBe, rDe; \
- MOVQ rDe, _se(oState); \
- S_RDE_RCE; \
- \
- MOVQ rBo, rDi; \
- MOVQ rBu, rDo; \
- ANDQ rBu, rDi; \
- ORQ rBa, rDo; \
- XORQ rBi, rDi; \
- XORQ rBo, rDo; \
- MOVQ rDi, _si(oState); \
- MOVQ rDo, _so(oState) \
-
-// func keccakF1600(state *[25]uint64)
-TEXT ·keccakF1600(SB), 0, $200-8
- MOVQ state+0(FP), rpState
-
- // Convert the user state into an internal state
- NOTQ _be(rpState)
- NOTQ _bi(rpState)
- NOTQ _go(rpState)
- NOTQ _ki(rpState)
- NOTQ _mi(rpState)
- NOTQ _sa(rpState)
-
- // Execute the KeccakF permutation
- MOVQ _ba(rpState), rCa
- MOVQ _be(rpState), rCe
- MOVQ _bu(rpState), rCu
-
- XORQ _ga(rpState), rCa
- XORQ _ge(rpState), rCe
- XORQ _gu(rpState), rCu
-
- XORQ _ka(rpState), rCa
- XORQ _ke(rpState), rCe
- XORQ _ku(rpState), rCu
-
- XORQ _ma(rpState), rCa
- XORQ _me(rpState), rCe
- XORQ _mu(rpState), rCu
-
- XORQ _sa(rpState), rCa
- XORQ _se(rpState), rCe
- MOVQ _si(rpState), rDi
- MOVQ _so(rpState), rDo
- XORQ _su(rpState), rCu
-
- mKeccakRound(rpState, rpStack, $0x0000000000000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x0000000000008082, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x800000000000808a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x8000000080008000, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x000000000000808b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x0000000080000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x8000000080008081, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x8000000000008009, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x000000000000008a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x0000000000000088, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x0000000080008009, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x000000008000000a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x000000008000808b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x800000000000008b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x8000000000008089, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x8000000000008003, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x8000000000008002, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x8000000000000080, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x000000000000800a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x800000008000000a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x8000000080008081, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x8000000000008080, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x0000000080000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x8000000080008008, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP)
-
- // Revert the internal state to the user state
- NOTQ _be(rpState)
- NOTQ _bi(rpState)
- NOTQ _go(rpState)
- NOTQ _ki(rpState)
- NOTQ _mi(rpState)
- NOTQ _sa(rpState)
-
- RET
diff --git a/vendor/golang.org/x/crypto/sha3/register.go b/vendor/golang.org/x/crypto/sha3/register.go
deleted file mode 100644
index 8b4453a..0000000
--- a/vendor/golang.org/x/crypto/sha3/register.go
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build go1.4
-// +build go1.4
-
-package sha3
-
-import (
- "crypto"
-)
-
-func init() {
- crypto.RegisterHash(crypto.SHA3_224, New224)
- crypto.RegisterHash(crypto.SHA3_256, New256)
- crypto.RegisterHash(crypto.SHA3_384, New384)
- crypto.RegisterHash(crypto.SHA3_512, New512)
-}
diff --git a/vendor/golang.org/x/crypto/sha3/sha3.go b/vendor/golang.org/x/crypto/sha3/sha3.go
deleted file mode 100644
index fa182be..0000000
--- a/vendor/golang.org/x/crypto/sha3/sha3.go
+++ /dev/null
@@ -1,193 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package sha3
-
-// spongeDirection indicates the direction bytes are flowing through the sponge.
-type spongeDirection int
-
-const (
- // spongeAbsorbing indicates that the sponge is absorbing input.
- spongeAbsorbing spongeDirection = iota
- // spongeSqueezing indicates that the sponge is being squeezed.
- spongeSqueezing
-)
-
-const (
- // maxRate is the maximum size of the internal buffer. SHAKE-256
- // currently needs the largest buffer.
- maxRate = 168
-)
-
-type state struct {
- // Generic sponge components.
- a [25]uint64 // main state of the hash
- buf []byte // points into storage
- rate int // the number of bytes of state to use
-
- // dsbyte contains the "domain separation" bits and the first bit of
- // the padding. Sections 6.1 and 6.2 of [1] separate the outputs of the
- // SHA-3 and SHAKE functions by appending bitstrings to the message.
- // Using a little-endian bit-ordering convention, these are "01" for SHA-3
- // and "1111" for SHAKE, or 00000010b and 00001111b, respectively. Then the
- // padding rule from section 5.1 is applied to pad the message to a multiple
- // of the rate, which involves adding a "1" bit, zero or more "0" bits, and
- // a final "1" bit. We merge the first "1" bit from the padding into dsbyte,
- // giving 00000110b (0x06) and 00011111b (0x1f).
- // [1] http://csrc.nist.gov/publications/drafts/fips-202/fips_202_draft.pdf
- // "Draft FIPS 202: SHA-3 Standard: Permutation-Based Hash and
- // Extendable-Output Functions (May 2014)"
- dsbyte byte
-
- storage storageBuf
-
- // Specific to SHA-3 and SHAKE.
- outputLen int // the default output size in bytes
- state spongeDirection // whether the sponge is absorbing or squeezing
-}
-
-// BlockSize returns the rate of sponge underlying this hash function.
-func (d *state) BlockSize() int { return d.rate }
-
-// Size returns the output size of the hash function in bytes.
-func (d *state) Size() int { return d.outputLen }
-
-// Reset clears the internal state by zeroing the sponge state and
-// the byte buffer, and setting Sponge.state to absorbing.
-func (d *state) Reset() {
- // Zero the permutation's state.
- for i := range d.a {
- d.a[i] = 0
- }
- d.state = spongeAbsorbing
- d.buf = d.storage.asBytes()[:0]
-}
-
-func (d *state) clone() *state {
- ret := *d
- if ret.state == spongeAbsorbing {
- ret.buf = ret.storage.asBytes()[:len(ret.buf)]
- } else {
- ret.buf = ret.storage.asBytes()[d.rate-cap(d.buf) : d.rate]
- }
-
- return &ret
-}
-
-// permute applies the KeccakF-1600 permutation. It handles
-// any input-output buffering.
-func (d *state) permute() {
- switch d.state {
- case spongeAbsorbing:
- // If we're absorbing, we need to xor the input into the state
- // before applying the permutation.
- xorIn(d, d.buf)
- d.buf = d.storage.asBytes()[:0]
- keccakF1600(&d.a)
- case spongeSqueezing:
- // If we're squeezing, we need to apply the permutation before
- // copying more output.
- keccakF1600(&d.a)
- d.buf = d.storage.asBytes()[:d.rate]
- copyOut(d, d.buf)
- }
-}
-
-// pads appends the domain separation bits in dsbyte, applies
-// the multi-bitrate 10..1 padding rule, and permutes the state.
-func (d *state) padAndPermute(dsbyte byte) {
- if d.buf == nil {
- d.buf = d.storage.asBytes()[:0]
- }
- // Pad with this instance's domain-separator bits. We know that there's
- // at least one byte of space in d.buf because, if it were full,
- // permute would have been called to empty it. dsbyte also contains the
- // first one bit for the padding. See the comment in the state struct.
- d.buf = append(d.buf, dsbyte)
- zerosStart := len(d.buf)
- d.buf = d.storage.asBytes()[:d.rate]
- for i := zerosStart; i < d.rate; i++ {
- d.buf[i] = 0
- }
- // This adds the final one bit for the padding. Because of the way that
- // bits are numbered from the LSB upwards, the final bit is the MSB of
- // the last byte.
- d.buf[d.rate-1] ^= 0x80
- // Apply the permutation
- d.permute()
- d.state = spongeSqueezing
- d.buf = d.storage.asBytes()[:d.rate]
- copyOut(d, d.buf)
-}
-
-// Write absorbs more data into the hash's state. It produces an error
-// if more data is written to the ShakeHash after writing
-func (d *state) Write(p []byte) (written int, err error) {
- if d.state != spongeAbsorbing {
- panic("sha3: write to sponge after read")
- }
- if d.buf == nil {
- d.buf = d.storage.asBytes()[:0]
- }
- written = len(p)
-
- for len(p) > 0 {
- if len(d.buf) == 0 && len(p) >= d.rate {
- // The fast path; absorb a full "rate" bytes of input and apply the permutation.
- xorIn(d, p[:d.rate])
- p = p[d.rate:]
- keccakF1600(&d.a)
- } else {
- // The slow path; buffer the input until we can fill the sponge, and then xor it in.
- todo := d.rate - len(d.buf)
- if todo > len(p) {
- todo = len(p)
- }
- d.buf = append(d.buf, p[:todo]...)
- p = p[todo:]
-
- // If the sponge is full, apply the permutation.
- if len(d.buf) == d.rate {
- d.permute()
- }
- }
- }
-
- return
-}
-
-// Read squeezes an arbitrary number of bytes from the sponge.
-func (d *state) Read(out []byte) (n int, err error) {
- // If we're still absorbing, pad and apply the permutation.
- if d.state == spongeAbsorbing {
- d.padAndPermute(d.dsbyte)
- }
-
- n = len(out)
-
- // Now, do the squeezing.
- for len(out) > 0 {
- n := copy(out, d.buf)
- d.buf = d.buf[n:]
- out = out[n:]
-
- // Apply the permutation if we've squeezed the sponge dry.
- if len(d.buf) == 0 {
- d.permute()
- }
- }
-
- return
-}
-
-// Sum applies padding to the hash state and then squeezes out the desired
-// number of output bytes.
-func (d *state) Sum(in []byte) []byte {
- // Make a copy of the original hash so that caller can keep writing
- // and summing.
- dup := d.clone()
- hash := make([]byte, dup.outputLen)
- dup.Read(hash)
- return append(in, hash...)
-}
diff --git a/vendor/golang.org/x/crypto/sha3/sha3_s390x.go b/vendor/golang.org/x/crypto/sha3/sha3_s390x.go
deleted file mode 100644
index 63a3edb..0000000
--- a/vendor/golang.org/x/crypto/sha3/sha3_s390x.go
+++ /dev/null
@@ -1,287 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gc && !purego
-// +build gc,!purego
-
-package sha3
-
-// This file contains code for using the 'compute intermediate
-// message digest' (KIMD) and 'compute last message digest' (KLMD)
-// instructions to compute SHA-3 and SHAKE hashes on IBM Z.
-
-import (
- "hash"
-
- "golang.org/x/sys/cpu"
-)
-
-// codes represent 7-bit KIMD/KLMD function codes as defined in
-// the Principles of Operation.
-type code uint64
-
-const (
- // function codes for KIMD/KLMD
- sha3_224 code = 32
- sha3_256 = 33
- sha3_384 = 34
- sha3_512 = 35
- shake_128 = 36
- shake_256 = 37
- nopad = 0x100
-)
-
-// kimd is a wrapper for the 'compute intermediate message digest' instruction.
-// src must be a multiple of the rate for the given function code.
-//
-//go:noescape
-func kimd(function code, chain *[200]byte, src []byte)
-
-// klmd is a wrapper for the 'compute last message digest' instruction.
-// src padding is handled by the instruction.
-//
-//go:noescape
-func klmd(function code, chain *[200]byte, dst, src []byte)
-
-type asmState struct {
- a [200]byte // 1600 bit state
- buf []byte // care must be taken to ensure cap(buf) is a multiple of rate
- rate int // equivalent to block size
- storage [3072]byte // underlying storage for buf
- outputLen int // output length if fixed, 0 if not
- function code // KIMD/KLMD function code
- state spongeDirection // whether the sponge is absorbing or squeezing
-}
-
-func newAsmState(function code) *asmState {
- var s asmState
- s.function = function
- switch function {
- case sha3_224:
- s.rate = 144
- s.outputLen = 28
- case sha3_256:
- s.rate = 136
- s.outputLen = 32
- case sha3_384:
- s.rate = 104
- s.outputLen = 48
- case sha3_512:
- s.rate = 72
- s.outputLen = 64
- case shake_128:
- s.rate = 168
- case shake_256:
- s.rate = 136
- default:
- panic("sha3: unrecognized function code")
- }
-
- // limit s.buf size to a multiple of s.rate
- s.resetBuf()
- return &s
-}
-
-func (s *asmState) clone() *asmState {
- c := *s
- c.buf = c.storage[:len(s.buf):cap(s.buf)]
- return &c
-}
-
-// copyIntoBuf copies b into buf. It will panic if there is not enough space to
-// store all of b.
-func (s *asmState) copyIntoBuf(b []byte) {
- bufLen := len(s.buf)
- s.buf = s.buf[:len(s.buf)+len(b)]
- copy(s.buf[bufLen:], b)
-}
-
-// resetBuf points buf at storage, sets the length to 0 and sets cap to be a
-// multiple of the rate.
-func (s *asmState) resetBuf() {
- max := (cap(s.storage) / s.rate) * s.rate
- s.buf = s.storage[:0:max]
-}
-
-// Write (via the embedded io.Writer interface) adds more data to the running hash.
-// It never returns an error.
-func (s *asmState) Write(b []byte) (int, error) {
- if s.state != spongeAbsorbing {
- panic("sha3: write to sponge after read")
- }
- length := len(b)
- for len(b) > 0 {
- if len(s.buf) == 0 && len(b) >= cap(s.buf) {
- // Hash the data directly and push any remaining bytes
- // into the buffer.
- remainder := len(b) % s.rate
- kimd(s.function, &s.a, b[:len(b)-remainder])
- if remainder != 0 {
- s.copyIntoBuf(b[len(b)-remainder:])
- }
- return length, nil
- }
-
- if len(s.buf) == cap(s.buf) {
- // flush the buffer
- kimd(s.function, &s.a, s.buf)
- s.buf = s.buf[:0]
- }
-
- // copy as much as we can into the buffer
- n := len(b)
- if len(b) > cap(s.buf)-len(s.buf) {
- n = cap(s.buf) - len(s.buf)
- }
- s.copyIntoBuf(b[:n])
- b = b[n:]
- }
- return length, nil
-}
-
-// Read squeezes an arbitrary number of bytes from the sponge.
-func (s *asmState) Read(out []byte) (n int, err error) {
- n = len(out)
-
- // need to pad if we were absorbing
- if s.state == spongeAbsorbing {
- s.state = spongeSqueezing
-
- // write hash directly into out if possible
- if len(out)%s.rate == 0 {
- klmd(s.function, &s.a, out, s.buf) // len(out) may be 0
- s.buf = s.buf[:0]
- return
- }
-
- // write hash into buffer
- max := cap(s.buf)
- if max > len(out) {
- max = (len(out)/s.rate)*s.rate + s.rate
- }
- klmd(s.function, &s.a, s.buf[:max], s.buf)
- s.buf = s.buf[:max]
- }
-
- for len(out) > 0 {
- // flush the buffer
- if len(s.buf) != 0 {
- c := copy(out, s.buf)
- out = out[c:]
- s.buf = s.buf[c:]
- continue
- }
-
- // write hash directly into out if possible
- if len(out)%s.rate == 0 {
- klmd(s.function|nopad, &s.a, out, nil)
- return
- }
-
- // write hash into buffer
- s.resetBuf()
- if cap(s.buf) > len(out) {
- s.buf = s.buf[:(len(out)/s.rate)*s.rate+s.rate]
- }
- klmd(s.function|nopad, &s.a, s.buf, nil)
- }
- return
-}
-
-// Sum appends the current hash to b and returns the resulting slice.
-// It does not change the underlying hash state.
-func (s *asmState) Sum(b []byte) []byte {
- if s.outputLen == 0 {
- panic("sha3: cannot call Sum on SHAKE functions")
- }
-
- // Copy the state to preserve the original.
- a := s.a
-
- // Hash the buffer. Note that we don't clear it because we
- // aren't updating the state.
- klmd(s.function, &a, nil, s.buf)
- return append(b, a[:s.outputLen]...)
-}
-
-// Reset resets the Hash to its initial state.
-func (s *asmState) Reset() {
- for i := range s.a {
- s.a[i] = 0
- }
- s.resetBuf()
- s.state = spongeAbsorbing
-}
-
-// Size returns the number of bytes Sum will return.
-func (s *asmState) Size() int {
- return s.outputLen
-}
-
-// BlockSize returns the hash's underlying block size.
-// The Write method must be able to accept any amount
-// of data, but it may operate more efficiently if all writes
-// are a multiple of the block size.
-func (s *asmState) BlockSize() int {
- return s.rate
-}
-
-// Clone returns a copy of the ShakeHash in its current state.
-func (s *asmState) Clone() ShakeHash {
- return s.clone()
-}
-
-// new224Asm returns an assembly implementation of SHA3-224 if available,
-// otherwise it returns nil.
-func new224Asm() hash.Hash {
- if cpu.S390X.HasSHA3 {
- return newAsmState(sha3_224)
- }
- return nil
-}
-
-// new256Asm returns an assembly implementation of SHA3-256 if available,
-// otherwise it returns nil.
-func new256Asm() hash.Hash {
- if cpu.S390X.HasSHA3 {
- return newAsmState(sha3_256)
- }
- return nil
-}
-
-// new384Asm returns an assembly implementation of SHA3-384 if available,
-// otherwise it returns nil.
-func new384Asm() hash.Hash {
- if cpu.S390X.HasSHA3 {
- return newAsmState(sha3_384)
- }
- return nil
-}
-
-// new512Asm returns an assembly implementation of SHA3-512 if available,
-// otherwise it returns nil.
-func new512Asm() hash.Hash {
- if cpu.S390X.HasSHA3 {
- return newAsmState(sha3_512)
- }
- return nil
-}
-
-// newShake128Asm returns an assembly implementation of SHAKE-128 if available,
-// otherwise it returns nil.
-func newShake128Asm() ShakeHash {
- if cpu.S390X.HasSHA3 {
- return newAsmState(shake_128)
- }
- return nil
-}
-
-// newShake256Asm returns an assembly implementation of SHAKE-256 if available,
-// otherwise it returns nil.
-func newShake256Asm() ShakeHash {
- if cpu.S390X.HasSHA3 {
- return newAsmState(shake_256)
- }
- return nil
-}
diff --git a/vendor/golang.org/x/crypto/sha3/sha3_s390x.s b/vendor/golang.org/x/crypto/sha3/sha3_s390x.s
deleted file mode 100644
index a0e051b..0000000
--- a/vendor/golang.org/x/crypto/sha3/sha3_s390x.s
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gc && !purego
-// +build gc,!purego
-
-#include "textflag.h"
-
-// func kimd(function code, chain *[200]byte, src []byte)
-TEXT ·kimd(SB), NOFRAME|NOSPLIT, $0-40
- MOVD function+0(FP), R0
- MOVD chain+8(FP), R1
- LMG src+16(FP), R2, R3 // R2=base, R3=len
-
-continue:
- WORD $0xB93E0002 // KIMD --, R2
- BVS continue // continue if interrupted
- MOVD $0, R0 // reset R0 for pre-go1.8 compilers
- RET
-
-// func klmd(function code, chain *[200]byte, dst, src []byte)
-TEXT ·klmd(SB), NOFRAME|NOSPLIT, $0-64
- // TODO: SHAKE support
- MOVD function+0(FP), R0
- MOVD chain+8(FP), R1
- LMG dst+16(FP), R2, R3 // R2=base, R3=len
- LMG src+40(FP), R4, R5 // R4=base, R5=len
-
-continue:
- WORD $0xB93F0024 // KLMD R2, R4
- BVS continue // continue if interrupted
- MOVD $0, R0 // reset R0 for pre-go1.8 compilers
- RET
diff --git a/vendor/golang.org/x/crypto/sha3/shake.go b/vendor/golang.org/x/crypto/sha3/shake.go
deleted file mode 100644
index d7be295..0000000
--- a/vendor/golang.org/x/crypto/sha3/shake.go
+++ /dev/null
@@ -1,173 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package sha3
-
-// This file defines the ShakeHash interface, and provides
-// functions for creating SHAKE and cSHAKE instances, as well as utility
-// functions for hashing bytes to arbitrary-length output.
-//
-//
-// SHAKE implementation is based on FIPS PUB 202 [1]
-// cSHAKE implementations is based on NIST SP 800-185 [2]
-//
-// [1] https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf
-// [2] https://doi.org/10.6028/NIST.SP.800-185
-
-import (
- "encoding/binary"
- "io"
-)
-
-// ShakeHash defines the interface to hash functions that
-// support arbitrary-length output.
-type ShakeHash interface {
- // Write absorbs more data into the hash's state. It panics if input is
- // written to it after output has been read from it.
- io.Writer
-
- // Read reads more output from the hash; reading affects the hash's
- // state. (ShakeHash.Read is thus very different from Hash.Sum)
- // It never returns an error.
- io.Reader
-
- // Clone returns a copy of the ShakeHash in its current state.
- Clone() ShakeHash
-
- // Reset resets the ShakeHash to its initial state.
- Reset()
-}
-
-// cSHAKE specific context
-type cshakeState struct {
- *state // SHA-3 state context and Read/Write operations
-
- // initBlock is the cSHAKE specific initialization set of bytes. It is initialized
- // by newCShake function and stores concatenation of N followed by S, encoded
- // by the method specified in 3.3 of [1].
- // It is stored here in order for Reset() to be able to put context into
- // initial state.
- initBlock []byte
-}
-
-// Consts for configuring initial SHA-3 state
-const (
- dsbyteShake = 0x1f
- dsbyteCShake = 0x04
- rate128 = 168
- rate256 = 136
-)
-
-func bytepad(input []byte, w int) []byte {
- // leftEncode always returns max 9 bytes
- buf := make([]byte, 0, 9+len(input)+w)
- buf = append(buf, leftEncode(uint64(w))...)
- buf = append(buf, input...)
- padlen := w - (len(buf) % w)
- return append(buf, make([]byte, padlen)...)
-}
-
-func leftEncode(value uint64) []byte {
- var b [9]byte
- binary.BigEndian.PutUint64(b[1:], value)
- // Trim all but last leading zero bytes
- i := byte(1)
- for i < 8 && b[i] == 0 {
- i++
- }
- // Prepend number of encoded bytes
- b[i-1] = 9 - i
- return b[i-1:]
-}
-
-func newCShake(N, S []byte, rate int, dsbyte byte) ShakeHash {
- c := cshakeState{state: &state{rate: rate, dsbyte: dsbyte}}
-
- // leftEncode returns max 9 bytes
- c.initBlock = make([]byte, 0, 9*2+len(N)+len(S))
- c.initBlock = append(c.initBlock, leftEncode(uint64(len(N)*8))...)
- c.initBlock = append(c.initBlock, N...)
- c.initBlock = append(c.initBlock, leftEncode(uint64(len(S)*8))...)
- c.initBlock = append(c.initBlock, S...)
- c.Write(bytepad(c.initBlock, c.rate))
- return &c
-}
-
-// Reset resets the hash to initial state.
-func (c *cshakeState) Reset() {
- c.state.Reset()
- c.Write(bytepad(c.initBlock, c.rate))
-}
-
-// Clone returns copy of a cSHAKE context within its current state.
-func (c *cshakeState) Clone() ShakeHash {
- b := make([]byte, len(c.initBlock))
- copy(b, c.initBlock)
- return &cshakeState{state: c.clone(), initBlock: b}
-}
-
-// Clone returns copy of SHAKE context within its current state.
-func (c *state) Clone() ShakeHash {
- return c.clone()
-}
-
-// NewShake128 creates a new SHAKE128 variable-output-length ShakeHash.
-// Its generic security strength is 128 bits against all attacks if at
-// least 32 bytes of its output are used.
-func NewShake128() ShakeHash {
- if h := newShake128Asm(); h != nil {
- return h
- }
- return &state{rate: rate128, dsbyte: dsbyteShake}
-}
-
-// NewShake256 creates a new SHAKE256 variable-output-length ShakeHash.
-// Its generic security strength is 256 bits against all attacks if
-// at least 64 bytes of its output are used.
-func NewShake256() ShakeHash {
- if h := newShake256Asm(); h != nil {
- return h
- }
- return &state{rate: rate256, dsbyte: dsbyteShake}
-}
-
-// NewCShake128 creates a new instance of cSHAKE128 variable-output-length ShakeHash,
-// a customizable variant of SHAKE128.
-// N is used to define functions based on cSHAKE, it can be empty when plain cSHAKE is
-// desired. S is a customization byte string used for domain separation - two cSHAKE
-// computations on same input with different S yield unrelated outputs.
-// When N and S are both empty, this is equivalent to NewShake128.
-func NewCShake128(N, S []byte) ShakeHash {
- if len(N) == 0 && len(S) == 0 {
- return NewShake128()
- }
- return newCShake(N, S, rate128, dsbyteCShake)
-}
-
-// NewCShake256 creates a new instance of cSHAKE256 variable-output-length ShakeHash,
-// a customizable variant of SHAKE256.
-// N is used to define functions based on cSHAKE, it can be empty when plain cSHAKE is
-// desired. S is a customization byte string used for domain separation - two cSHAKE
-// computations on same input with different S yield unrelated outputs.
-// When N and S are both empty, this is equivalent to NewShake256.
-func NewCShake256(N, S []byte) ShakeHash {
- if len(N) == 0 && len(S) == 0 {
- return NewShake256()
- }
- return newCShake(N, S, rate256, dsbyteCShake)
-}
-
-// ShakeSum128 writes an arbitrary-length digest of data into hash.
-func ShakeSum128(hash, data []byte) {
- h := NewShake128()
- h.Write(data)
- h.Read(hash)
-}
-
-// ShakeSum256 writes an arbitrary-length digest of data into hash.
-func ShakeSum256(hash, data []byte) {
- h := NewShake256()
- h.Write(data)
- h.Read(hash)
-}
diff --git a/vendor/golang.org/x/crypto/sha3/shake_generic.go b/vendor/golang.org/x/crypto/sha3/shake_generic.go
deleted file mode 100644
index 5c0710e..0000000
--- a/vendor/golang.org/x/crypto/sha3/shake_generic.go
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !gc || purego || !s390x
-// +build !gc purego !s390x
-
-package sha3
-
-// newShake128Asm returns an assembly implementation of SHAKE-128 if available,
-// otherwise it returns nil.
-func newShake128Asm() ShakeHash {
- return nil
-}
-
-// newShake256Asm returns an assembly implementation of SHAKE-256 if available,
-// otherwise it returns nil.
-func newShake256Asm() ShakeHash {
- return nil
-}
diff --git a/vendor/golang.org/x/crypto/sha3/xor.go b/vendor/golang.org/x/crypto/sha3/xor.go
deleted file mode 100644
index 59c8eb9..0000000
--- a/vendor/golang.org/x/crypto/sha3/xor.go
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (!amd64 && !386 && !ppc64le) || purego
-// +build !amd64,!386,!ppc64le purego
-
-package sha3
-
-// A storageBuf is an aligned array of maxRate bytes.
-type storageBuf [maxRate]byte
-
-func (b *storageBuf) asBytes() *[maxRate]byte {
- return (*[maxRate]byte)(b)
-}
-
-var (
- xorIn = xorInGeneric
- copyOut = copyOutGeneric
- xorInUnaligned = xorInGeneric
- copyOutUnaligned = copyOutGeneric
-)
-
-const xorImplementationUnaligned = "generic"
diff --git a/vendor/golang.org/x/crypto/sha3/xor_generic.go b/vendor/golang.org/x/crypto/sha3/xor_generic.go
deleted file mode 100644
index 8d94771..0000000
--- a/vendor/golang.org/x/crypto/sha3/xor_generic.go
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package sha3
-
-import "encoding/binary"
-
-// xorInGeneric xors the bytes in buf into the state; it
-// makes no non-portable assumptions about memory layout
-// or alignment.
-func xorInGeneric(d *state, buf []byte) {
- n := len(buf) / 8
-
- for i := 0; i < n; i++ {
- a := binary.LittleEndian.Uint64(buf)
- d.a[i] ^= a
- buf = buf[8:]
- }
-}
-
-// copyOutGeneric copies uint64s to a byte buffer.
-func copyOutGeneric(d *state, b []byte) {
- for i := 0; len(b) >= 8; i++ {
- binary.LittleEndian.PutUint64(b, d.a[i])
- b = b[8:]
- }
-}
diff --git a/vendor/golang.org/x/crypto/sha3/xor_unaligned.go b/vendor/golang.org/x/crypto/sha3/xor_unaligned.go
deleted file mode 100644
index 1ce6062..0000000
--- a/vendor/golang.org/x/crypto/sha3/xor_unaligned.go
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (amd64 || 386 || ppc64le) && !purego
-// +build amd64 386 ppc64le
-// +build !purego
-
-package sha3
-
-import "unsafe"
-
-// A storageBuf is an aligned array of maxRate bytes.
-type storageBuf [maxRate / 8]uint64
-
-func (b *storageBuf) asBytes() *[maxRate]byte {
- return (*[maxRate]byte)(unsafe.Pointer(b))
-}
-
-// xorInUnaligned uses unaligned reads and writes to update d.a to contain d.a
-// XOR buf.
-func xorInUnaligned(d *state, buf []byte) {
- n := len(buf)
- bw := (*[maxRate / 8]uint64)(unsafe.Pointer(&buf[0]))[: n/8 : n/8]
- if n >= 72 {
- d.a[0] ^= bw[0]
- d.a[1] ^= bw[1]
- d.a[2] ^= bw[2]
- d.a[3] ^= bw[3]
- d.a[4] ^= bw[4]
- d.a[5] ^= bw[5]
- d.a[6] ^= bw[6]
- d.a[7] ^= bw[7]
- d.a[8] ^= bw[8]
- }
- if n >= 104 {
- d.a[9] ^= bw[9]
- d.a[10] ^= bw[10]
- d.a[11] ^= bw[11]
- d.a[12] ^= bw[12]
- }
- if n >= 136 {
- d.a[13] ^= bw[13]
- d.a[14] ^= bw[14]
- d.a[15] ^= bw[15]
- d.a[16] ^= bw[16]
- }
- if n >= 144 {
- d.a[17] ^= bw[17]
- }
- if n >= 168 {
- d.a[18] ^= bw[18]
- d.a[19] ^= bw[19]
- d.a[20] ^= bw[20]
- }
-}
-
-func copyOutUnaligned(d *state, buf []byte) {
- ab := (*[maxRate]uint8)(unsafe.Pointer(&d.a[0]))
- copy(buf, ab[:])
-}
-
-var (
- xorIn = xorInUnaligned
- copyOut = copyOutUnaligned
-)
-
-const xorImplementationUnaligned = "unaligned"
diff --git a/vendor/golang.org/x/crypto/tea/cipher.go b/vendor/golang.org/x/crypto/tea/cipher.go
deleted file mode 100644
index c1ff90e..0000000
--- a/vendor/golang.org/x/crypto/tea/cipher.go
+++ /dev/null
@@ -1,116 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package tea implements the TEA algorithm, as defined in Needham and
-// Wheeler's 1994 technical report, “TEA, a Tiny Encryption Algorithm”. See
-// http://www.cix.co.uk/~klockstone/tea.pdf for details.
-//
-// TEA is a legacy cipher and its short block size makes it vulnerable to
-// birthday bound attacks (see https://sweet32.info). It should only be used
-// where compatibility with legacy systems, not security, is the goal.
-//
-// Deprecated: any new system should use AES (from crypto/aes, if necessary in
-// an AEAD mode like crypto/cipher.NewGCM) or XChaCha20-Poly1305 (from
-// golang.org/x/crypto/chacha20poly1305).
-package tea
-
-import (
- "crypto/cipher"
- "encoding/binary"
- "errors"
-)
-
-const (
- // BlockSize is the size of a TEA block, in bytes.
- BlockSize = 8
-
- // KeySize is the size of a TEA key, in bytes.
- KeySize = 16
-
- // delta is the TEA key schedule constant.
- delta = 0x9e3779b9
-
- // numRounds is the standard number of rounds in TEA.
- numRounds = 64
-)
-
-// tea is an instance of the TEA cipher with a particular key.
-type tea struct {
- key [16]byte
- rounds int
-}
-
-// NewCipher returns an instance of the TEA cipher with the standard number of
-// rounds. The key argument must be 16 bytes long.
-func NewCipher(key []byte) (cipher.Block, error) {
- return NewCipherWithRounds(key, numRounds)
-}
-
-// NewCipherWithRounds returns an instance of the TEA cipher with a given
-// number of rounds, which must be even. The key argument must be 16 bytes
-// long.
-func NewCipherWithRounds(key []byte, rounds int) (cipher.Block, error) {
- if len(key) != 16 {
- return nil, errors.New("tea: incorrect key size")
- }
-
- if rounds&1 != 0 {
- return nil, errors.New("tea: odd number of rounds specified")
- }
-
- c := &tea{
- rounds: rounds,
- }
- copy(c.key[:], key)
-
- return c, nil
-}
-
-// BlockSize returns the TEA block size, which is eight bytes. It is necessary
-// to satisfy the Block interface in the package "crypto/cipher".
-func (*tea) BlockSize() int {
- return BlockSize
-}
-
-// Encrypt encrypts the 8 byte buffer src using the key in t and stores the
-// result in dst. Note that for amounts of data larger than a block, it is not
-// safe to just call Encrypt on successive blocks; instead, use an encryption
-// mode like CBC (see crypto/cipher/cbc.go).
-func (t *tea) Encrypt(dst, src []byte) {
- e := binary.BigEndian
- v0, v1 := e.Uint32(src), e.Uint32(src[4:])
- k0, k1, k2, k3 := e.Uint32(t.key[0:]), e.Uint32(t.key[4:]), e.Uint32(t.key[8:]), e.Uint32(t.key[12:])
-
- sum := uint32(0)
- delta := uint32(delta)
-
- for i := 0; i < t.rounds/2; i++ {
- sum += delta
- v0 += ((v1 << 4) + k0) ^ (v1 + sum) ^ ((v1 >> 5) + k1)
- v1 += ((v0 << 4) + k2) ^ (v0 + sum) ^ ((v0 >> 5) + k3)
- }
-
- e.PutUint32(dst, v0)
- e.PutUint32(dst[4:], v1)
-}
-
-// Decrypt decrypts the 8 byte buffer src using the key in t and stores the
-// result in dst.
-func (t *tea) Decrypt(dst, src []byte) {
- e := binary.BigEndian
- v0, v1 := e.Uint32(src), e.Uint32(src[4:])
- k0, k1, k2, k3 := e.Uint32(t.key[0:]), e.Uint32(t.key[4:]), e.Uint32(t.key[8:]), e.Uint32(t.key[12:])
-
- delta := uint32(delta)
- sum := delta * uint32(t.rounds/2) // in general, sum = delta * n
-
- for i := 0; i < t.rounds/2; i++ {
- v1 -= ((v0 << 4) + k2) ^ (v0 + sum) ^ ((v0 >> 5) + k3)
- v0 -= ((v1 << 4) + k0) ^ (v1 + sum) ^ ((v1 >> 5) + k1)
- sum -= delta
- }
-
- e.PutUint32(dst, v0)
- e.PutUint32(dst[4:], v1)
-}
diff --git a/vendor/golang.org/x/crypto/twofish/twofish.go b/vendor/golang.org/x/crypto/twofish/twofish.go
deleted file mode 100644
index 1197d75..0000000
--- a/vendor/golang.org/x/crypto/twofish/twofish.go
+++ /dev/null
@@ -1,348 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package twofish implements Bruce Schneier's Twofish encryption algorithm.
-//
-// Deprecated: Twofish is a legacy cipher and should not be used for new
-// applications. Also, this package does not and will not provide an optimized
-// implementation. Instead, use AES (from crypto/aes, if necessary in an AEAD
-// mode like crypto/cipher.NewGCM) or XChaCha20-Poly1305 (from
-// golang.org/x/crypto/chacha20poly1305).
-package twofish // import "golang.org/x/crypto/twofish"
-
-// Twofish is defined in https://www.schneier.com/paper-twofish-paper.pdf [TWOFISH]
-
-// This code is a port of the LibTom C implementation.
-// See http://libtom.org/?page=features&newsitems=5&whatfile=crypt.
-// LibTomCrypt is free for all purposes under the public domain.
-// It was heavily inspired by the go blowfish package.
-
-import "strconv"
-
-// BlockSize is the constant block size of Twofish.
-const BlockSize = 16
-
-const mdsPolynomial = 0x169 // x^8 + x^6 + x^5 + x^3 + 1, see [TWOFISH] 4.2
-const rsPolynomial = 0x14d // x^8 + x^6 + x^3 + x^2 + 1, see [TWOFISH] 4.3
-
-// A Cipher is an instance of Twofish encryption using a particular key.
-type Cipher struct {
- s [4][256]uint32
- k [40]uint32
-}
-
-type KeySizeError int
-
-func (k KeySizeError) Error() string {
- return "crypto/twofish: invalid key size " + strconv.Itoa(int(k))
-}
-
-// NewCipher creates and returns a Cipher.
-// The key argument should be the Twofish key, 16, 24 or 32 bytes.
-func NewCipher(key []byte) (*Cipher, error) {
- keylen := len(key)
-
- if keylen != 16 && keylen != 24 && keylen != 32 {
- return nil, KeySizeError(keylen)
- }
-
- // k is the number of 64 bit words in key
- k := keylen / 8
-
- // Create the S[..] words
- var S [4 * 4]byte
- for i := 0; i < k; i++ {
- // Computes [y0 y1 y2 y3] = rs . [x0 x1 x2 x3 x4 x5 x6 x7]
- for j, rsRow := range rs {
- for k, rsVal := range rsRow {
- S[4*i+j] ^= gfMult(key[8*i+k], rsVal, rsPolynomial)
- }
- }
- }
-
- // Calculate subkeys
- c := new(Cipher)
- var tmp [4]byte
- for i := byte(0); i < 20; i++ {
- // A = h(p * 2x, Me)
- for j := range tmp {
- tmp[j] = 2 * i
- }
- A := h(tmp[:], key, 0)
-
- // B = rolc(h(p * (2x + 1), Mo), 8)
- for j := range tmp {
- tmp[j] = 2*i + 1
- }
- B := h(tmp[:], key, 1)
- B = rol(B, 8)
-
- c.k[2*i] = A + B
-
- // K[2i+1] = (A + 2B) <<< 9
- c.k[2*i+1] = rol(2*B+A, 9)
- }
-
- // Calculate sboxes
- switch k {
- case 2:
- for i := range c.s[0] {
- c.s[0][i] = mdsColumnMult(sbox[1][sbox[0][sbox[0][byte(i)]^S[0]]^S[4]], 0)
- c.s[1][i] = mdsColumnMult(sbox[0][sbox[0][sbox[1][byte(i)]^S[1]]^S[5]], 1)
- c.s[2][i] = mdsColumnMult(sbox[1][sbox[1][sbox[0][byte(i)]^S[2]]^S[6]], 2)
- c.s[3][i] = mdsColumnMult(sbox[0][sbox[1][sbox[1][byte(i)]^S[3]]^S[7]], 3)
- }
- case 3:
- for i := range c.s[0] {
- c.s[0][i] = mdsColumnMult(sbox[1][sbox[0][sbox[0][sbox[1][byte(i)]^S[0]]^S[4]]^S[8]], 0)
- c.s[1][i] = mdsColumnMult(sbox[0][sbox[0][sbox[1][sbox[1][byte(i)]^S[1]]^S[5]]^S[9]], 1)
- c.s[2][i] = mdsColumnMult(sbox[1][sbox[1][sbox[0][sbox[0][byte(i)]^S[2]]^S[6]]^S[10]], 2)
- c.s[3][i] = mdsColumnMult(sbox[0][sbox[1][sbox[1][sbox[0][byte(i)]^S[3]]^S[7]]^S[11]], 3)
- }
- default:
- for i := range c.s[0] {
- c.s[0][i] = mdsColumnMult(sbox[1][sbox[0][sbox[0][sbox[1][sbox[1][byte(i)]^S[0]]^S[4]]^S[8]]^S[12]], 0)
- c.s[1][i] = mdsColumnMult(sbox[0][sbox[0][sbox[1][sbox[1][sbox[0][byte(i)]^S[1]]^S[5]]^S[9]]^S[13]], 1)
- c.s[2][i] = mdsColumnMult(sbox[1][sbox[1][sbox[0][sbox[0][sbox[0][byte(i)]^S[2]]^S[6]]^S[10]]^S[14]], 2)
- c.s[3][i] = mdsColumnMult(sbox[0][sbox[1][sbox[1][sbox[0][sbox[1][byte(i)]^S[3]]^S[7]]^S[11]]^S[15]], 3)
- }
- }
-
- return c, nil
-}
-
-// BlockSize returns the Twofish block size, 16 bytes.
-func (c *Cipher) BlockSize() int { return BlockSize }
-
-// store32l stores src in dst in little-endian form.
-func store32l(dst []byte, src uint32) {
- dst[0] = byte(src)
- dst[1] = byte(src >> 8)
- dst[2] = byte(src >> 16)
- dst[3] = byte(src >> 24)
- return
-}
-
-// load32l reads a little-endian uint32 from src.
-func load32l(src []byte) uint32 {
- return uint32(src[0]) | uint32(src[1])<<8 | uint32(src[2])<<16 | uint32(src[3])<<24
-}
-
-// rol returns x after a left circular rotation of y bits.
-func rol(x, y uint32) uint32 {
- return (x << (y & 31)) | (x >> (32 - (y & 31)))
-}
-
-// ror returns x after a right circular rotation of y bits.
-func ror(x, y uint32) uint32 {
- return (x >> (y & 31)) | (x << (32 - (y & 31)))
-}
-
-// The RS matrix. See [TWOFISH] 4.3
-var rs = [4][8]byte{
- {0x01, 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E},
- {0xA4, 0x56, 0x82, 0xF3, 0x1E, 0xC6, 0x68, 0xE5},
- {0x02, 0xA1, 0xFC, 0xC1, 0x47, 0xAE, 0x3D, 0x19},
- {0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E, 0x03},
-}
-
-// sbox tables
-var sbox = [2][256]byte{
- {
- 0xa9, 0x67, 0xb3, 0xe8, 0x04, 0xfd, 0xa3, 0x76, 0x9a, 0x92, 0x80, 0x78, 0xe4, 0xdd, 0xd1, 0x38,
- 0x0d, 0xc6, 0x35, 0x98, 0x18, 0xf7, 0xec, 0x6c, 0x43, 0x75, 0x37, 0x26, 0xfa, 0x13, 0x94, 0x48,
- 0xf2, 0xd0, 0x8b, 0x30, 0x84, 0x54, 0xdf, 0x23, 0x19, 0x5b, 0x3d, 0x59, 0xf3, 0xae, 0xa2, 0x82,
- 0x63, 0x01, 0x83, 0x2e, 0xd9, 0x51, 0x9b, 0x7c, 0xa6, 0xeb, 0xa5, 0xbe, 0x16, 0x0c, 0xe3, 0x61,
- 0xc0, 0x8c, 0x3a, 0xf5, 0x73, 0x2c, 0x25, 0x0b, 0xbb, 0x4e, 0x89, 0x6b, 0x53, 0x6a, 0xb4, 0xf1,
- 0xe1, 0xe6, 0xbd, 0x45, 0xe2, 0xf4, 0xb6, 0x66, 0xcc, 0x95, 0x03, 0x56, 0xd4, 0x1c, 0x1e, 0xd7,
- 0xfb, 0xc3, 0x8e, 0xb5, 0xe9, 0xcf, 0xbf, 0xba, 0xea, 0x77, 0x39, 0xaf, 0x33, 0xc9, 0x62, 0x71,
- 0x81, 0x79, 0x09, 0xad, 0x24, 0xcd, 0xf9, 0xd8, 0xe5, 0xc5, 0xb9, 0x4d, 0x44, 0x08, 0x86, 0xe7,
- 0xa1, 0x1d, 0xaa, 0xed, 0x06, 0x70, 0xb2, 0xd2, 0x41, 0x7b, 0xa0, 0x11, 0x31, 0xc2, 0x27, 0x90,
- 0x20, 0xf6, 0x60, 0xff, 0x96, 0x5c, 0xb1, 0xab, 0x9e, 0x9c, 0x52, 0x1b, 0x5f, 0x93, 0x0a, 0xef,
- 0x91, 0x85, 0x49, 0xee, 0x2d, 0x4f, 0x8f, 0x3b, 0x47, 0x87, 0x6d, 0x46, 0xd6, 0x3e, 0x69, 0x64,
- 0x2a, 0xce, 0xcb, 0x2f, 0xfc, 0x97, 0x05, 0x7a, 0xac, 0x7f, 0xd5, 0x1a, 0x4b, 0x0e, 0xa7, 0x5a,
- 0x28, 0x14, 0x3f, 0x29, 0x88, 0x3c, 0x4c, 0x02, 0xb8, 0xda, 0xb0, 0x17, 0x55, 0x1f, 0x8a, 0x7d,
- 0x57, 0xc7, 0x8d, 0x74, 0xb7, 0xc4, 0x9f, 0x72, 0x7e, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34,
- 0x6e, 0x50, 0xde, 0x68, 0x65, 0xbc, 0xdb, 0xf8, 0xc8, 0xa8, 0x2b, 0x40, 0xdc, 0xfe, 0x32, 0xa4,
- 0xca, 0x10, 0x21, 0xf0, 0xd3, 0x5d, 0x0f, 0x00, 0x6f, 0x9d, 0x36, 0x42, 0x4a, 0x5e, 0xc1, 0xe0,
- },
- {
- 0x75, 0xf3, 0xc6, 0xf4, 0xdb, 0x7b, 0xfb, 0xc8, 0x4a, 0xd3, 0xe6, 0x6b, 0x45, 0x7d, 0xe8, 0x4b,
- 0xd6, 0x32, 0xd8, 0xfd, 0x37, 0x71, 0xf1, 0xe1, 0x30, 0x0f, 0xf8, 0x1b, 0x87, 0xfa, 0x06, 0x3f,
- 0x5e, 0xba, 0xae, 0x5b, 0x8a, 0x00, 0xbc, 0x9d, 0x6d, 0xc1, 0xb1, 0x0e, 0x80, 0x5d, 0xd2, 0xd5,
- 0xa0, 0x84, 0x07, 0x14, 0xb5, 0x90, 0x2c, 0xa3, 0xb2, 0x73, 0x4c, 0x54, 0x92, 0x74, 0x36, 0x51,
- 0x38, 0xb0, 0xbd, 0x5a, 0xfc, 0x60, 0x62, 0x96, 0x6c, 0x42, 0xf7, 0x10, 0x7c, 0x28, 0x27, 0x8c,
- 0x13, 0x95, 0x9c, 0xc7, 0x24, 0x46, 0x3b, 0x70, 0xca, 0xe3, 0x85, 0xcb, 0x11, 0xd0, 0x93, 0xb8,
- 0xa6, 0x83, 0x20, 0xff, 0x9f, 0x77, 0xc3, 0xcc, 0x03, 0x6f, 0x08, 0xbf, 0x40, 0xe7, 0x2b, 0xe2,
- 0x79, 0x0c, 0xaa, 0x82, 0x41, 0x3a, 0xea, 0xb9, 0xe4, 0x9a, 0xa4, 0x97, 0x7e, 0xda, 0x7a, 0x17,
- 0x66, 0x94, 0xa1, 0x1d, 0x3d, 0xf0, 0xde, 0xb3, 0x0b, 0x72, 0xa7, 0x1c, 0xef, 0xd1, 0x53, 0x3e,
- 0x8f, 0x33, 0x26, 0x5f, 0xec, 0x76, 0x2a, 0x49, 0x81, 0x88, 0xee, 0x21, 0xc4, 0x1a, 0xeb, 0xd9,
- 0xc5, 0x39, 0x99, 0xcd, 0xad, 0x31, 0x8b, 0x01, 0x18, 0x23, 0xdd, 0x1f, 0x4e, 0x2d, 0xf9, 0x48,
- 0x4f, 0xf2, 0x65, 0x8e, 0x78, 0x5c, 0x58, 0x19, 0x8d, 0xe5, 0x98, 0x57, 0x67, 0x7f, 0x05, 0x64,
- 0xaf, 0x63, 0xb6, 0xfe, 0xf5, 0xb7, 0x3c, 0xa5, 0xce, 0xe9, 0x68, 0x44, 0xe0, 0x4d, 0x43, 0x69,
- 0x29, 0x2e, 0xac, 0x15, 0x59, 0xa8, 0x0a, 0x9e, 0x6e, 0x47, 0xdf, 0x34, 0x35, 0x6a, 0xcf, 0xdc,
- 0x22, 0xc9, 0xc0, 0x9b, 0x89, 0xd4, 0xed, 0xab, 0x12, 0xa2, 0x0d, 0x52, 0xbb, 0x02, 0x2f, 0xa9,
- 0xd7, 0x61, 0x1e, 0xb4, 0x50, 0x04, 0xf6, 0xc2, 0x16, 0x25, 0x86, 0x56, 0x55, 0x09, 0xbe, 0x91,
- },
-}
-
-// gfMult returns a·b in GF(2^8)/p
-func gfMult(a, b byte, p uint32) byte {
- B := [2]uint32{0, uint32(b)}
- P := [2]uint32{0, p}
- var result uint32
-
- // branchless GF multiplier
- for i := 0; i < 7; i++ {
- result ^= B[a&1]
- a >>= 1
- B[1] = P[B[1]>>7] ^ (B[1] << 1)
- }
- result ^= B[a&1]
- return byte(result)
-}
-
-// mdsColumnMult calculates y{col} where [y0 y1 y2 y3] = MDS · [x0]
-func mdsColumnMult(in byte, col int) uint32 {
- mul01 := in
- mul5B := gfMult(in, 0x5B, mdsPolynomial)
- mulEF := gfMult(in, 0xEF, mdsPolynomial)
-
- switch col {
- case 0:
- return uint32(mul01) | uint32(mul5B)<<8 | uint32(mulEF)<<16 | uint32(mulEF)<<24
- case 1:
- return uint32(mulEF) | uint32(mulEF)<<8 | uint32(mul5B)<<16 | uint32(mul01)<<24
- case 2:
- return uint32(mul5B) | uint32(mulEF)<<8 | uint32(mul01)<<16 | uint32(mulEF)<<24
- case 3:
- return uint32(mul5B) | uint32(mul01)<<8 | uint32(mulEF)<<16 | uint32(mul5B)<<24
- }
-
- panic("unreachable")
-}
-
-// h implements the S-box generation function. See [TWOFISH] 4.3.5
-func h(in, key []byte, offset int) uint32 {
- var y [4]byte
- for x := range y {
- y[x] = in[x]
- }
- switch len(key) / 8 {
- case 4:
- y[0] = sbox[1][y[0]] ^ key[4*(6+offset)+0]
- y[1] = sbox[0][y[1]] ^ key[4*(6+offset)+1]
- y[2] = sbox[0][y[2]] ^ key[4*(6+offset)+2]
- y[3] = sbox[1][y[3]] ^ key[4*(6+offset)+3]
- fallthrough
- case 3:
- y[0] = sbox[1][y[0]] ^ key[4*(4+offset)+0]
- y[1] = sbox[1][y[1]] ^ key[4*(4+offset)+1]
- y[2] = sbox[0][y[2]] ^ key[4*(4+offset)+2]
- y[3] = sbox[0][y[3]] ^ key[4*(4+offset)+3]
- fallthrough
- case 2:
- y[0] = sbox[1][sbox[0][sbox[0][y[0]]^key[4*(2+offset)+0]]^key[4*(0+offset)+0]]
- y[1] = sbox[0][sbox[0][sbox[1][y[1]]^key[4*(2+offset)+1]]^key[4*(0+offset)+1]]
- y[2] = sbox[1][sbox[1][sbox[0][y[2]]^key[4*(2+offset)+2]]^key[4*(0+offset)+2]]
- y[3] = sbox[0][sbox[1][sbox[1][y[3]]^key[4*(2+offset)+3]]^key[4*(0+offset)+3]]
- }
- // [y0 y1 y2 y3] = MDS . [x0 x1 x2 x3]
- var mdsMult uint32
- for i := range y {
- mdsMult ^= mdsColumnMult(y[i], i)
- }
- return mdsMult
-}
-
-// Encrypt encrypts a 16-byte block from src to dst, which may overlap.
-// Note that for amounts of data larger than a block,
-// it is not safe to just call Encrypt on successive blocks;
-// instead, use an encryption mode like CBC (see crypto/cipher/cbc.go).
-func (c *Cipher) Encrypt(dst, src []byte) {
- S1 := c.s[0]
- S2 := c.s[1]
- S3 := c.s[2]
- S4 := c.s[3]
-
- // Load input
- ia := load32l(src[0:4])
- ib := load32l(src[4:8])
- ic := load32l(src[8:12])
- id := load32l(src[12:16])
-
- // Pre-whitening
- ia ^= c.k[0]
- ib ^= c.k[1]
- ic ^= c.k[2]
- id ^= c.k[3]
-
- for i := 0; i < 8; i++ {
- k := c.k[8+i*4 : 12+i*4]
- t2 := S2[byte(ib)] ^ S3[byte(ib>>8)] ^ S4[byte(ib>>16)] ^ S1[byte(ib>>24)]
- t1 := S1[byte(ia)] ^ S2[byte(ia>>8)] ^ S3[byte(ia>>16)] ^ S4[byte(ia>>24)] + t2
- ic = ror(ic^(t1+k[0]), 1)
- id = rol(id, 1) ^ (t2 + t1 + k[1])
-
- t2 = S2[byte(id)] ^ S3[byte(id>>8)] ^ S4[byte(id>>16)] ^ S1[byte(id>>24)]
- t1 = S1[byte(ic)] ^ S2[byte(ic>>8)] ^ S3[byte(ic>>16)] ^ S4[byte(ic>>24)] + t2
- ia = ror(ia^(t1+k[2]), 1)
- ib = rol(ib, 1) ^ (t2 + t1 + k[3])
- }
-
- // Output with "undo last swap"
- ta := ic ^ c.k[4]
- tb := id ^ c.k[5]
- tc := ia ^ c.k[6]
- td := ib ^ c.k[7]
-
- store32l(dst[0:4], ta)
- store32l(dst[4:8], tb)
- store32l(dst[8:12], tc)
- store32l(dst[12:16], td)
-}
-
-// Decrypt decrypts a 16-byte block from src to dst, which may overlap.
-func (c *Cipher) Decrypt(dst, src []byte) {
- S1 := c.s[0]
- S2 := c.s[1]
- S3 := c.s[2]
- S4 := c.s[3]
-
- // Load input
- ta := load32l(src[0:4])
- tb := load32l(src[4:8])
- tc := load32l(src[8:12])
- td := load32l(src[12:16])
-
- // Undo undo final swap
- ia := tc ^ c.k[6]
- ib := td ^ c.k[7]
- ic := ta ^ c.k[4]
- id := tb ^ c.k[5]
-
- for i := 8; i > 0; i-- {
- k := c.k[4+i*4 : 8+i*4]
- t2 := S2[byte(id)] ^ S3[byte(id>>8)] ^ S4[byte(id>>16)] ^ S1[byte(id>>24)]
- t1 := S1[byte(ic)] ^ S2[byte(ic>>8)] ^ S3[byte(ic>>16)] ^ S4[byte(ic>>24)] + t2
- ia = rol(ia, 1) ^ (t1 + k[2])
- ib = ror(ib^(t2+t1+k[3]), 1)
-
- t2 = S2[byte(ib)] ^ S3[byte(ib>>8)] ^ S4[byte(ib>>16)] ^ S1[byte(ib>>24)]
- t1 = S1[byte(ia)] ^ S2[byte(ia>>8)] ^ S3[byte(ia>>16)] ^ S4[byte(ia>>24)] + t2
- ic = rol(ic, 1) ^ (t1 + k[0])
- id = ror(id^(t2+t1+k[1]), 1)
- }
-
- // Undo pre-whitening
- ia ^= c.k[0]
- ib ^= c.k[1]
- ic ^= c.k[2]
- id ^= c.k[3]
-
- store32l(dst[0:4], ia)
- store32l(dst[4:8], ib)
- store32l(dst[8:12], ic)
- store32l(dst[12:16], id)
-}
diff --git a/vendor/golang.org/x/crypto/xtea/block.go b/vendor/golang.org/x/crypto/xtea/block.go
deleted file mode 100644
index fcb4e4d..0000000
--- a/vendor/golang.org/x/crypto/xtea/block.go
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-/*
- Implementation adapted from Needham and Wheeler's paper:
- http://www.cix.co.uk/~klockstone/xtea.pdf
-
- A precalculated look up table is used during encryption/decryption for values that are based purely on the key.
-*/
-
-package xtea
-
-// XTEA is based on 64 rounds.
-const numRounds = 64
-
-// blockToUint32 reads an 8 byte slice into two uint32s.
-// The block is treated as big endian.
-func blockToUint32(src []byte) (uint32, uint32) {
- r0 := uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
- r1 := uint32(src[4])<<24 | uint32(src[5])<<16 | uint32(src[6])<<8 | uint32(src[7])
- return r0, r1
-}
-
-// uint32ToBlock writes two uint32s into an 8 byte data block.
-// Values are written as big endian.
-func uint32ToBlock(v0, v1 uint32, dst []byte) {
- dst[0] = byte(v0 >> 24)
- dst[1] = byte(v0 >> 16)
- dst[2] = byte(v0 >> 8)
- dst[3] = byte(v0)
- dst[4] = byte(v1 >> 24)
- dst[5] = byte(v1 >> 16)
- dst[6] = byte(v1 >> 8)
- dst[7] = byte(v1 >> 0)
-}
-
-// encryptBlock encrypts a single 8 byte block using XTEA.
-func encryptBlock(c *Cipher, dst, src []byte) {
- v0, v1 := blockToUint32(src)
-
- // Two rounds of XTEA applied per loop
- for i := 0; i < numRounds; {
- v0 += ((v1<<4 ^ v1>>5) + v1) ^ c.table[i]
- i++
- v1 += ((v0<<4 ^ v0>>5) + v0) ^ c.table[i]
- i++
- }
-
- uint32ToBlock(v0, v1, dst)
-}
-
-// decryptBlock decrypts a single 8 byte block using XTEA.
-func decryptBlock(c *Cipher, dst, src []byte) {
- v0, v1 := blockToUint32(src)
-
- // Two rounds of XTEA applied per loop
- for i := numRounds; i > 0; {
- i--
- v1 -= ((v0<<4 ^ v0>>5) + v0) ^ c.table[i]
- i--
- v0 -= ((v1<<4 ^ v1>>5) + v1) ^ c.table[i]
- }
-
- uint32ToBlock(v0, v1, dst)
-}
diff --git a/vendor/golang.org/x/crypto/xtea/cipher.go b/vendor/golang.org/x/crypto/xtea/cipher.go
deleted file mode 100644
index a4c2fd0..0000000
--- a/vendor/golang.org/x/crypto/xtea/cipher.go
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package xtea implements XTEA encryption, as defined in Needham and Wheeler's
-// 1997 technical report, "Tea extensions."
-//
-// XTEA is a legacy cipher and its short block size makes it vulnerable to
-// birthday bound attacks (see https://sweet32.info). It should only be used
-// where compatibility with legacy systems, not security, is the goal.
-//
-// Deprecated: any new system should use AES (from crypto/aes, if necessary in
-// an AEAD mode like crypto/cipher.NewGCM) or XChaCha20-Poly1305 (from
-// golang.org/x/crypto/chacha20poly1305).
-package xtea // import "golang.org/x/crypto/xtea"
-
-// For details, see http://www.cix.co.uk/~klockstone/xtea.pdf
-
-import "strconv"
-
-// The XTEA block size in bytes.
-const BlockSize = 8
-
-// A Cipher is an instance of an XTEA cipher using a particular key.
-type Cipher struct {
- // table contains a series of precalculated values that are used each round.
- table [64]uint32
-}
-
-type KeySizeError int
-
-func (k KeySizeError) Error() string {
- return "crypto/xtea: invalid key size " + strconv.Itoa(int(k))
-}
-
-// NewCipher creates and returns a new Cipher.
-// The key argument should be the XTEA key.
-// XTEA only supports 128 bit (16 byte) keys.
-func NewCipher(key []byte) (*Cipher, error) {
- k := len(key)
- switch k {
- default:
- return nil, KeySizeError(k)
- case 16:
- break
- }
-
- c := new(Cipher)
- initCipher(c, key)
-
- return c, nil
-}
-
-// BlockSize returns the XTEA block size, 8 bytes.
-// It is necessary to satisfy the Block interface in the
-// package "crypto/cipher".
-func (c *Cipher) BlockSize() int { return BlockSize }
-
-// Encrypt encrypts the 8 byte buffer src using the key and stores the result in dst.
-// Note that for amounts of data larger than a block,
-// it is not safe to just call Encrypt on successive blocks;
-// instead, use an encryption mode like CBC (see crypto/cipher/cbc.go).
-func (c *Cipher) Encrypt(dst, src []byte) { encryptBlock(c, dst, src) }
-
-// Decrypt decrypts the 8 byte buffer src using the key and stores the result in dst.
-func (c *Cipher) Decrypt(dst, src []byte) { decryptBlock(c, dst, src) }
-
-// initCipher initializes the cipher context by creating a look up table
-// of precalculated values that are based on the key.
-func initCipher(c *Cipher, key []byte) {
- // Load the key into four uint32s
- var k [4]uint32
- for i := 0; i < len(k); i++ {
- j := i << 2 // Multiply by 4
- k[i] = uint32(key[j+0])<<24 | uint32(key[j+1])<<16 | uint32(key[j+2])<<8 | uint32(key[j+3])
- }
-
- // Precalculate the table
- const delta = 0x9E3779B9
- var sum uint32
-
- // Two rounds of XTEA applied per loop
- for i := 0; i < numRounds; {
- c.table[i] = sum + k[sum&3]
- i++
- sum += delta
- c.table[i] = sum + k[(sum>>11)&3]
- i++
- }
-}
diff --git a/vendor/golang.org/x/net/AUTHORS b/vendor/golang.org/x/net/AUTHORS
deleted file mode 100644
index 15167cd..0000000
--- a/vendor/golang.org/x/net/AUTHORS
+++ /dev/null
@@ -1,3 +0,0 @@
-# This source code refers to The Go Authors for copyright purposes.
-# The master list of authors is in the main Go distribution,
-# visible at http://tip.golang.org/AUTHORS.
diff --git a/vendor/golang.org/x/net/CONTRIBUTORS b/vendor/golang.org/x/net/CONTRIBUTORS
deleted file mode 100644
index 1c4577e..0000000
--- a/vendor/golang.org/x/net/CONTRIBUTORS
+++ /dev/null
@@ -1,3 +0,0 @@
-# This source code was written by the Go contributors.
-# The master list of contributors is in the main Go distribution,
-# visible at http://tip.golang.org/CONTRIBUTORS.
diff --git a/vendor/golang.org/x/net/LICENSE b/vendor/golang.org/x/net/LICENSE
deleted file mode 100644
index 6a66aea..0000000
--- a/vendor/golang.org/x/net/LICENSE
+++ /dev/null
@@ -1,27 +0,0 @@
-Copyright (c) 2009 The Go Authors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
- * Neither the name of Google Inc. nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/golang.org/x/net/PATENTS b/vendor/golang.org/x/net/PATENTS
deleted file mode 100644
index 7330990..0000000
--- a/vendor/golang.org/x/net/PATENTS
+++ /dev/null
@@ -1,22 +0,0 @@
-Additional IP Rights Grant (Patents)
-
-"This implementation" means the copyrightable works distributed by
-Google as part of the Go project.
-
-Google hereby grants to You a perpetual, worldwide, non-exclusive,
-no-charge, royalty-free, irrevocable (except as stated in this section)
-patent license to make, have made, use, offer to sell, sell, import,
-transfer and otherwise run, modify and propagate the contents of this
-implementation of Go, where such license applies only to those patent
-claims, both currently owned or controlled by Google and acquired in
-the future, licensable by Google that are necessarily infringed by this
-implementation of Go. This grant does not include claims that would be
-infringed only as a consequence of further modification of this
-implementation. If you or your agent or exclusive licensee institute or
-order or agree to the institution of patent litigation against any
-entity (including a cross-claim or counterclaim in a lawsuit) alleging
-that this implementation of Go or any code incorporated within this
-implementation of Go constitutes direct or contributory patent
-infringement, or inducement of patent infringement, then any patent
-rights granted to you under this License for this implementation of Go
-shall terminate as of the date such litigation is filed.
diff --git a/vendor/golang.org/x/net/bpf/asm.go b/vendor/golang.org/x/net/bpf/asm.go
deleted file mode 100644
index 15e21b1..0000000
--- a/vendor/golang.org/x/net/bpf/asm.go
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package bpf
-
-import "fmt"
-
-// Assemble converts insts into raw instructions suitable for loading
-// into a BPF virtual machine.
-//
-// Currently, no optimization is attempted, the assembled program flow
-// is exactly as provided.
-func Assemble(insts []Instruction) ([]RawInstruction, error) {
- ret := make([]RawInstruction, len(insts))
- var err error
- for i, inst := range insts {
- ret[i], err = inst.Assemble()
- if err != nil {
- return nil, fmt.Errorf("assembling instruction %d: %s", i+1, err)
- }
- }
- return ret, nil
-}
-
-// Disassemble attempts to parse raw back into
-// Instructions. Unrecognized RawInstructions are assumed to be an
-// extension not implemented by this package, and are passed through
-// unchanged to the output. The allDecoded value reports whether insts
-// contains no RawInstructions.
-func Disassemble(raw []RawInstruction) (insts []Instruction, allDecoded bool) {
- insts = make([]Instruction, len(raw))
- allDecoded = true
- for i, r := range raw {
- insts[i] = r.Disassemble()
- if _, ok := insts[i].(RawInstruction); ok {
- allDecoded = false
- }
- }
- return insts, allDecoded
-}
diff --git a/vendor/golang.org/x/net/bpf/constants.go b/vendor/golang.org/x/net/bpf/constants.go
deleted file mode 100644
index 12f3ee8..0000000
--- a/vendor/golang.org/x/net/bpf/constants.go
+++ /dev/null
@@ -1,222 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package bpf
-
-// A Register is a register of the BPF virtual machine.
-type Register uint16
-
-const (
- // RegA is the accumulator register. RegA is always the
- // destination register of ALU operations.
- RegA Register = iota
- // RegX is the indirection register, used by LoadIndirect
- // operations.
- RegX
-)
-
-// An ALUOp is an arithmetic or logic operation.
-type ALUOp uint16
-
-// ALU binary operation types.
-const (
- ALUOpAdd ALUOp = iota << 4
- ALUOpSub
- ALUOpMul
- ALUOpDiv
- ALUOpOr
- ALUOpAnd
- ALUOpShiftLeft
- ALUOpShiftRight
- aluOpNeg // Not exported because it's the only unary ALU operation, and gets its own instruction type.
- ALUOpMod
- ALUOpXor
-)
-
-// A JumpTest is a comparison operator used in conditional jumps.
-type JumpTest uint16
-
-// Supported operators for conditional jumps.
-// K can be RegX for JumpIfX
-const (
- // K == A
- JumpEqual JumpTest = iota
- // K != A
- JumpNotEqual
- // K > A
- JumpGreaterThan
- // K < A
- JumpLessThan
- // K >= A
- JumpGreaterOrEqual
- // K <= A
- JumpLessOrEqual
- // K & A != 0
- JumpBitsSet
- // K & A == 0
- JumpBitsNotSet
-)
-
-// An Extension is a function call provided by the kernel that
-// performs advanced operations that are expensive or impossible
-// within the BPF virtual machine.
-//
-// Extensions are only implemented by the Linux kernel.
-//
-// TODO: should we prune this list? Some of these extensions seem
-// either broken or near-impossible to use correctly, whereas other
-// (len, random, ifindex) are quite useful.
-type Extension int
-
-// Extension functions available in the Linux kernel.
-const (
- // extOffset is the negative maximum number of instructions used
- // to load instructions by overloading the K argument.
- extOffset = -0x1000
- // ExtLen returns the length of the packet.
- ExtLen Extension = 1
- // ExtProto returns the packet's L3 protocol type.
- ExtProto Extension = 0
- // ExtType returns the packet's type (skb->pkt_type in the kernel)
- //
- // TODO: better documentation. How nice an API do we want to
- // provide for these esoteric extensions?
- ExtType Extension = 4
- // ExtPayloadOffset returns the offset of the packet payload, or
- // the first protocol header that the kernel does not know how to
- // parse.
- ExtPayloadOffset Extension = 52
- // ExtInterfaceIndex returns the index of the interface on which
- // the packet was received.
- ExtInterfaceIndex Extension = 8
- // ExtNetlinkAttr returns the netlink attribute of type X at
- // offset A.
- ExtNetlinkAttr Extension = 12
- // ExtNetlinkAttrNested returns the nested netlink attribute of
- // type X at offset A.
- ExtNetlinkAttrNested Extension = 16
- // ExtMark returns the packet's mark value.
- ExtMark Extension = 20
- // ExtQueue returns the packet's assigned hardware queue.
- ExtQueue Extension = 24
- // ExtLinkLayerType returns the packet's hardware address type
- // (e.g. Ethernet, Infiniband).
- ExtLinkLayerType Extension = 28
- // ExtRXHash returns the packets receive hash.
- //
- // TODO: figure out what this rxhash actually is.
- ExtRXHash Extension = 32
- // ExtCPUID returns the ID of the CPU processing the current
- // packet.
- ExtCPUID Extension = 36
- // ExtVLANTag returns the packet's VLAN tag.
- ExtVLANTag Extension = 44
- // ExtVLANTagPresent returns non-zero if the packet has a VLAN
- // tag.
- //
- // TODO: I think this might be a lie: it reads bit 0x1000 of the
- // VLAN header, which changed meaning in recent revisions of the
- // spec - this extension may now return meaningless information.
- ExtVLANTagPresent Extension = 48
- // ExtVLANProto returns 0x8100 if the frame has a VLAN header,
- // 0x88a8 if the frame has a "Q-in-Q" double VLAN header, or some
- // other value if no VLAN information is present.
- ExtVLANProto Extension = 60
- // ExtRand returns a uniformly random uint32.
- ExtRand Extension = 56
-)
-
-// The following gives names to various bit patterns used in opcode construction.
-
-const (
- opMaskCls uint16 = 0x7
- // opClsLoad masks
- opMaskLoadDest = 0x01
- opMaskLoadWidth = 0x18
- opMaskLoadMode = 0xe0
- // opClsALU & opClsJump
- opMaskOperand = 0x08
- opMaskOperator = 0xf0
-)
-
-const (
- // +---------------+-----------------+---+---+---+
- // | AddrMode (3b) | LoadWidth (2b) | 0 | 0 | 0 |
- // +---------------+-----------------+---+---+---+
- opClsLoadA uint16 = iota
- // +---------------+-----------------+---+---+---+
- // | AddrMode (3b) | LoadWidth (2b) | 0 | 0 | 1 |
- // +---------------+-----------------+---+---+---+
- opClsLoadX
- // +---+---+---+---+---+---+---+---+
- // | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
- // +---+---+---+---+---+---+---+---+
- opClsStoreA
- // +---+---+---+---+---+---+---+---+
- // | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
- // +---+---+---+---+---+---+---+---+
- opClsStoreX
- // +---------------+-----------------+---+---+---+
- // | Operator (4b) | OperandSrc (1b) | 1 | 0 | 0 |
- // +---------------+-----------------+---+---+---+
- opClsALU
- // +-----------------------------+---+---+---+---+
- // | TestOperator (4b) | 0 | 1 | 0 | 1 |
- // +-----------------------------+---+---+---+---+
- opClsJump
- // +---+-------------------------+---+---+---+---+
- // | 0 | 0 | 0 | RetSrc (1b) | 0 | 1 | 1 | 0 |
- // +---+-------------------------+---+---+---+---+
- opClsReturn
- // +---+-------------------------+---+---+---+---+
- // | 0 | 0 | 0 | TXAorTAX (1b) | 0 | 1 | 1 | 1 |
- // +---+-------------------------+---+---+---+---+
- opClsMisc
-)
-
-const (
- opAddrModeImmediate uint16 = iota << 5
- opAddrModeAbsolute
- opAddrModeIndirect
- opAddrModeScratch
- opAddrModePacketLen // actually an extension, not an addressing mode.
- opAddrModeMemShift
-)
-
-const (
- opLoadWidth4 uint16 = iota << 3
- opLoadWidth2
- opLoadWidth1
-)
-
-// Operand for ALU and Jump instructions
-type opOperand uint16
-
-// Supported operand sources.
-const (
- opOperandConstant opOperand = iota << 3
- opOperandX
-)
-
-// An jumpOp is a conditional jump condition.
-type jumpOp uint16
-
-// Supported jump conditions.
-const (
- opJumpAlways jumpOp = iota << 4
- opJumpEqual
- opJumpGT
- opJumpGE
- opJumpSet
-)
-
-const (
- opRetSrcConstant uint16 = iota << 4
- opRetSrcA
-)
-
-const (
- opMiscTAX = 0x00
- opMiscTXA = 0x80
-)
diff --git a/vendor/golang.org/x/net/bpf/doc.go b/vendor/golang.org/x/net/bpf/doc.go
deleted file mode 100644
index ae62feb..0000000
--- a/vendor/golang.org/x/net/bpf/doc.go
+++ /dev/null
@@ -1,82 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-/*
-
-Package bpf implements marshaling and unmarshaling of programs for the
-Berkeley Packet Filter virtual machine, and provides a Go implementation
-of the virtual machine.
-
-BPF's main use is to specify a packet filter for network taps, so that
-the kernel doesn't have to expensively copy every packet it sees to
-userspace. However, it's been repurposed to other areas where running
-user code in-kernel is needed. For example, Linux's seccomp uses BPF
-to apply security policies to system calls. For simplicity, this
-documentation refers only to packets, but other uses of BPF have their
-own data payloads.
-
-BPF programs run in a restricted virtual machine. It has almost no
-access to kernel functions, and while conditional branches are
-allowed, they can only jump forwards, to guarantee that there are no
-infinite loops.
-
-The virtual machine
-
-The BPF VM is an accumulator machine. Its main register, called
-register A, is an implicit source and destination in all arithmetic
-and logic operations. The machine also has 16 scratch registers for
-temporary storage, and an indirection register (register X) for
-indirect memory access. All registers are 32 bits wide.
-
-Each run of a BPF program is given one packet, which is placed in the
-VM's read-only "main memory". LoadAbsolute and LoadIndirect
-instructions can fetch up to 32 bits at a time into register A for
-examination.
-
-The goal of a BPF program is to produce and return a verdict (uint32),
-which tells the kernel what to do with the packet. In the context of
-packet filtering, the returned value is the number of bytes of the
-packet to forward to userspace, or 0 to ignore the packet. Other
-contexts like seccomp define their own return values.
-
-In order to simplify programs, attempts to read past the end of the
-packet terminate the program execution with a verdict of 0 (ignore
-packet). This means that the vast majority of BPF programs don't need
-to do any explicit bounds checking.
-
-In addition to the bytes of the packet, some BPF programs have access
-to extensions, which are essentially calls to kernel utility
-functions. Currently, the only extensions supported by this package
-are the Linux packet filter extensions.
-
-Examples
-
-This packet filter selects all ARP packets.
-
- bpf.Assemble([]bpf.Instruction{
- // Load "EtherType" field from the ethernet header.
- bpf.LoadAbsolute{Off: 12, Size: 2},
- // Skip over the next instruction if EtherType is not ARP.
- bpf.JumpIf{Cond: bpf.JumpNotEqual, Val: 0x0806, SkipTrue: 1},
- // Verdict is "send up to 4k of the packet to userspace."
- bpf.RetConstant{Val: 4096},
- // Verdict is "ignore packet."
- bpf.RetConstant{Val: 0},
- })
-
-This packet filter captures a random 1% sample of traffic.
-
- bpf.Assemble([]bpf.Instruction{
- // Get a 32-bit random number from the Linux kernel.
- bpf.LoadExtension{Num: bpf.ExtRand},
- // 1% dice roll?
- bpf.JumpIf{Cond: bpf.JumpLessThan, Val: 2^32/100, SkipFalse: 1},
- // Capture.
- bpf.RetConstant{Val: 4096},
- // Ignore.
- bpf.RetConstant{Val: 0},
- })
-
-*/
-package bpf // import "golang.org/x/net/bpf"
diff --git a/vendor/golang.org/x/net/bpf/instructions.go b/vendor/golang.org/x/net/bpf/instructions.go
deleted file mode 100644
index 3cffcaa..0000000
--- a/vendor/golang.org/x/net/bpf/instructions.go
+++ /dev/null
@@ -1,726 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package bpf
-
-import "fmt"
-
-// An Instruction is one instruction executed by the BPF virtual
-// machine.
-type Instruction interface {
- // Assemble assembles the Instruction into a RawInstruction.
- Assemble() (RawInstruction, error)
-}
-
-// A RawInstruction is a raw BPF virtual machine instruction.
-type RawInstruction struct {
- // Operation to execute.
- Op uint16
- // For conditional jump instructions, the number of instructions
- // to skip if the condition is true/false.
- Jt uint8
- Jf uint8
- // Constant parameter. The meaning depends on the Op.
- K uint32
-}
-
-// Assemble implements the Instruction Assemble method.
-func (ri RawInstruction) Assemble() (RawInstruction, error) { return ri, nil }
-
-// Disassemble parses ri into an Instruction and returns it. If ri is
-// not recognized by this package, ri itself is returned.
-func (ri RawInstruction) Disassemble() Instruction {
- switch ri.Op & opMaskCls {
- case opClsLoadA, opClsLoadX:
- reg := Register(ri.Op & opMaskLoadDest)
- sz := 0
- switch ri.Op & opMaskLoadWidth {
- case opLoadWidth4:
- sz = 4
- case opLoadWidth2:
- sz = 2
- case opLoadWidth1:
- sz = 1
- default:
- return ri
- }
- switch ri.Op & opMaskLoadMode {
- case opAddrModeImmediate:
- if sz != 4 {
- return ri
- }
- return LoadConstant{Dst: reg, Val: ri.K}
- case opAddrModeScratch:
- if sz != 4 || ri.K > 15 {
- return ri
- }
- return LoadScratch{Dst: reg, N: int(ri.K)}
- case opAddrModeAbsolute:
- if ri.K > extOffset+0xffffffff {
- return LoadExtension{Num: Extension(-extOffset + ri.K)}
- }
- return LoadAbsolute{Size: sz, Off: ri.K}
- case opAddrModeIndirect:
- return LoadIndirect{Size: sz, Off: ri.K}
- case opAddrModePacketLen:
- if sz != 4 {
- return ri
- }
- return LoadExtension{Num: ExtLen}
- case opAddrModeMemShift:
- return LoadMemShift{Off: ri.K}
- default:
- return ri
- }
-
- case opClsStoreA:
- if ri.Op != opClsStoreA || ri.K > 15 {
- return ri
- }
- return StoreScratch{Src: RegA, N: int(ri.K)}
-
- case opClsStoreX:
- if ri.Op != opClsStoreX || ri.K > 15 {
- return ri
- }
- return StoreScratch{Src: RegX, N: int(ri.K)}
-
- case opClsALU:
- switch op := ALUOp(ri.Op & opMaskOperator); op {
- case ALUOpAdd, ALUOpSub, ALUOpMul, ALUOpDiv, ALUOpOr, ALUOpAnd, ALUOpShiftLeft, ALUOpShiftRight, ALUOpMod, ALUOpXor:
- switch operand := opOperand(ri.Op & opMaskOperand); operand {
- case opOperandX:
- return ALUOpX{Op: op}
- case opOperandConstant:
- return ALUOpConstant{Op: op, Val: ri.K}
- default:
- return ri
- }
- case aluOpNeg:
- return NegateA{}
- default:
- return ri
- }
-
- case opClsJump:
- switch op := jumpOp(ri.Op & opMaskOperator); op {
- case opJumpAlways:
- return Jump{Skip: ri.K}
- case opJumpEqual, opJumpGT, opJumpGE, opJumpSet:
- cond, skipTrue, skipFalse := jumpOpToTest(op, ri.Jt, ri.Jf)
- switch operand := opOperand(ri.Op & opMaskOperand); operand {
- case opOperandX:
- return JumpIfX{Cond: cond, SkipTrue: skipTrue, SkipFalse: skipFalse}
- case opOperandConstant:
- return JumpIf{Cond: cond, Val: ri.K, SkipTrue: skipTrue, SkipFalse: skipFalse}
- default:
- return ri
- }
- default:
- return ri
- }
-
- case opClsReturn:
- switch ri.Op {
- case opClsReturn | opRetSrcA:
- return RetA{}
- case opClsReturn | opRetSrcConstant:
- return RetConstant{Val: ri.K}
- default:
- return ri
- }
-
- case opClsMisc:
- switch ri.Op {
- case opClsMisc | opMiscTAX:
- return TAX{}
- case opClsMisc | opMiscTXA:
- return TXA{}
- default:
- return ri
- }
-
- default:
- panic("unreachable") // switch is exhaustive on the bit pattern
- }
-}
-
-func jumpOpToTest(op jumpOp, skipTrue uint8, skipFalse uint8) (JumpTest, uint8, uint8) {
- var test JumpTest
-
- // Decode "fake" jump conditions that don't appear in machine code
- // Ensures the Assemble -> Disassemble stage recreates the same instructions
- // See https://github.com/golang/go/issues/18470
- if skipTrue == 0 {
- switch op {
- case opJumpEqual:
- test = JumpNotEqual
- case opJumpGT:
- test = JumpLessOrEqual
- case opJumpGE:
- test = JumpLessThan
- case opJumpSet:
- test = JumpBitsNotSet
- }
-
- return test, skipFalse, 0
- }
-
- switch op {
- case opJumpEqual:
- test = JumpEqual
- case opJumpGT:
- test = JumpGreaterThan
- case opJumpGE:
- test = JumpGreaterOrEqual
- case opJumpSet:
- test = JumpBitsSet
- }
-
- return test, skipTrue, skipFalse
-}
-
-// LoadConstant loads Val into register Dst.
-type LoadConstant struct {
- Dst Register
- Val uint32
-}
-
-// Assemble implements the Instruction Assemble method.
-func (a LoadConstant) Assemble() (RawInstruction, error) {
- return assembleLoad(a.Dst, 4, opAddrModeImmediate, a.Val)
-}
-
-// String returns the instruction in assembler notation.
-func (a LoadConstant) String() string {
- switch a.Dst {
- case RegA:
- return fmt.Sprintf("ld #%d", a.Val)
- case RegX:
- return fmt.Sprintf("ldx #%d", a.Val)
- default:
- return fmt.Sprintf("unknown instruction: %#v", a)
- }
-}
-
-// LoadScratch loads scratch[N] into register Dst.
-type LoadScratch struct {
- Dst Register
- N int // 0-15
-}
-
-// Assemble implements the Instruction Assemble method.
-func (a LoadScratch) Assemble() (RawInstruction, error) {
- if a.N < 0 || a.N > 15 {
- return RawInstruction{}, fmt.Errorf("invalid scratch slot %d", a.N)
- }
- return assembleLoad(a.Dst, 4, opAddrModeScratch, uint32(a.N))
-}
-
-// String returns the instruction in assembler notation.
-func (a LoadScratch) String() string {
- switch a.Dst {
- case RegA:
- return fmt.Sprintf("ld M[%d]", a.N)
- case RegX:
- return fmt.Sprintf("ldx M[%d]", a.N)
- default:
- return fmt.Sprintf("unknown instruction: %#v", a)
- }
-}
-
-// LoadAbsolute loads packet[Off:Off+Size] as an integer value into
-// register A.
-type LoadAbsolute struct {
- Off uint32
- Size int // 1, 2 or 4
-}
-
-// Assemble implements the Instruction Assemble method.
-func (a LoadAbsolute) Assemble() (RawInstruction, error) {
- return assembleLoad(RegA, a.Size, opAddrModeAbsolute, a.Off)
-}
-
-// String returns the instruction in assembler notation.
-func (a LoadAbsolute) String() string {
- switch a.Size {
- case 1: // byte
- return fmt.Sprintf("ldb [%d]", a.Off)
- case 2: // half word
- return fmt.Sprintf("ldh [%d]", a.Off)
- case 4: // word
- if a.Off > extOffset+0xffffffff {
- return LoadExtension{Num: Extension(a.Off + 0x1000)}.String()
- }
- return fmt.Sprintf("ld [%d]", a.Off)
- default:
- return fmt.Sprintf("unknown instruction: %#v", a)
- }
-}
-
-// LoadIndirect loads packet[X+Off:X+Off+Size] as an integer value
-// into register A.
-type LoadIndirect struct {
- Off uint32
- Size int // 1, 2 or 4
-}
-
-// Assemble implements the Instruction Assemble method.
-func (a LoadIndirect) Assemble() (RawInstruction, error) {
- return assembleLoad(RegA, a.Size, opAddrModeIndirect, a.Off)
-}
-
-// String returns the instruction in assembler notation.
-func (a LoadIndirect) String() string {
- switch a.Size {
- case 1: // byte
- return fmt.Sprintf("ldb [x + %d]", a.Off)
- case 2: // half word
- return fmt.Sprintf("ldh [x + %d]", a.Off)
- case 4: // word
- return fmt.Sprintf("ld [x + %d]", a.Off)
- default:
- return fmt.Sprintf("unknown instruction: %#v", a)
- }
-}
-
-// LoadMemShift multiplies the first 4 bits of the byte at packet[Off]
-// by 4 and stores the result in register X.
-//
-// This instruction is mainly useful to load into X the length of an
-// IPv4 packet header in a single instruction, rather than have to do
-// the arithmetic on the header's first byte by hand.
-type LoadMemShift struct {
- Off uint32
-}
-
-// Assemble implements the Instruction Assemble method.
-func (a LoadMemShift) Assemble() (RawInstruction, error) {
- return assembleLoad(RegX, 1, opAddrModeMemShift, a.Off)
-}
-
-// String returns the instruction in assembler notation.
-func (a LoadMemShift) String() string {
- return fmt.Sprintf("ldx 4*([%d]&0xf)", a.Off)
-}
-
-// LoadExtension invokes a linux-specific extension and stores the
-// result in register A.
-type LoadExtension struct {
- Num Extension
-}
-
-// Assemble implements the Instruction Assemble method.
-func (a LoadExtension) Assemble() (RawInstruction, error) {
- if a.Num == ExtLen {
- return assembleLoad(RegA, 4, opAddrModePacketLen, 0)
- }
- return assembleLoad(RegA, 4, opAddrModeAbsolute, uint32(extOffset+a.Num))
-}
-
-// String returns the instruction in assembler notation.
-func (a LoadExtension) String() string {
- switch a.Num {
- case ExtLen:
- return "ld #len"
- case ExtProto:
- return "ld #proto"
- case ExtType:
- return "ld #type"
- case ExtPayloadOffset:
- return "ld #poff"
- case ExtInterfaceIndex:
- return "ld #ifidx"
- case ExtNetlinkAttr:
- return "ld #nla"
- case ExtNetlinkAttrNested:
- return "ld #nlan"
- case ExtMark:
- return "ld #mark"
- case ExtQueue:
- return "ld #queue"
- case ExtLinkLayerType:
- return "ld #hatype"
- case ExtRXHash:
- return "ld #rxhash"
- case ExtCPUID:
- return "ld #cpu"
- case ExtVLANTag:
- return "ld #vlan_tci"
- case ExtVLANTagPresent:
- return "ld #vlan_avail"
- case ExtVLANProto:
- return "ld #vlan_tpid"
- case ExtRand:
- return "ld #rand"
- default:
- return fmt.Sprintf("unknown instruction: %#v", a)
- }
-}
-
-// StoreScratch stores register Src into scratch[N].
-type StoreScratch struct {
- Src Register
- N int // 0-15
-}
-
-// Assemble implements the Instruction Assemble method.
-func (a StoreScratch) Assemble() (RawInstruction, error) {
- if a.N < 0 || a.N > 15 {
- return RawInstruction{}, fmt.Errorf("invalid scratch slot %d", a.N)
- }
- var op uint16
- switch a.Src {
- case RegA:
- op = opClsStoreA
- case RegX:
- op = opClsStoreX
- default:
- return RawInstruction{}, fmt.Errorf("invalid source register %v", a.Src)
- }
-
- return RawInstruction{
- Op: op,
- K: uint32(a.N),
- }, nil
-}
-
-// String returns the instruction in assembler notation.
-func (a StoreScratch) String() string {
- switch a.Src {
- case RegA:
- return fmt.Sprintf("st M[%d]", a.N)
- case RegX:
- return fmt.Sprintf("stx M[%d]", a.N)
- default:
- return fmt.Sprintf("unknown instruction: %#v", a)
- }
-}
-
-// ALUOpConstant executes A = A Val.
-type ALUOpConstant struct {
- Op ALUOp
- Val uint32
-}
-
-// Assemble implements the Instruction Assemble method.
-func (a ALUOpConstant) Assemble() (RawInstruction, error) {
- return RawInstruction{
- Op: opClsALU | uint16(opOperandConstant) | uint16(a.Op),
- K: a.Val,
- }, nil
-}
-
-// String returns the instruction in assembler notation.
-func (a ALUOpConstant) String() string {
- switch a.Op {
- case ALUOpAdd:
- return fmt.Sprintf("add #%d", a.Val)
- case ALUOpSub:
- return fmt.Sprintf("sub #%d", a.Val)
- case ALUOpMul:
- return fmt.Sprintf("mul #%d", a.Val)
- case ALUOpDiv:
- return fmt.Sprintf("div #%d", a.Val)
- case ALUOpMod:
- return fmt.Sprintf("mod #%d", a.Val)
- case ALUOpAnd:
- return fmt.Sprintf("and #%d", a.Val)
- case ALUOpOr:
- return fmt.Sprintf("or #%d", a.Val)
- case ALUOpXor:
- return fmt.Sprintf("xor #%d", a.Val)
- case ALUOpShiftLeft:
- return fmt.Sprintf("lsh #%d", a.Val)
- case ALUOpShiftRight:
- return fmt.Sprintf("rsh #%d", a.Val)
- default:
- return fmt.Sprintf("unknown instruction: %#v", a)
- }
-}
-
-// ALUOpX executes A = A X
-type ALUOpX struct {
- Op ALUOp
-}
-
-// Assemble implements the Instruction Assemble method.
-func (a ALUOpX) Assemble() (RawInstruction, error) {
- return RawInstruction{
- Op: opClsALU | uint16(opOperandX) | uint16(a.Op),
- }, nil
-}
-
-// String returns the instruction in assembler notation.
-func (a ALUOpX) String() string {
- switch a.Op {
- case ALUOpAdd:
- return "add x"
- case ALUOpSub:
- return "sub x"
- case ALUOpMul:
- return "mul x"
- case ALUOpDiv:
- return "div x"
- case ALUOpMod:
- return "mod x"
- case ALUOpAnd:
- return "and x"
- case ALUOpOr:
- return "or x"
- case ALUOpXor:
- return "xor x"
- case ALUOpShiftLeft:
- return "lsh x"
- case ALUOpShiftRight:
- return "rsh x"
- default:
- return fmt.Sprintf("unknown instruction: %#v", a)
- }
-}
-
-// NegateA executes A = -A.
-type NegateA struct{}
-
-// Assemble implements the Instruction Assemble method.
-func (a NegateA) Assemble() (RawInstruction, error) {
- return RawInstruction{
- Op: opClsALU | uint16(aluOpNeg),
- }, nil
-}
-
-// String returns the instruction in assembler notation.
-func (a NegateA) String() string {
- return fmt.Sprintf("neg")
-}
-
-// Jump skips the following Skip instructions in the program.
-type Jump struct {
- Skip uint32
-}
-
-// Assemble implements the Instruction Assemble method.
-func (a Jump) Assemble() (RawInstruction, error) {
- return RawInstruction{
- Op: opClsJump | uint16(opJumpAlways),
- K: a.Skip,
- }, nil
-}
-
-// String returns the instruction in assembler notation.
-func (a Jump) String() string {
- return fmt.Sprintf("ja %d", a.Skip)
-}
-
-// JumpIf skips the following Skip instructions in the program if A
-// Val is true.
-type JumpIf struct {
- Cond JumpTest
- Val uint32
- SkipTrue uint8
- SkipFalse uint8
-}
-
-// Assemble implements the Instruction Assemble method.
-func (a JumpIf) Assemble() (RawInstruction, error) {
- return jumpToRaw(a.Cond, opOperandConstant, a.Val, a.SkipTrue, a.SkipFalse)
-}
-
-// String returns the instruction in assembler notation.
-func (a JumpIf) String() string {
- return jumpToString(a.Cond, fmt.Sprintf("#%d", a.Val), a.SkipTrue, a.SkipFalse)
-}
-
-// JumpIfX skips the following Skip instructions in the program if A
-// X is true.
-type JumpIfX struct {
- Cond JumpTest
- SkipTrue uint8
- SkipFalse uint8
-}
-
-// Assemble implements the Instruction Assemble method.
-func (a JumpIfX) Assemble() (RawInstruction, error) {
- return jumpToRaw(a.Cond, opOperandX, 0, a.SkipTrue, a.SkipFalse)
-}
-
-// String returns the instruction in assembler notation.
-func (a JumpIfX) String() string {
- return jumpToString(a.Cond, "x", a.SkipTrue, a.SkipFalse)
-}
-
-// jumpToRaw assembles a jump instruction into a RawInstruction
-func jumpToRaw(test JumpTest, operand opOperand, k uint32, skipTrue, skipFalse uint8) (RawInstruction, error) {
- var (
- cond jumpOp
- flip bool
- )
- switch test {
- case JumpEqual:
- cond = opJumpEqual
- case JumpNotEqual:
- cond, flip = opJumpEqual, true
- case JumpGreaterThan:
- cond = opJumpGT
- case JumpLessThan:
- cond, flip = opJumpGE, true
- case JumpGreaterOrEqual:
- cond = opJumpGE
- case JumpLessOrEqual:
- cond, flip = opJumpGT, true
- case JumpBitsSet:
- cond = opJumpSet
- case JumpBitsNotSet:
- cond, flip = opJumpSet, true
- default:
- return RawInstruction{}, fmt.Errorf("unknown JumpTest %v", test)
- }
- jt, jf := skipTrue, skipFalse
- if flip {
- jt, jf = jf, jt
- }
- return RawInstruction{
- Op: opClsJump | uint16(cond) | uint16(operand),
- Jt: jt,
- Jf: jf,
- K: k,
- }, nil
-}
-
-// jumpToString converts a jump instruction to assembler notation
-func jumpToString(cond JumpTest, operand string, skipTrue, skipFalse uint8) string {
- switch cond {
- // K == A
- case JumpEqual:
- return conditionalJump(operand, skipTrue, skipFalse, "jeq", "jneq")
- // K != A
- case JumpNotEqual:
- return fmt.Sprintf("jneq %s,%d", operand, skipTrue)
- // K > A
- case JumpGreaterThan:
- return conditionalJump(operand, skipTrue, skipFalse, "jgt", "jle")
- // K < A
- case JumpLessThan:
- return fmt.Sprintf("jlt %s,%d", operand, skipTrue)
- // K >= A
- case JumpGreaterOrEqual:
- return conditionalJump(operand, skipTrue, skipFalse, "jge", "jlt")
- // K <= A
- case JumpLessOrEqual:
- return fmt.Sprintf("jle %s,%d", operand, skipTrue)
- // K & A != 0
- case JumpBitsSet:
- if skipFalse > 0 {
- return fmt.Sprintf("jset %s,%d,%d", operand, skipTrue, skipFalse)
- }
- return fmt.Sprintf("jset %s,%d", operand, skipTrue)
- // K & A == 0, there is no assembler instruction for JumpBitNotSet, use JumpBitSet and invert skips
- case JumpBitsNotSet:
- return jumpToString(JumpBitsSet, operand, skipFalse, skipTrue)
- default:
- return fmt.Sprintf("unknown JumpTest %#v", cond)
- }
-}
-
-func conditionalJump(operand string, skipTrue, skipFalse uint8, positiveJump, negativeJump string) string {
- if skipTrue > 0 {
- if skipFalse > 0 {
- return fmt.Sprintf("%s %s,%d,%d", positiveJump, operand, skipTrue, skipFalse)
- }
- return fmt.Sprintf("%s %s,%d", positiveJump, operand, skipTrue)
- }
- return fmt.Sprintf("%s %s,%d", negativeJump, operand, skipFalse)
-}
-
-// RetA exits the BPF program, returning the value of register A.
-type RetA struct{}
-
-// Assemble implements the Instruction Assemble method.
-func (a RetA) Assemble() (RawInstruction, error) {
- return RawInstruction{
- Op: opClsReturn | opRetSrcA,
- }, nil
-}
-
-// String returns the instruction in assembler notation.
-func (a RetA) String() string {
- return fmt.Sprintf("ret a")
-}
-
-// RetConstant exits the BPF program, returning a constant value.
-type RetConstant struct {
- Val uint32
-}
-
-// Assemble implements the Instruction Assemble method.
-func (a RetConstant) Assemble() (RawInstruction, error) {
- return RawInstruction{
- Op: opClsReturn | opRetSrcConstant,
- K: a.Val,
- }, nil
-}
-
-// String returns the instruction in assembler notation.
-func (a RetConstant) String() string {
- return fmt.Sprintf("ret #%d", a.Val)
-}
-
-// TXA copies the value of register X to register A.
-type TXA struct{}
-
-// Assemble implements the Instruction Assemble method.
-func (a TXA) Assemble() (RawInstruction, error) {
- return RawInstruction{
- Op: opClsMisc | opMiscTXA,
- }, nil
-}
-
-// String returns the instruction in assembler notation.
-func (a TXA) String() string {
- return fmt.Sprintf("txa")
-}
-
-// TAX copies the value of register A to register X.
-type TAX struct{}
-
-// Assemble implements the Instruction Assemble method.
-func (a TAX) Assemble() (RawInstruction, error) {
- return RawInstruction{
- Op: opClsMisc | opMiscTAX,
- }, nil
-}
-
-// String returns the instruction in assembler notation.
-func (a TAX) String() string {
- return fmt.Sprintf("tax")
-}
-
-func assembleLoad(dst Register, loadSize int, mode uint16, k uint32) (RawInstruction, error) {
- var (
- cls uint16
- sz uint16
- )
- switch dst {
- case RegA:
- cls = opClsLoadA
- case RegX:
- cls = opClsLoadX
- default:
- return RawInstruction{}, fmt.Errorf("invalid target register %v", dst)
- }
- switch loadSize {
- case 1:
- sz = opLoadWidth1
- case 2:
- sz = opLoadWidth2
- case 4:
- sz = opLoadWidth4
- default:
- return RawInstruction{}, fmt.Errorf("invalid load byte length %d", sz)
- }
- return RawInstruction{
- Op: cls | sz | mode,
- K: k,
- }, nil
-}
diff --git a/vendor/golang.org/x/net/bpf/setter.go b/vendor/golang.org/x/net/bpf/setter.go
deleted file mode 100644
index 43e35f0..0000000
--- a/vendor/golang.org/x/net/bpf/setter.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package bpf
-
-// A Setter is a type which can attach a compiled BPF filter to itself.
-type Setter interface {
- SetBPF(filter []RawInstruction) error
-}
diff --git a/vendor/golang.org/x/net/bpf/vm.go b/vendor/golang.org/x/net/bpf/vm.go
deleted file mode 100644
index 73f57f1..0000000
--- a/vendor/golang.org/x/net/bpf/vm.go
+++ /dev/null
@@ -1,150 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package bpf
-
-import (
- "errors"
- "fmt"
-)
-
-// A VM is an emulated BPF virtual machine.
-type VM struct {
- filter []Instruction
-}
-
-// NewVM returns a new VM using the input BPF program.
-func NewVM(filter []Instruction) (*VM, error) {
- if len(filter) == 0 {
- return nil, errors.New("one or more Instructions must be specified")
- }
-
- for i, ins := range filter {
- check := len(filter) - (i + 1)
- switch ins := ins.(type) {
- // Check for out-of-bounds jumps in instructions
- case Jump:
- if check <= int(ins.Skip) {
- return nil, fmt.Errorf("cannot jump %d instructions; jumping past program bounds", ins.Skip)
- }
- case JumpIf:
- if check <= int(ins.SkipTrue) {
- return nil, fmt.Errorf("cannot jump %d instructions in true case; jumping past program bounds", ins.SkipTrue)
- }
- if check <= int(ins.SkipFalse) {
- return nil, fmt.Errorf("cannot jump %d instructions in false case; jumping past program bounds", ins.SkipFalse)
- }
- case JumpIfX:
- if check <= int(ins.SkipTrue) {
- return nil, fmt.Errorf("cannot jump %d instructions in true case; jumping past program bounds", ins.SkipTrue)
- }
- if check <= int(ins.SkipFalse) {
- return nil, fmt.Errorf("cannot jump %d instructions in false case; jumping past program bounds", ins.SkipFalse)
- }
- // Check for division or modulus by zero
- case ALUOpConstant:
- if ins.Val != 0 {
- break
- }
-
- switch ins.Op {
- case ALUOpDiv, ALUOpMod:
- return nil, errors.New("cannot divide by zero using ALUOpConstant")
- }
- // Check for unknown extensions
- case LoadExtension:
- switch ins.Num {
- case ExtLen:
- default:
- return nil, fmt.Errorf("extension %d not implemented", ins.Num)
- }
- }
- }
-
- // Make sure last instruction is a return instruction
- switch filter[len(filter)-1].(type) {
- case RetA, RetConstant:
- default:
- return nil, errors.New("BPF program must end with RetA or RetConstant")
- }
-
- // Though our VM works using disassembled instructions, we
- // attempt to assemble the input filter anyway to ensure it is compatible
- // with an operating system VM.
- _, err := Assemble(filter)
-
- return &VM{
- filter: filter,
- }, err
-}
-
-// Run runs the VM's BPF program against the input bytes.
-// Run returns the number of bytes accepted by the BPF program, and any errors
-// which occurred while processing the program.
-func (v *VM) Run(in []byte) (int, error) {
- var (
- // Registers of the virtual machine
- regA uint32
- regX uint32
- regScratch [16]uint32
-
- // OK is true if the program should continue processing the next
- // instruction, or false if not, causing the loop to break
- ok = true
- )
-
- // TODO(mdlayher): implement:
- // - NegateA:
- // - would require a change from uint32 registers to int32
- // registers
-
- // TODO(mdlayher): add interop tests that check signedness of ALU
- // operations against kernel implementation, and make sure Go
- // implementation matches behavior
-
- for i := 0; i < len(v.filter) && ok; i++ {
- ins := v.filter[i]
-
- switch ins := ins.(type) {
- case ALUOpConstant:
- regA = aluOpConstant(ins, regA)
- case ALUOpX:
- regA, ok = aluOpX(ins, regA, regX)
- case Jump:
- i += int(ins.Skip)
- case JumpIf:
- jump := jumpIf(ins, regA)
- i += jump
- case JumpIfX:
- jump := jumpIfX(ins, regA, regX)
- i += jump
- case LoadAbsolute:
- regA, ok = loadAbsolute(ins, in)
- case LoadConstant:
- regA, regX = loadConstant(ins, regA, regX)
- case LoadExtension:
- regA = loadExtension(ins, in)
- case LoadIndirect:
- regA, ok = loadIndirect(ins, in, regX)
- case LoadMemShift:
- regX, ok = loadMemShift(ins, in)
- case LoadScratch:
- regA, regX = loadScratch(ins, regScratch, regA, regX)
- case RetA:
- return int(regA), nil
- case RetConstant:
- return int(ins.Val), nil
- case StoreScratch:
- regScratch = storeScratch(ins, regScratch, regA, regX)
- case TAX:
- regX = regA
- case TXA:
- regA = regX
- default:
- return 0, fmt.Errorf("unknown Instruction at index %d: %T", i, ins)
- }
- }
-
- return 0, nil
-}
diff --git a/vendor/golang.org/x/net/bpf/vm_instructions.go b/vendor/golang.org/x/net/bpf/vm_instructions.go
deleted file mode 100644
index cf8947c..0000000
--- a/vendor/golang.org/x/net/bpf/vm_instructions.go
+++ /dev/null
@@ -1,182 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package bpf
-
-import (
- "encoding/binary"
- "fmt"
-)
-
-func aluOpConstant(ins ALUOpConstant, regA uint32) uint32 {
- return aluOpCommon(ins.Op, regA, ins.Val)
-}
-
-func aluOpX(ins ALUOpX, regA uint32, regX uint32) (uint32, bool) {
- // Guard against division or modulus by zero by terminating
- // the program, as the OS BPF VM does
- if regX == 0 {
- switch ins.Op {
- case ALUOpDiv, ALUOpMod:
- return 0, false
- }
- }
-
- return aluOpCommon(ins.Op, regA, regX), true
-}
-
-func aluOpCommon(op ALUOp, regA uint32, value uint32) uint32 {
- switch op {
- case ALUOpAdd:
- return regA + value
- case ALUOpSub:
- return regA - value
- case ALUOpMul:
- return regA * value
- case ALUOpDiv:
- // Division by zero not permitted by NewVM and aluOpX checks
- return regA / value
- case ALUOpOr:
- return regA | value
- case ALUOpAnd:
- return regA & value
- case ALUOpShiftLeft:
- return regA << value
- case ALUOpShiftRight:
- return regA >> value
- case ALUOpMod:
- // Modulus by zero not permitted by NewVM and aluOpX checks
- return regA % value
- case ALUOpXor:
- return regA ^ value
- default:
- return regA
- }
-}
-
-func jumpIf(ins JumpIf, regA uint32) int {
- return jumpIfCommon(ins.Cond, ins.SkipTrue, ins.SkipFalse, regA, ins.Val)
-}
-
-func jumpIfX(ins JumpIfX, regA uint32, regX uint32) int {
- return jumpIfCommon(ins.Cond, ins.SkipTrue, ins.SkipFalse, regA, regX)
-}
-
-func jumpIfCommon(cond JumpTest, skipTrue, skipFalse uint8, regA uint32, value uint32) int {
- var ok bool
-
- switch cond {
- case JumpEqual:
- ok = regA == value
- case JumpNotEqual:
- ok = regA != value
- case JumpGreaterThan:
- ok = regA > value
- case JumpLessThan:
- ok = regA < value
- case JumpGreaterOrEqual:
- ok = regA >= value
- case JumpLessOrEqual:
- ok = regA <= value
- case JumpBitsSet:
- ok = (regA & value) != 0
- case JumpBitsNotSet:
- ok = (regA & value) == 0
- }
-
- if ok {
- return int(skipTrue)
- }
-
- return int(skipFalse)
-}
-
-func loadAbsolute(ins LoadAbsolute, in []byte) (uint32, bool) {
- offset := int(ins.Off)
- size := int(ins.Size)
-
- return loadCommon(in, offset, size)
-}
-
-func loadConstant(ins LoadConstant, regA uint32, regX uint32) (uint32, uint32) {
- switch ins.Dst {
- case RegA:
- regA = ins.Val
- case RegX:
- regX = ins.Val
- }
-
- return regA, regX
-}
-
-func loadExtension(ins LoadExtension, in []byte) uint32 {
- switch ins.Num {
- case ExtLen:
- return uint32(len(in))
- default:
- panic(fmt.Sprintf("unimplemented extension: %d", ins.Num))
- }
-}
-
-func loadIndirect(ins LoadIndirect, in []byte, regX uint32) (uint32, bool) {
- offset := int(ins.Off) + int(regX)
- size := int(ins.Size)
-
- return loadCommon(in, offset, size)
-}
-
-func loadMemShift(ins LoadMemShift, in []byte) (uint32, bool) {
- offset := int(ins.Off)
-
- // Size of LoadMemShift is always 1 byte
- if !inBounds(len(in), offset, 1) {
- return 0, false
- }
-
- // Mask off high 4 bits and multiply low 4 bits by 4
- return uint32(in[offset]&0x0f) * 4, true
-}
-
-func inBounds(inLen int, offset int, size int) bool {
- return offset+size <= inLen
-}
-
-func loadCommon(in []byte, offset int, size int) (uint32, bool) {
- if !inBounds(len(in), offset, size) {
- return 0, false
- }
-
- switch size {
- case 1:
- return uint32(in[offset]), true
- case 2:
- return uint32(binary.BigEndian.Uint16(in[offset : offset+size])), true
- case 4:
- return uint32(binary.BigEndian.Uint32(in[offset : offset+size])), true
- default:
- panic(fmt.Sprintf("invalid load size: %d", size))
- }
-}
-
-func loadScratch(ins LoadScratch, regScratch [16]uint32, regA uint32, regX uint32) (uint32, uint32) {
- switch ins.Dst {
- case RegA:
- regA = regScratch[ins.N]
- case RegX:
- regX = regScratch[ins.N]
- }
-
- return regA, regX
-}
-
-func storeScratch(ins StoreScratch, regScratch [16]uint32, regA uint32, regX uint32) [16]uint32 {
- switch ins.Src {
- case RegA:
- regScratch[ins.N] = regA
- case RegX:
- regScratch[ins.N] = regX
- }
-
- return regScratch
-}
diff --git a/vendor/golang.org/x/net/internal/iana/const.go b/vendor/golang.org/x/net/internal/iana/const.go
deleted file mode 100644
index cea712f..0000000
--- a/vendor/golang.org/x/net/internal/iana/const.go
+++ /dev/null
@@ -1,223 +0,0 @@
-// go generate gen.go
-// Code generated by the command above; DO NOT EDIT.
-
-// Package iana provides protocol number resources managed by the Internet Assigned Numbers Authority (IANA).
-package iana // import "golang.org/x/net/internal/iana"
-
-// Differentiated Services Field Codepoints (DSCP), Updated: 2018-05-04
-const (
- DiffServCS0 = 0x00 // CS0
- DiffServCS1 = 0x20 // CS1
- DiffServCS2 = 0x40 // CS2
- DiffServCS3 = 0x60 // CS3
- DiffServCS4 = 0x80 // CS4
- DiffServCS5 = 0xa0 // CS5
- DiffServCS6 = 0xc0 // CS6
- DiffServCS7 = 0xe0 // CS7
- DiffServAF11 = 0x28 // AF11
- DiffServAF12 = 0x30 // AF12
- DiffServAF13 = 0x38 // AF13
- DiffServAF21 = 0x48 // AF21
- DiffServAF22 = 0x50 // AF22
- DiffServAF23 = 0x58 // AF23
- DiffServAF31 = 0x68 // AF31
- DiffServAF32 = 0x70 // AF32
- DiffServAF33 = 0x78 // AF33
- DiffServAF41 = 0x88 // AF41
- DiffServAF42 = 0x90 // AF42
- DiffServAF43 = 0x98 // AF43
- DiffServEF = 0xb8 // EF
- DiffServVOICEADMIT = 0xb0 // VOICE-ADMIT
- NotECNTransport = 0x00 // Not-ECT (Not ECN-Capable Transport)
- ECNTransport1 = 0x01 // ECT(1) (ECN-Capable Transport(1))
- ECNTransport0 = 0x02 // ECT(0) (ECN-Capable Transport(0))
- CongestionExperienced = 0x03 // CE (Congestion Experienced)
-)
-
-// Protocol Numbers, Updated: 2017-10-13
-const (
- ProtocolIP = 0 // IPv4 encapsulation, pseudo protocol number
- ProtocolHOPOPT = 0 // IPv6 Hop-by-Hop Option
- ProtocolICMP = 1 // Internet Control Message
- ProtocolIGMP = 2 // Internet Group Management
- ProtocolGGP = 3 // Gateway-to-Gateway
- ProtocolIPv4 = 4 // IPv4 encapsulation
- ProtocolST = 5 // Stream
- ProtocolTCP = 6 // Transmission Control
- ProtocolCBT = 7 // CBT
- ProtocolEGP = 8 // Exterior Gateway Protocol
- ProtocolIGP = 9 // any private interior gateway (used by Cisco for their IGRP)
- ProtocolBBNRCCMON = 10 // BBN RCC Monitoring
- ProtocolNVPII = 11 // Network Voice Protocol
- ProtocolPUP = 12 // PUP
- ProtocolEMCON = 14 // EMCON
- ProtocolXNET = 15 // Cross Net Debugger
- ProtocolCHAOS = 16 // Chaos
- ProtocolUDP = 17 // User Datagram
- ProtocolMUX = 18 // Multiplexing
- ProtocolDCNMEAS = 19 // DCN Measurement Subsystems
- ProtocolHMP = 20 // Host Monitoring
- ProtocolPRM = 21 // Packet Radio Measurement
- ProtocolXNSIDP = 22 // XEROX NS IDP
- ProtocolTRUNK1 = 23 // Trunk-1
- ProtocolTRUNK2 = 24 // Trunk-2
- ProtocolLEAF1 = 25 // Leaf-1
- ProtocolLEAF2 = 26 // Leaf-2
- ProtocolRDP = 27 // Reliable Data Protocol
- ProtocolIRTP = 28 // Internet Reliable Transaction
- ProtocolISOTP4 = 29 // ISO Transport Protocol Class 4
- ProtocolNETBLT = 30 // Bulk Data Transfer Protocol
- ProtocolMFENSP = 31 // MFE Network Services Protocol
- ProtocolMERITINP = 32 // MERIT Internodal Protocol
- ProtocolDCCP = 33 // Datagram Congestion Control Protocol
- Protocol3PC = 34 // Third Party Connect Protocol
- ProtocolIDPR = 35 // Inter-Domain Policy Routing Protocol
- ProtocolXTP = 36 // XTP
- ProtocolDDP = 37 // Datagram Delivery Protocol
- ProtocolIDPRCMTP = 38 // IDPR Control Message Transport Proto
- ProtocolTPPP = 39 // TP++ Transport Protocol
- ProtocolIL = 40 // IL Transport Protocol
- ProtocolIPv6 = 41 // IPv6 encapsulation
- ProtocolSDRP = 42 // Source Demand Routing Protocol
- ProtocolIPv6Route = 43 // Routing Header for IPv6
- ProtocolIPv6Frag = 44 // Fragment Header for IPv6
- ProtocolIDRP = 45 // Inter-Domain Routing Protocol
- ProtocolRSVP = 46 // Reservation Protocol
- ProtocolGRE = 47 // Generic Routing Encapsulation
- ProtocolDSR = 48 // Dynamic Source Routing Protocol
- ProtocolBNA = 49 // BNA
- ProtocolESP = 50 // Encap Security Payload
- ProtocolAH = 51 // Authentication Header
- ProtocolINLSP = 52 // Integrated Net Layer Security TUBA
- ProtocolNARP = 54 // NBMA Address Resolution Protocol
- ProtocolMOBILE = 55 // IP Mobility
- ProtocolTLSP = 56 // Transport Layer Security Protocol using Kryptonet key management
- ProtocolSKIP = 57 // SKIP
- ProtocolIPv6ICMP = 58 // ICMP for IPv6
- ProtocolIPv6NoNxt = 59 // No Next Header for IPv6
- ProtocolIPv6Opts = 60 // Destination Options for IPv6
- ProtocolCFTP = 62 // CFTP
- ProtocolSATEXPAK = 64 // SATNET and Backroom EXPAK
- ProtocolKRYPTOLAN = 65 // Kryptolan
- ProtocolRVD = 66 // MIT Remote Virtual Disk Protocol
- ProtocolIPPC = 67 // Internet Pluribus Packet Core
- ProtocolSATMON = 69 // SATNET Monitoring
- ProtocolVISA = 70 // VISA Protocol
- ProtocolIPCV = 71 // Internet Packet Core Utility
- ProtocolCPNX = 72 // Computer Protocol Network Executive
- ProtocolCPHB = 73 // Computer Protocol Heart Beat
- ProtocolWSN = 74 // Wang Span Network
- ProtocolPVP = 75 // Packet Video Protocol
- ProtocolBRSATMON = 76 // Backroom SATNET Monitoring
- ProtocolSUNND = 77 // SUN ND PROTOCOL-Temporary
- ProtocolWBMON = 78 // WIDEBAND Monitoring
- ProtocolWBEXPAK = 79 // WIDEBAND EXPAK
- ProtocolISOIP = 80 // ISO Internet Protocol
- ProtocolVMTP = 81 // VMTP
- ProtocolSECUREVMTP = 82 // SECURE-VMTP
- ProtocolVINES = 83 // VINES
- ProtocolTTP = 84 // Transaction Transport Protocol
- ProtocolIPTM = 84 // Internet Protocol Traffic Manager
- ProtocolNSFNETIGP = 85 // NSFNET-IGP
- ProtocolDGP = 86 // Dissimilar Gateway Protocol
- ProtocolTCF = 87 // TCF
- ProtocolEIGRP = 88 // EIGRP
- ProtocolOSPFIGP = 89 // OSPFIGP
- ProtocolSpriteRPC = 90 // Sprite RPC Protocol
- ProtocolLARP = 91 // Locus Address Resolution Protocol
- ProtocolMTP = 92 // Multicast Transport Protocol
- ProtocolAX25 = 93 // AX.25 Frames
- ProtocolIPIP = 94 // IP-within-IP Encapsulation Protocol
- ProtocolSCCSP = 96 // Semaphore Communications Sec. Pro.
- ProtocolETHERIP = 97 // Ethernet-within-IP Encapsulation
- ProtocolENCAP = 98 // Encapsulation Header
- ProtocolGMTP = 100 // GMTP
- ProtocolIFMP = 101 // Ipsilon Flow Management Protocol
- ProtocolPNNI = 102 // PNNI over IP
- ProtocolPIM = 103 // Protocol Independent Multicast
- ProtocolARIS = 104 // ARIS
- ProtocolSCPS = 105 // SCPS
- ProtocolQNX = 106 // QNX
- ProtocolAN = 107 // Active Networks
- ProtocolIPComp = 108 // IP Payload Compression Protocol
- ProtocolSNP = 109 // Sitara Networks Protocol
- ProtocolCompaqPeer = 110 // Compaq Peer Protocol
- ProtocolIPXinIP = 111 // IPX in IP
- ProtocolVRRP = 112 // Virtual Router Redundancy Protocol
- ProtocolPGM = 113 // PGM Reliable Transport Protocol
- ProtocolL2TP = 115 // Layer Two Tunneling Protocol
- ProtocolDDX = 116 // D-II Data Exchange (DDX)
- ProtocolIATP = 117 // Interactive Agent Transfer Protocol
- ProtocolSTP = 118 // Schedule Transfer Protocol
- ProtocolSRP = 119 // SpectraLink Radio Protocol
- ProtocolUTI = 120 // UTI
- ProtocolSMP = 121 // Simple Message Protocol
- ProtocolPTP = 123 // Performance Transparency Protocol
- ProtocolISIS = 124 // ISIS over IPv4
- ProtocolFIRE = 125 // FIRE
- ProtocolCRTP = 126 // Combat Radio Transport Protocol
- ProtocolCRUDP = 127 // Combat Radio User Datagram
- ProtocolSSCOPMCE = 128 // SSCOPMCE
- ProtocolIPLT = 129 // IPLT
- ProtocolSPS = 130 // Secure Packet Shield
- ProtocolPIPE = 131 // Private IP Encapsulation within IP
- ProtocolSCTP = 132 // Stream Control Transmission Protocol
- ProtocolFC = 133 // Fibre Channel
- ProtocolRSVPE2EIGNORE = 134 // RSVP-E2E-IGNORE
- ProtocolMobilityHeader = 135 // Mobility Header
- ProtocolUDPLite = 136 // UDPLite
- ProtocolMPLSinIP = 137 // MPLS-in-IP
- ProtocolMANET = 138 // MANET Protocols
- ProtocolHIP = 139 // Host Identity Protocol
- ProtocolShim6 = 140 // Shim6 Protocol
- ProtocolWESP = 141 // Wrapped Encapsulating Security Payload
- ProtocolROHC = 142 // Robust Header Compression
- ProtocolReserved = 255 // Reserved
-)
-
-// Address Family Numbers, Updated: 2018-04-02
-const (
- AddrFamilyIPv4 = 1 // IP (IP version 4)
- AddrFamilyIPv6 = 2 // IP6 (IP version 6)
- AddrFamilyNSAP = 3 // NSAP
- AddrFamilyHDLC = 4 // HDLC (8-bit multidrop)
- AddrFamilyBBN1822 = 5 // BBN 1822
- AddrFamily802 = 6 // 802 (includes all 802 media plus Ethernet "canonical format")
- AddrFamilyE163 = 7 // E.163
- AddrFamilyE164 = 8 // E.164 (SMDS, Frame Relay, ATM)
- AddrFamilyF69 = 9 // F.69 (Telex)
- AddrFamilyX121 = 10 // X.121 (X.25, Frame Relay)
- AddrFamilyIPX = 11 // IPX
- AddrFamilyAppletalk = 12 // Appletalk
- AddrFamilyDecnetIV = 13 // Decnet IV
- AddrFamilyBanyanVines = 14 // Banyan Vines
- AddrFamilyE164withSubaddress = 15 // E.164 with NSAP format subaddress
- AddrFamilyDNS = 16 // DNS (Domain Name System)
- AddrFamilyDistinguishedName = 17 // Distinguished Name
- AddrFamilyASNumber = 18 // AS Number
- AddrFamilyXTPoverIPv4 = 19 // XTP over IP version 4
- AddrFamilyXTPoverIPv6 = 20 // XTP over IP version 6
- AddrFamilyXTPnativemodeXTP = 21 // XTP native mode XTP
- AddrFamilyFibreChannelWorldWidePortName = 22 // Fibre Channel World-Wide Port Name
- AddrFamilyFibreChannelWorldWideNodeName = 23 // Fibre Channel World-Wide Node Name
- AddrFamilyGWID = 24 // GWID
- AddrFamilyL2VPN = 25 // AFI for L2VPN information
- AddrFamilyMPLSTPSectionEndpointID = 26 // MPLS-TP Section Endpoint Identifier
- AddrFamilyMPLSTPLSPEndpointID = 27 // MPLS-TP LSP Endpoint Identifier
- AddrFamilyMPLSTPPseudowireEndpointID = 28 // MPLS-TP Pseudowire Endpoint Identifier
- AddrFamilyMTIPv4 = 29 // MT IP: Multi-Topology IP version 4
- AddrFamilyMTIPv6 = 30 // MT IPv6: Multi-Topology IP version 6
- AddrFamilyEIGRPCommonServiceFamily = 16384 // EIGRP Common Service Family
- AddrFamilyEIGRPIPv4ServiceFamily = 16385 // EIGRP IPv4 Service Family
- AddrFamilyEIGRPIPv6ServiceFamily = 16386 // EIGRP IPv6 Service Family
- AddrFamilyLISPCanonicalAddressFormat = 16387 // LISP Canonical Address Format (LCAF)
- AddrFamilyBGPLS = 16388 // BGP-LS
- AddrFamily48bitMAC = 16389 // 48-bit MAC
- AddrFamily64bitMAC = 16390 // 64-bit MAC
- AddrFamilyOUI = 16391 // OUI
- AddrFamilyMACFinal24bits = 16392 // MAC/24
- AddrFamilyMACFinal40bits = 16393 // MAC/40
- AddrFamilyIPv6Initial64bits = 16394 // IPv6/64
- AddrFamilyRBridgePortID = 16395 // RBridge Port ID
- AddrFamilyTRILLNickname = 16396 // TRILL Nickname
-)
diff --git a/vendor/golang.org/x/net/internal/socket/cmsghdr.go b/vendor/golang.org/x/net/internal/socket/cmsghdr.go
deleted file mode 100644
index 4bdaaaf..0000000
--- a/vendor/golang.org/x/net/internal/socket/cmsghdr.go
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris zos
-
-package socket
-
-func (h *cmsghdr) len() int { return int(h.Len) }
-func (h *cmsghdr) lvl() int { return int(h.Level) }
-func (h *cmsghdr) typ() int { return int(h.Type) }
diff --git a/vendor/golang.org/x/net/internal/socket/cmsghdr_bsd.go b/vendor/golang.org/x/net/internal/socket/cmsghdr_bsd.go
deleted file mode 100644
index 0d30e0a..0000000
--- a/vendor/golang.org/x/net/internal/socket/cmsghdr_bsd.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || netbsd || openbsd
-// +build aix darwin dragonfly freebsd netbsd openbsd
-
-package socket
-
-func (h *cmsghdr) set(l, lvl, typ int) {
- h.Len = uint32(l)
- h.Level = int32(lvl)
- h.Type = int32(typ)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/cmsghdr_linux_32bit.go b/vendor/golang.org/x/net/internal/socket/cmsghdr_linux_32bit.go
deleted file mode 100644
index 4936e8a..0000000
--- a/vendor/golang.org/x/net/internal/socket/cmsghdr_linux_32bit.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (arm || mips || mipsle || 386 || ppc) && linux
-// +build arm mips mipsle 386 ppc
-// +build linux
-
-package socket
-
-func (h *cmsghdr) set(l, lvl, typ int) {
- h.Len = uint32(l)
- h.Level = int32(lvl)
- h.Type = int32(typ)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/cmsghdr_linux_64bit.go b/vendor/golang.org/x/net/internal/socket/cmsghdr_linux_64bit.go
deleted file mode 100644
index 1ba4310..0000000
--- a/vendor/golang.org/x/net/internal/socket/cmsghdr_linux_64bit.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (arm64 || amd64 || ppc64 || ppc64le || mips64 || mips64le || riscv64 || s390x) && linux
-// +build arm64 amd64 ppc64 ppc64le mips64 mips64le riscv64 s390x
-// +build linux
-
-package socket
-
-func (h *cmsghdr) set(l, lvl, typ int) {
- h.Len = uint64(l)
- h.Level = int32(lvl)
- h.Type = int32(typ)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/cmsghdr_solaris_64bit.go b/vendor/golang.org/x/net/internal/socket/cmsghdr_solaris_64bit.go
deleted file mode 100644
index d3dbe1b..0000000
--- a/vendor/golang.org/x/net/internal/socket/cmsghdr_solaris_64bit.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build amd64 && solaris
-// +build amd64,solaris
-
-package socket
-
-func (h *cmsghdr) set(l, lvl, typ int) {
- h.Len = uint32(l)
- h.Level = int32(lvl)
- h.Type = int32(typ)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/cmsghdr_stub.go b/vendor/golang.org/x/net/internal/socket/cmsghdr_stub.go
deleted file mode 100644
index 1d9f2ed..0000000
--- a/vendor/golang.org/x/net/internal/socket/cmsghdr_stub.go
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !zos
-// +build !aix,!darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!zos
-
-package socket
-
-func controlHeaderLen() int {
- return 0
-}
-
-func controlMessageLen(dataLen int) int {
- return 0
-}
-
-func controlMessageSpace(dataLen int) int {
- return 0
-}
-
-type cmsghdr struct{}
-
-func (h *cmsghdr) len() int { return 0 }
-func (h *cmsghdr) lvl() int { return 0 }
-func (h *cmsghdr) typ() int { return 0 }
-
-func (h *cmsghdr) set(l, lvl, typ int) {}
diff --git a/vendor/golang.org/x/net/internal/socket/cmsghdr_unix.go b/vendor/golang.org/x/net/internal/socket/cmsghdr_unix.go
deleted file mode 100644
index aa1b062..0000000
--- a/vendor/golang.org/x/net/internal/socket/cmsghdr_unix.go
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
-
-package socket
-
-import "golang.org/x/sys/unix"
-
-func controlHeaderLen() int {
- return unix.CmsgLen(0)
-}
-
-func controlMessageLen(dataLen int) int {
- return unix.CmsgLen(dataLen)
-}
-
-func controlMessageSpace(dataLen int) int {
- return unix.CmsgSpace(dataLen)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/cmsghdr_zos_s390x.go b/vendor/golang.org/x/net/internal/socket/cmsghdr_zos_s390x.go
deleted file mode 100644
index 98be146..0000000
--- a/vendor/golang.org/x/net/internal/socket/cmsghdr_zos_s390x.go
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-import "syscall"
-
-func (h *cmsghdr) set(l, lvl, typ int) {
- h.Len = int32(l)
- h.Level = int32(lvl)
- h.Type = int32(typ)
-}
-
-func controlHeaderLen() int {
- return syscall.CmsgLen(0)
-}
-
-func controlMessageLen(dataLen int) int {
- return syscall.CmsgLen(dataLen)
-}
-
-func controlMessageSpace(dataLen int) int {
- return syscall.CmsgSpace(dataLen)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/complete_dontwait.go b/vendor/golang.org/x/net/internal/socket/complete_dontwait.go
deleted file mode 100644
index 5b1d50a..0000000
--- a/vendor/golang.org/x/net/internal/socket/complete_dontwait.go
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2021 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris
-// +build darwin dragonfly freebsd linux netbsd openbsd solaris
-
-package socket
-
-import (
- "syscall"
-)
-
-// ioComplete checks the flags and result of a syscall, to be used as return
-// value in a syscall.RawConn.Read or Write callback.
-func ioComplete(flags int, operr error) bool {
- if flags&syscall.MSG_DONTWAIT != 0 {
- // Caller explicitly said don't wait, so always return immediately.
- return true
- }
- if operr == syscall.EAGAIN || operr == syscall.EWOULDBLOCK {
- // No data available, block for I/O and try again.
- return false
- }
- return true
-}
diff --git a/vendor/golang.org/x/net/internal/socket/complete_nodontwait.go b/vendor/golang.org/x/net/internal/socket/complete_nodontwait.go
deleted file mode 100644
index be63409..0000000
--- a/vendor/golang.org/x/net/internal/socket/complete_nodontwait.go
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2021 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || windows || zos
-// +build aix windows zos
-
-package socket
-
-import (
- "syscall"
-)
-
-// ioComplete checks the flags and result of a syscall, to be used as return
-// value in a syscall.RawConn.Read or Write callback.
-func ioComplete(flags int, operr error) bool {
- if operr == syscall.EAGAIN || operr == syscall.EWOULDBLOCK {
- // No data available, block for I/O and try again.
- return false
- }
- return true
-}
diff --git a/vendor/golang.org/x/net/internal/socket/empty.s b/vendor/golang.org/x/net/internal/socket/empty.s
deleted file mode 100644
index 90ab4ca..0000000
--- a/vendor/golang.org/x/net/internal/socket/empty.s
+++ /dev/null
@@ -1,8 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build darwin && go1.12
-// +build darwin,go1.12
-
-// This exists solely so we can linkname in symbols from syscall.
diff --git a/vendor/golang.org/x/net/internal/socket/error_unix.go b/vendor/golang.org/x/net/internal/socket/error_unix.go
deleted file mode 100644
index 78f4129..0000000
--- a/vendor/golang.org/x/net/internal/socket/error_unix.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris zos
-
-package socket
-
-import "syscall"
-
-var (
- errEAGAIN error = syscall.EAGAIN
- errEINVAL error = syscall.EINVAL
- errENOENT error = syscall.ENOENT
-)
-
-// errnoErr returns common boxed Errno values, to prevent allocations
-// at runtime.
-func errnoErr(errno syscall.Errno) error {
- switch errno {
- case 0:
- return nil
- case syscall.EAGAIN:
- return errEAGAIN
- case syscall.EINVAL:
- return errEINVAL
- case syscall.ENOENT:
- return errENOENT
- }
- return errno
-}
diff --git a/vendor/golang.org/x/net/internal/socket/error_windows.go b/vendor/golang.org/x/net/internal/socket/error_windows.go
deleted file mode 100644
index 6a6379a..0000000
--- a/vendor/golang.org/x/net/internal/socket/error_windows.go
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-import "syscall"
-
-var (
- errERROR_IO_PENDING error = syscall.ERROR_IO_PENDING
- errEINVAL error = syscall.EINVAL
-)
-
-// errnoErr returns common boxed Errno values, to prevent allocations
-// at runtime.
-func errnoErr(errno syscall.Errno) error {
- switch errno {
- case 0:
- return nil
- case syscall.ERROR_IO_PENDING:
- return errERROR_IO_PENDING
- case syscall.EINVAL:
- return errEINVAL
- }
- return errno
-}
diff --git a/vendor/golang.org/x/net/internal/socket/iovec_32bit.go b/vendor/golang.org/x/net/internal/socket/iovec_32bit.go
deleted file mode 100644
index 2b8fbb3..0000000
--- a/vendor/golang.org/x/net/internal/socket/iovec_32bit.go
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (arm || mips || mipsle || 386 || ppc) && (darwin || dragonfly || freebsd || linux || netbsd || openbsd)
-// +build arm mips mipsle 386 ppc
-// +build darwin dragonfly freebsd linux netbsd openbsd
-
-package socket
-
-import "unsafe"
-
-func (v *iovec) set(b []byte) {
- l := len(b)
- if l == 0 {
- return
- }
- v.Base = (*byte)(unsafe.Pointer(&b[0]))
- v.Len = uint32(l)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/iovec_64bit.go b/vendor/golang.org/x/net/internal/socket/iovec_64bit.go
deleted file mode 100644
index 3dc5def..0000000
--- a/vendor/golang.org/x/net/internal/socket/iovec_64bit.go
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (arm64 || amd64 || ppc64 || ppc64le || mips64 || mips64le || riscv64 || s390x) && (aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || zos)
-// +build arm64 amd64 ppc64 ppc64le mips64 mips64le riscv64 s390x
-// +build aix darwin dragonfly freebsd linux netbsd openbsd zos
-
-package socket
-
-import "unsafe"
-
-func (v *iovec) set(b []byte) {
- l := len(b)
- if l == 0 {
- return
- }
- v.Base = (*byte)(unsafe.Pointer(&b[0]))
- v.Len = uint64(l)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/iovec_solaris_64bit.go b/vendor/golang.org/x/net/internal/socket/iovec_solaris_64bit.go
deleted file mode 100644
index f7da2bc..0000000
--- a/vendor/golang.org/x/net/internal/socket/iovec_solaris_64bit.go
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build amd64 && solaris
-// +build amd64,solaris
-
-package socket
-
-import "unsafe"
-
-func (v *iovec) set(b []byte) {
- l := len(b)
- if l == 0 {
- return
- }
- v.Base = (*int8)(unsafe.Pointer(&b[0]))
- v.Len = uint64(l)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/iovec_stub.go b/vendor/golang.org/x/net/internal/socket/iovec_stub.go
deleted file mode 100644
index 14caf52..0000000
--- a/vendor/golang.org/x/net/internal/socket/iovec_stub.go
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !zos
-// +build !aix,!darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!zos
-
-package socket
-
-type iovec struct{}
-
-func (v *iovec) set(b []byte) {}
diff --git a/vendor/golang.org/x/net/internal/socket/mmsghdr_stub.go b/vendor/golang.org/x/net/internal/socket/mmsghdr_stub.go
deleted file mode 100644
index 113e773..0000000
--- a/vendor/golang.org/x/net/internal/socket/mmsghdr_stub.go
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !linux && !netbsd
-// +build !aix,!linux,!netbsd
-
-package socket
-
-import "net"
-
-type mmsghdr struct{}
-
-type mmsghdrs []mmsghdr
-
-func (hs mmsghdrs) pack(ms []Message, parseFn func([]byte, string) (net.Addr, error), marshalFn func(net.Addr) []byte) error {
- return nil
-}
-
-func (hs mmsghdrs) unpack(ms []Message, parseFn func([]byte, string) (net.Addr, error), hint string) error {
- return nil
-}
diff --git a/vendor/golang.org/x/net/internal/socket/mmsghdr_unix.go b/vendor/golang.org/x/net/internal/socket/mmsghdr_unix.go
deleted file mode 100644
index 40ebeda..0000000
--- a/vendor/golang.org/x/net/internal/socket/mmsghdr_unix.go
+++ /dev/null
@@ -1,114 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || linux || netbsd
-// +build aix linux netbsd
-
-package socket
-
-import (
- "net"
- "sync"
-)
-
-type mmsghdrs []mmsghdr
-
-func (hs mmsghdrs) unpack(ms []Message, parseFn func([]byte, string) (net.Addr, error), hint string) error {
- for i := range hs {
- ms[i].N = int(hs[i].Len)
- ms[i].NN = hs[i].Hdr.controllen()
- ms[i].Flags = hs[i].Hdr.flags()
- if parseFn != nil {
- var err error
- ms[i].Addr, err = parseFn(hs[i].Hdr.name(), hint)
- if err != nil {
- return err
- }
- }
- }
- return nil
-}
-
-// mmsghdrsPacker packs Message-slices into mmsghdrs (re-)using pre-allocated buffers.
-type mmsghdrsPacker struct {
- // hs are the pre-allocated mmsghdrs.
- hs mmsghdrs
- // sockaddrs is the pre-allocated buffer for the Hdr.Name buffers.
- // We use one large buffer for all messages and slice it up.
- sockaddrs []byte
- // vs are the pre-allocated iovecs.
- // We allocate one large buffer for all messages and slice it up. This allows to reuse the buffer
- // if the number of buffers per message is distributed differently between calls.
- vs []iovec
-}
-
-func (p *mmsghdrsPacker) prepare(ms []Message) {
- n := len(ms)
- if n <= cap(p.hs) {
- p.hs = p.hs[:n]
- } else {
- p.hs = make(mmsghdrs, n)
- }
- if n*sizeofSockaddrInet6 <= cap(p.sockaddrs) {
- p.sockaddrs = p.sockaddrs[:n*sizeofSockaddrInet6]
- } else {
- p.sockaddrs = make([]byte, n*sizeofSockaddrInet6)
- }
-
- nb := 0
- for _, m := range ms {
- nb += len(m.Buffers)
- }
- if nb <= cap(p.vs) {
- p.vs = p.vs[:nb]
- } else {
- p.vs = make([]iovec, nb)
- }
-}
-
-func (p *mmsghdrsPacker) pack(ms []Message, parseFn func([]byte, string) (net.Addr, error), marshalFn func(net.Addr, []byte) int) mmsghdrs {
- p.prepare(ms)
- hs := p.hs
- vsRest := p.vs
- saRest := p.sockaddrs
- for i := range hs {
- nvs := len(ms[i].Buffers)
- vs := vsRest[:nvs]
- vsRest = vsRest[nvs:]
-
- var sa []byte
- if parseFn != nil {
- sa = saRest[:sizeofSockaddrInet6]
- saRest = saRest[sizeofSockaddrInet6:]
- } else if marshalFn != nil {
- n := marshalFn(ms[i].Addr, saRest)
- if n > 0 {
- sa = saRest[:n]
- saRest = saRest[n:]
- }
- }
- hs[i].Hdr.pack(vs, ms[i].Buffers, ms[i].OOB, sa)
- }
- return hs
-}
-
-var defaultMmsghdrsPool = mmsghdrsPool{
- p: sync.Pool{
- New: func() interface{} {
- return new(mmsghdrsPacker)
- },
- },
-}
-
-type mmsghdrsPool struct {
- p sync.Pool
-}
-
-func (p *mmsghdrsPool) Get() *mmsghdrsPacker {
- return p.p.Get().(*mmsghdrsPacker)
-}
-
-func (p *mmsghdrsPool) Put(packer *mmsghdrsPacker) {
- p.p.Put(packer)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/msghdr_bsd.go b/vendor/golang.org/x/net/internal/socket/msghdr_bsd.go
deleted file mode 100644
index 25f6847..0000000
--- a/vendor/golang.org/x/net/internal/socket/msghdr_bsd.go
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || netbsd || openbsd
-// +build aix darwin dragonfly freebsd netbsd openbsd
-
-package socket
-
-import "unsafe"
-
-func (h *msghdr) pack(vs []iovec, bs [][]byte, oob []byte, sa []byte) {
- for i := range vs {
- vs[i].set(bs[i])
- }
- h.setIov(vs)
- if len(oob) > 0 {
- h.Control = (*byte)(unsafe.Pointer(&oob[0]))
- h.Controllen = uint32(len(oob))
- }
- if sa != nil {
- h.Name = (*byte)(unsafe.Pointer(&sa[0]))
- h.Namelen = uint32(len(sa))
- }
-}
-
-func (h *msghdr) name() []byte {
- if h.Name != nil && h.Namelen > 0 {
- return (*[sizeofSockaddrInet6]byte)(unsafe.Pointer(h.Name))[:h.Namelen]
- }
- return nil
-}
-
-func (h *msghdr) controllen() int {
- return int(h.Controllen)
-}
-
-func (h *msghdr) flags() int {
- return int(h.Flags)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/msghdr_bsdvar.go b/vendor/golang.org/x/net/internal/socket/msghdr_bsdvar.go
deleted file mode 100644
index 5b8e00f..0000000
--- a/vendor/golang.org/x/net/internal/socket/msghdr_bsdvar.go
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || netbsd
-// +build aix darwin dragonfly freebsd netbsd
-
-package socket
-
-func (h *msghdr) setIov(vs []iovec) {
- l := len(vs)
- if l == 0 {
- return
- }
- h.Iov = &vs[0]
- h.Iovlen = int32(l)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/msghdr_linux.go b/vendor/golang.org/x/net/internal/socket/msghdr_linux.go
deleted file mode 100644
index c3c7cc4..0000000
--- a/vendor/golang.org/x/net/internal/socket/msghdr_linux.go
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-import "unsafe"
-
-func (h *msghdr) pack(vs []iovec, bs [][]byte, oob []byte, sa []byte) {
- for i := range vs {
- vs[i].set(bs[i])
- }
- h.setIov(vs)
- if len(oob) > 0 {
- h.setControl(oob)
- }
- if sa != nil {
- h.Name = (*byte)(unsafe.Pointer(&sa[0]))
- h.Namelen = uint32(len(sa))
- } else {
- h.Name = nil
- h.Namelen = 0
- }
-}
-
-func (h *msghdr) name() []byte {
- if h.Name != nil && h.Namelen > 0 {
- return (*[sizeofSockaddrInet6]byte)(unsafe.Pointer(h.Name))[:h.Namelen]
- }
- return nil
-}
-
-func (h *msghdr) controllen() int {
- return int(h.Controllen)
-}
-
-func (h *msghdr) flags() int {
- return int(h.Flags)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/msghdr_linux_32bit.go b/vendor/golang.org/x/net/internal/socket/msghdr_linux_32bit.go
deleted file mode 100644
index b4658fb..0000000
--- a/vendor/golang.org/x/net/internal/socket/msghdr_linux_32bit.go
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (arm || mips || mipsle || 386 || ppc) && linux
-// +build arm mips mipsle 386 ppc
-// +build linux
-
-package socket
-
-import "unsafe"
-
-func (h *msghdr) setIov(vs []iovec) {
- l := len(vs)
- if l == 0 {
- return
- }
- h.Iov = &vs[0]
- h.Iovlen = uint32(l)
-}
-
-func (h *msghdr) setControl(b []byte) {
- h.Control = (*byte)(unsafe.Pointer(&b[0]))
- h.Controllen = uint32(len(b))
-}
diff --git a/vendor/golang.org/x/net/internal/socket/msghdr_linux_64bit.go b/vendor/golang.org/x/net/internal/socket/msghdr_linux_64bit.go
deleted file mode 100644
index c9c592d..0000000
--- a/vendor/golang.org/x/net/internal/socket/msghdr_linux_64bit.go
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (arm64 || amd64 || ppc64 || ppc64le || mips64 || mips64le || riscv64 || s390x) && linux
-// +build arm64 amd64 ppc64 ppc64le mips64 mips64le riscv64 s390x
-// +build linux
-
-package socket
-
-import "unsafe"
-
-func (h *msghdr) setIov(vs []iovec) {
- l := len(vs)
- if l == 0 {
- return
- }
- h.Iov = &vs[0]
- h.Iovlen = uint64(l)
-}
-
-func (h *msghdr) setControl(b []byte) {
- h.Control = (*byte)(unsafe.Pointer(&b[0]))
- h.Controllen = uint64(len(b))
-}
diff --git a/vendor/golang.org/x/net/internal/socket/msghdr_openbsd.go b/vendor/golang.org/x/net/internal/socket/msghdr_openbsd.go
deleted file mode 100644
index 71a69e2..0000000
--- a/vendor/golang.org/x/net/internal/socket/msghdr_openbsd.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-func (h *msghdr) setIov(vs []iovec) {
- l := len(vs)
- if l == 0 {
- return
- }
- h.Iov = &vs[0]
- h.Iovlen = uint32(l)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/msghdr_solaris_64bit.go b/vendor/golang.org/x/net/internal/socket/msghdr_solaris_64bit.go
deleted file mode 100644
index 3098f5d..0000000
--- a/vendor/golang.org/x/net/internal/socket/msghdr_solaris_64bit.go
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build amd64 && solaris
-// +build amd64,solaris
-
-package socket
-
-import "unsafe"
-
-func (h *msghdr) pack(vs []iovec, bs [][]byte, oob []byte, sa []byte) {
- for i := range vs {
- vs[i].set(bs[i])
- }
- if len(vs) > 0 {
- h.Iov = &vs[0]
- h.Iovlen = int32(len(vs))
- }
- if len(oob) > 0 {
- h.Accrights = (*int8)(unsafe.Pointer(&oob[0]))
- h.Accrightslen = int32(len(oob))
- }
- if sa != nil {
- h.Name = (*byte)(unsafe.Pointer(&sa[0]))
- h.Namelen = uint32(len(sa))
- }
-}
-
-func (h *msghdr) controllen() int {
- return int(h.Accrightslen)
-}
-
-func (h *msghdr) flags() int {
- return int(NativeEndian.Uint32(h.Pad_cgo_2[:]))
-}
diff --git a/vendor/golang.org/x/net/internal/socket/msghdr_stub.go b/vendor/golang.org/x/net/internal/socket/msghdr_stub.go
deleted file mode 100644
index eb79151..0000000
--- a/vendor/golang.org/x/net/internal/socket/msghdr_stub.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !zos
-// +build !aix,!darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!zos
-
-package socket
-
-type msghdr struct{}
-
-func (h *msghdr) pack(vs []iovec, bs [][]byte, oob []byte, sa []byte) {}
-func (h *msghdr) name() []byte { return nil }
-func (h *msghdr) controllen() int { return 0 }
-func (h *msghdr) flags() int { return 0 }
diff --git a/vendor/golang.org/x/net/internal/socket/msghdr_zos_s390x.go b/vendor/golang.org/x/net/internal/socket/msghdr_zos_s390x.go
deleted file mode 100644
index 324e9ee..0000000
--- a/vendor/golang.org/x/net/internal/socket/msghdr_zos_s390x.go
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build s390x && zos
-// +build s390x,zos
-
-package socket
-
-import "unsafe"
-
-func (h *msghdr) pack(vs []iovec, bs [][]byte, oob []byte, sa []byte) {
- for i := range vs {
- vs[i].set(bs[i])
- }
- if len(vs) > 0 {
- h.Iov = &vs[0]
- h.Iovlen = int32(len(vs))
- }
- if len(oob) > 0 {
- h.Control = (*byte)(unsafe.Pointer(&oob[0]))
- h.Controllen = uint32(len(oob))
- }
- if sa != nil {
- h.Name = (*byte)(unsafe.Pointer(&sa[0]))
- h.Namelen = uint32(len(sa))
- }
-}
-
-func (h *msghdr) controllen() int {
- return int(h.Controllen)
-}
-
-func (h *msghdr) flags() int {
- return int(h.Flags)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/norace.go b/vendor/golang.org/x/net/internal/socket/norace.go
deleted file mode 100644
index de0ad42..0000000
--- a/vendor/golang.org/x/net/internal/socket/norace.go
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !race
-// +build !race
-
-package socket
-
-func (m *Message) raceRead() {
-}
-func (m *Message) raceWrite() {
-}
diff --git a/vendor/golang.org/x/net/internal/socket/race.go b/vendor/golang.org/x/net/internal/socket/race.go
deleted file mode 100644
index f0a28a6..0000000
--- a/vendor/golang.org/x/net/internal/socket/race.go
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build race
-// +build race
-
-package socket
-
-import (
- "runtime"
- "unsafe"
-)
-
-// This package reads and writes the Message buffers using a
-// direct system call, which the race detector can't see.
-// These functions tell the race detector what is going on during the syscall.
-
-func (m *Message) raceRead() {
- for _, b := range m.Buffers {
- if len(b) > 0 {
- runtime.RaceReadRange(unsafe.Pointer(&b[0]), len(b))
- }
- }
- if b := m.OOB; len(b) > 0 {
- runtime.RaceReadRange(unsafe.Pointer(&b[0]), len(b))
- }
-}
-func (m *Message) raceWrite() {
- for _, b := range m.Buffers {
- if len(b) > 0 {
- runtime.RaceWriteRange(unsafe.Pointer(&b[0]), len(b))
- }
- }
- if b := m.OOB; len(b) > 0 {
- runtime.RaceWriteRange(unsafe.Pointer(&b[0]), len(b))
- }
-}
diff --git a/vendor/golang.org/x/net/internal/socket/rawconn.go b/vendor/golang.org/x/net/internal/socket/rawconn.go
deleted file mode 100644
index 87e8107..0000000
--- a/vendor/golang.org/x/net/internal/socket/rawconn.go
+++ /dev/null
@@ -1,91 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-import (
- "errors"
- "net"
- "os"
- "syscall"
-)
-
-// A Conn represents a raw connection.
-type Conn struct {
- network string
- c syscall.RawConn
-}
-
-// tcpConn is an interface implemented by net.TCPConn.
-// It can be used for interface assertions to check if a net.Conn is a TCP connection.
-type tcpConn interface {
- SyscallConn() (syscall.RawConn, error)
- SetLinger(int) error
-}
-
-var _ tcpConn = (*net.TCPConn)(nil)
-
-// udpConn is an interface implemented by net.UDPConn.
-// It can be used for interface assertions to check if a net.Conn is a UDP connection.
-type udpConn interface {
- SyscallConn() (syscall.RawConn, error)
- ReadMsgUDP(b, oob []byte) (n, oobn, flags int, addr *net.UDPAddr, err error)
-}
-
-var _ udpConn = (*net.UDPConn)(nil)
-
-// ipConn is an interface implemented by net.IPConn.
-// It can be used for interface assertions to check if a net.Conn is an IP connection.
-type ipConn interface {
- SyscallConn() (syscall.RawConn, error)
- ReadMsgIP(b, oob []byte) (n, oobn, flags int, addr *net.IPAddr, err error)
-}
-
-var _ ipConn = (*net.IPConn)(nil)
-
-// NewConn returns a new raw connection.
-func NewConn(c net.Conn) (*Conn, error) {
- var err error
- var cc Conn
- switch c := c.(type) {
- case tcpConn:
- cc.network = "tcp"
- cc.c, err = c.SyscallConn()
- case udpConn:
- cc.network = "udp"
- cc.c, err = c.SyscallConn()
- case ipConn:
- cc.network = "ip"
- cc.c, err = c.SyscallConn()
- default:
- return nil, errors.New("unknown connection type")
- }
- if err != nil {
- return nil, err
- }
- return &cc, nil
-}
-
-func (o *Option) get(c *Conn, b []byte) (int, error) {
- var operr error
- var n int
- fn := func(s uintptr) {
- n, operr = getsockopt(s, o.Level, o.Name, b)
- }
- if err := c.c.Control(fn); err != nil {
- return 0, err
- }
- return n, os.NewSyscallError("getsockopt", operr)
-}
-
-func (o *Option) set(c *Conn, b []byte) error {
- var operr error
- fn := func(s uintptr) {
- operr = setsockopt(s, o.Level, o.Name, b)
- }
- if err := c.c.Control(fn); err != nil {
- return err
- }
- return os.NewSyscallError("setsockopt", operr)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/rawconn_mmsg.go b/vendor/golang.org/x/net/internal/socket/rawconn_mmsg.go
deleted file mode 100644
index 3fcb51b..0000000
--- a/vendor/golang.org/x/net/internal/socket/rawconn_mmsg.go
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build linux
-// +build linux
-
-package socket
-
-import (
- "net"
- "os"
-)
-
-func (c *Conn) recvMsgs(ms []Message, flags int) (int, error) {
- for i := range ms {
- ms[i].raceWrite()
- }
- packer := defaultMmsghdrsPool.Get()
- defer defaultMmsghdrsPool.Put(packer)
- var parseFn func([]byte, string) (net.Addr, error)
- if c.network != "tcp" {
- parseFn = parseInetAddr
- }
- hs := packer.pack(ms, parseFn, nil)
- var operr error
- var n int
- fn := func(s uintptr) bool {
- n, operr = recvmmsg(s, hs, flags)
- return ioComplete(flags, operr)
- }
- if err := c.c.Read(fn); err != nil {
- return n, err
- }
- if operr != nil {
- return n, os.NewSyscallError("recvmmsg", operr)
- }
- if err := hs[:n].unpack(ms[:n], parseFn, c.network); err != nil {
- return n, err
- }
- return n, nil
-}
-
-func (c *Conn) sendMsgs(ms []Message, flags int) (int, error) {
- for i := range ms {
- ms[i].raceRead()
- }
- packer := defaultMmsghdrsPool.Get()
- defer defaultMmsghdrsPool.Put(packer)
- var marshalFn func(net.Addr, []byte) int
- if c.network != "tcp" {
- marshalFn = marshalInetAddr
- }
- hs := packer.pack(ms, nil, marshalFn)
- var operr error
- var n int
- fn := func(s uintptr) bool {
- n, operr = sendmmsg(s, hs, flags)
- return ioComplete(flags, operr)
- }
- if err := c.c.Write(fn); err != nil {
- return n, err
- }
- if operr != nil {
- return n, os.NewSyscallError("sendmmsg", operr)
- }
- if err := hs[:n].unpack(ms[:n], nil, ""); err != nil {
- return n, err
- }
- return n, nil
-}
diff --git a/vendor/golang.org/x/net/internal/socket/rawconn_msg.go b/vendor/golang.org/x/net/internal/socket/rawconn_msg.go
deleted file mode 100644
index ba53f56..0000000
--- a/vendor/golang.org/x/net/internal/socket/rawconn_msg.go
+++ /dev/null
@@ -1,74 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || windows || zos
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris windows zos
-
-package socket
-
-import (
- "os"
-)
-
-func (c *Conn) recvMsg(m *Message, flags int) error {
- m.raceWrite()
- var h msghdr
- vs := make([]iovec, len(m.Buffers))
- var sa []byte
- if c.network != "tcp" {
- sa = make([]byte, sizeofSockaddrInet6)
- }
- h.pack(vs, m.Buffers, m.OOB, sa)
- var operr error
- var n int
- fn := func(s uintptr) bool {
- n, operr = recvmsg(s, &h, flags)
- return ioComplete(flags, operr)
- }
- if err := c.c.Read(fn); err != nil {
- return err
- }
- if operr != nil {
- return os.NewSyscallError("recvmsg", operr)
- }
- if c.network != "tcp" {
- var err error
- m.Addr, err = parseInetAddr(sa[:], c.network)
- if err != nil {
- return err
- }
- }
- m.N = n
- m.NN = h.controllen()
- m.Flags = h.flags()
- return nil
-}
-
-func (c *Conn) sendMsg(m *Message, flags int) error {
- m.raceRead()
- var h msghdr
- vs := make([]iovec, len(m.Buffers))
- var sa []byte
- if m.Addr != nil {
- var a [sizeofSockaddrInet6]byte
- n := marshalInetAddr(m.Addr, a[:])
- sa = a[:n]
- }
- h.pack(vs, m.Buffers, m.OOB, sa)
- var operr error
- var n int
- fn := func(s uintptr) bool {
- n, operr = sendmsg(s, &h, flags)
- return ioComplete(flags, operr)
- }
- if err := c.c.Write(fn); err != nil {
- return err
- }
- if operr != nil {
- return os.NewSyscallError("sendmsg", operr)
- }
- m.N = n
- m.NN = len(m.OOB)
- return nil
-}
diff --git a/vendor/golang.org/x/net/internal/socket/rawconn_nommsg.go b/vendor/golang.org/x/net/internal/socket/rawconn_nommsg.go
deleted file mode 100644
index 02f3285..0000000
--- a/vendor/golang.org/x/net/internal/socket/rawconn_nommsg.go
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !linux
-// +build !linux
-
-package socket
-
-func (c *Conn) recvMsgs(ms []Message, flags int) (int, error) {
- return 0, errNotImplemented
-}
-
-func (c *Conn) sendMsgs(ms []Message, flags int) (int, error) {
- return 0, errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/internal/socket/rawconn_nomsg.go b/vendor/golang.org/x/net/internal/socket/rawconn_nomsg.go
deleted file mode 100644
index dd78587..0000000
--- a/vendor/golang.org/x/net/internal/socket/rawconn_nomsg.go
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !windows && !zos
-// +build !aix,!darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows,!zos
-
-package socket
-
-func (c *Conn) recvMsg(m *Message, flags int) error {
- return errNotImplemented
-}
-
-func (c *Conn) sendMsg(m *Message, flags int) error {
- return errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/internal/socket/socket.go b/vendor/golang.org/x/net/internal/socket/socket.go
deleted file mode 100644
index dba47bf..0000000
--- a/vendor/golang.org/x/net/internal/socket/socket.go
+++ /dev/null
@@ -1,280 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package socket provides a portable interface for socket system
-// calls.
-package socket // import "golang.org/x/net/internal/socket"
-
-import (
- "errors"
- "net"
- "runtime"
- "unsafe"
-)
-
-var errNotImplemented = errors.New("not implemented on " + runtime.GOOS + "/" + runtime.GOARCH)
-
-// An Option represents a sticky socket option.
-type Option struct {
- Level int // level
- Name int // name; must be equal or greater than 1
- Len int // length of value in bytes; must be equal or greater than 1
-}
-
-// Get reads a value for the option from the kernel.
-// It returns the number of bytes written into b.
-func (o *Option) Get(c *Conn, b []byte) (int, error) {
- if o.Name < 1 || o.Len < 1 {
- return 0, errors.New("invalid option")
- }
- if len(b) < o.Len {
- return 0, errors.New("short buffer")
- }
- return o.get(c, b)
-}
-
-// GetInt returns an integer value for the option.
-//
-// The Len field of Option must be either 1 or 4.
-func (o *Option) GetInt(c *Conn) (int, error) {
- if o.Len != 1 && o.Len != 4 {
- return 0, errors.New("invalid option")
- }
- var b []byte
- var bb [4]byte
- if o.Len == 1 {
- b = bb[:1]
- } else {
- b = bb[:4]
- }
- n, err := o.get(c, b)
- if err != nil {
- return 0, err
- }
- if n != o.Len {
- return 0, errors.New("invalid option length")
- }
- if o.Len == 1 {
- return int(b[0]), nil
- }
- return int(NativeEndian.Uint32(b[:4])), nil
-}
-
-// Set writes the option and value to the kernel.
-func (o *Option) Set(c *Conn, b []byte) error {
- if o.Name < 1 || o.Len < 1 {
- return errors.New("invalid option")
- }
- if len(b) < o.Len {
- return errors.New("short buffer")
- }
- return o.set(c, b)
-}
-
-// SetInt writes the option and value to the kernel.
-//
-// The Len field of Option must be either 1 or 4.
-func (o *Option) SetInt(c *Conn, v int) error {
- if o.Len != 1 && o.Len != 4 {
- return errors.New("invalid option")
- }
- var b []byte
- if o.Len == 1 {
- b = []byte{byte(v)}
- } else {
- var bb [4]byte
- NativeEndian.PutUint32(bb[:o.Len], uint32(v))
- b = bb[:4]
- }
- return o.set(c, b)
-}
-
-// ControlMessageSpace returns the whole length of control message.
-func ControlMessageSpace(dataLen int) int {
- return controlMessageSpace(dataLen)
-}
-
-// A ControlMessage represents the head message in a stream of control
-// messages.
-//
-// A control message comprises of a header, data and a few padding
-// fields to conform to the interface to the kernel.
-//
-// See RFC 3542 for further information.
-type ControlMessage []byte
-
-// Data returns the data field of the control message at the head on
-// m.
-func (m ControlMessage) Data(dataLen int) []byte {
- l := controlHeaderLen()
- if len(m) < l || len(m) < l+dataLen {
- return nil
- }
- return m[l : l+dataLen]
-}
-
-// Next returns the control message at the next on m.
-//
-// Next works only for standard control messages.
-func (m ControlMessage) Next(dataLen int) ControlMessage {
- l := ControlMessageSpace(dataLen)
- if len(m) < l {
- return nil
- }
- return m[l:]
-}
-
-// MarshalHeader marshals the header fields of the control message at
-// the head on m.
-func (m ControlMessage) MarshalHeader(lvl, typ, dataLen int) error {
- if len(m) < controlHeaderLen() {
- return errors.New("short message")
- }
- h := (*cmsghdr)(unsafe.Pointer(&m[0]))
- h.set(controlMessageLen(dataLen), lvl, typ)
- return nil
-}
-
-// ParseHeader parses and returns the header fields of the control
-// message at the head on m.
-func (m ControlMessage) ParseHeader() (lvl, typ, dataLen int, err error) {
- l := controlHeaderLen()
- if len(m) < l {
- return 0, 0, 0, errors.New("short message")
- }
- h := (*cmsghdr)(unsafe.Pointer(&m[0]))
- return h.lvl(), h.typ(), int(uint64(h.len()) - uint64(l)), nil
-}
-
-// Marshal marshals the control message at the head on m, and returns
-// the next control message.
-func (m ControlMessage) Marshal(lvl, typ int, data []byte) (ControlMessage, error) {
- l := len(data)
- if len(m) < ControlMessageSpace(l) {
- return nil, errors.New("short message")
- }
- h := (*cmsghdr)(unsafe.Pointer(&m[0]))
- h.set(controlMessageLen(l), lvl, typ)
- if l > 0 {
- copy(m.Data(l), data)
- }
- return m.Next(l), nil
-}
-
-// Parse parses m as a single or multiple control messages.
-//
-// Parse works for both standard and compatible messages.
-func (m ControlMessage) Parse() ([]ControlMessage, error) {
- var ms []ControlMessage
- for len(m) >= controlHeaderLen() {
- h := (*cmsghdr)(unsafe.Pointer(&m[0]))
- l := h.len()
- if l <= 0 {
- return nil, errors.New("invalid header length")
- }
- if uint64(l) < uint64(controlHeaderLen()) {
- return nil, errors.New("invalid message length")
- }
- if uint64(l) > uint64(len(m)) {
- return nil, errors.New("short buffer")
- }
- // On message reception:
- //
- // |<- ControlMessageSpace --------------->|
- // |<- controlMessageLen ---------->| |
- // |<- controlHeaderLen ->| | |
- // +---------------+------+---------+------+
- // | Header | PadH | Data | PadD |
- // +---------------+------+---------+------+
- //
- // On compatible message reception:
- //
- // | ... |<- controlMessageLen ----------->|
- // | ... |<- controlHeaderLen ->| |
- // +-----+---------------+------+----------+
- // | ... | Header | PadH | Data |
- // +-----+---------------+------+----------+
- ms = append(ms, ControlMessage(m[:l]))
- ll := l - controlHeaderLen()
- if len(m) >= ControlMessageSpace(ll) {
- m = m[ControlMessageSpace(ll):]
- } else {
- m = m[controlMessageLen(ll):]
- }
- }
- return ms, nil
-}
-
-// NewControlMessage returns a new stream of control messages.
-func NewControlMessage(dataLen []int) ControlMessage {
- var l int
- for i := range dataLen {
- l += ControlMessageSpace(dataLen[i])
- }
- return make([]byte, l)
-}
-
-// A Message represents an IO message.
-type Message struct {
- // When writing, the Buffers field must contain at least one
- // byte to write.
- // When reading, the Buffers field will always contain a byte
- // to read.
- Buffers [][]byte
-
- // OOB contains protocol-specific control or miscellaneous
- // ancillary data known as out-of-band data.
- OOB []byte
-
- // Addr specifies a destination address when writing.
- // It can be nil when the underlying protocol of the raw
- // connection uses connection-oriented communication.
- // After a successful read, it may contain the source address
- // on the received packet.
- Addr net.Addr
-
- N int // # of bytes read or written from/to Buffers
- NN int // # of bytes read or written from/to OOB
- Flags int // protocol-specific information on the received message
-}
-
-// RecvMsg wraps recvmsg system call.
-//
-// The provided flags is a set of platform-dependent flags, such as
-// syscall.MSG_PEEK.
-func (c *Conn) RecvMsg(m *Message, flags int) error {
- return c.recvMsg(m, flags)
-}
-
-// SendMsg wraps sendmsg system call.
-//
-// The provided flags is a set of platform-dependent flags, such as
-// syscall.MSG_DONTROUTE.
-func (c *Conn) SendMsg(m *Message, flags int) error {
- return c.sendMsg(m, flags)
-}
-
-// RecvMsgs wraps recvmmsg system call.
-//
-// It returns the number of processed messages.
-//
-// The provided flags is a set of platform-dependent flags, such as
-// syscall.MSG_PEEK.
-//
-// Only Linux supports this.
-func (c *Conn) RecvMsgs(ms []Message, flags int) (int, error) {
- return c.recvMsgs(ms, flags)
-}
-
-// SendMsgs wraps sendmmsg system call.
-//
-// It returns the number of processed messages.
-//
-// The provided flags is a set of platform-dependent flags, such as
-// syscall.MSG_DONTROUTE.
-//
-// Only Linux supports this.
-func (c *Conn) SendMsgs(ms []Message, flags int) (int, error) {
- return c.sendMsgs(ms, flags)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/sys.go b/vendor/golang.org/x/net/internal/socket/sys.go
deleted file mode 100644
index 4a26af1..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys.go
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-import (
- "encoding/binary"
- "unsafe"
-)
-
-// NativeEndian is the machine native endian implementation of ByteOrder.
-var NativeEndian binary.ByteOrder
-
-func init() {
- i := uint32(1)
- b := (*[4]byte)(unsafe.Pointer(&i))
- if b[0] == 1 {
- NativeEndian = binary.LittleEndian
- } else {
- NativeEndian = binary.BigEndian
- }
-}
diff --git a/vendor/golang.org/x/net/internal/socket/sys_bsd.go b/vendor/golang.org/x/net/internal/socket/sys_bsd.go
deleted file mode 100644
index b6cd770..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_bsd.go
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || openbsd
-// +build aix darwin dragonfly freebsd openbsd
-
-package socket
-
-func recvmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
- return 0, errNotImplemented
-}
-
-func sendmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
- return 0, errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/internal/socket/sys_const_unix.go b/vendor/golang.org/x/net/internal/socket/sys_const_unix.go
deleted file mode 100644
index 5d99f23..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_const_unix.go
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris zos
-
-package socket
-
-import "golang.org/x/sys/unix"
-
-const (
- sysAF_UNSPEC = unix.AF_UNSPEC
- sysAF_INET = unix.AF_INET
- sysAF_INET6 = unix.AF_INET6
-
- sysSOCK_RAW = unix.SOCK_RAW
-
- sizeofSockaddrInet4 = unix.SizeofSockaddrInet4
- sizeofSockaddrInet6 = unix.SizeofSockaddrInet6
-)
diff --git a/vendor/golang.org/x/net/internal/socket/sys_linkname.go b/vendor/golang.org/x/net/internal/socket/sys_linkname.go
deleted file mode 100644
index 21734af..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_linkname.go
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || (go1.12 && darwin)
-// +build aix go1.12,darwin
-
-package socket
-
-import (
- "syscall"
- "unsafe"
-)
-
-//go:linkname syscall_getsockopt syscall.getsockopt
-func syscall_getsockopt(s int, level int, name int, val unsafe.Pointer, vallen *uint32) error
-
-func getsockopt(s uintptr, level, name int, b []byte) (int, error) {
- l := uint32(len(b))
- err := syscall_getsockopt(int(s), level, name, unsafe.Pointer(&b[0]), &l)
- return int(l), err
-}
-
-//go:linkname syscall_setsockopt syscall.setsockopt
-func syscall_setsockopt(s int, level int, name int, val unsafe.Pointer, vallen uintptr) error
-
-func setsockopt(s uintptr, level, name int, b []byte) error {
- return syscall_setsockopt(int(s), level, name, unsafe.Pointer(&b[0]), uintptr(len(b)))
-}
-
-//go:linkname syscall_recvmsg syscall.recvmsg
-func syscall_recvmsg(s int, msg *syscall.Msghdr, flags int) (n int, err error)
-
-func recvmsg(s uintptr, h *msghdr, flags int) (int, error) {
- return syscall_recvmsg(int(s), (*syscall.Msghdr)(unsafe.Pointer(h)), flags)
-}
-
-//go:linkname syscall_sendmsg syscall.sendmsg
-func syscall_sendmsg(s int, msg *syscall.Msghdr, flags int) (n int, err error)
-
-func sendmsg(s uintptr, h *msghdr, flags int) (int, error) {
- return syscall_sendmsg(int(s), (*syscall.Msghdr)(unsafe.Pointer(h)), flags)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/sys_linux.go b/vendor/golang.org/x/net/internal/socket/sys_linux.go
deleted file mode 100644
index 76f5b8a..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_linux.go
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build linux && !s390x && !386
-// +build linux,!s390x,!386
-
-package socket
-
-import (
- "syscall"
- "unsafe"
-)
-
-func recvmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
- n, _, errno := syscall.Syscall6(sysRECVMMSG, s, uintptr(unsafe.Pointer(&hs[0])), uintptr(len(hs)), uintptr(flags), 0, 0)
- return int(n), errnoErr(errno)
-}
-
-func sendmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
- n, _, errno := syscall.Syscall6(sysSENDMMSG, s, uintptr(unsafe.Pointer(&hs[0])), uintptr(len(hs)), uintptr(flags), 0, 0)
- return int(n), errnoErr(errno)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/sys_linux_386.go b/vendor/golang.org/x/net/internal/socket/sys_linux_386.go
deleted file mode 100644
index 6512153..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_linux_386.go
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-import (
- "syscall"
- "unsafe"
-)
-
-const (
- sysSETSOCKOPT = 0xe
- sysGETSOCKOPT = 0xf
- sysSENDMSG = 0x10
- sysRECVMSG = 0x11
- sysRECVMMSG = 0x13
- sysSENDMMSG = 0x14
-)
-
-func socketcall(call, a0, a1, a2, a3, a4, a5 uintptr) (uintptr, syscall.Errno)
-func rawsocketcall(call, a0, a1, a2, a3, a4, a5 uintptr) (uintptr, syscall.Errno)
-
-func getsockopt(s uintptr, level, name int, b []byte) (int, error) {
- l := uint32(len(b))
- _, errno := socketcall(sysGETSOCKOPT, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(unsafe.Pointer(&l)), 0)
- return int(l), errnoErr(errno)
-}
-
-func setsockopt(s uintptr, level, name int, b []byte) error {
- _, errno := socketcall(sysSETSOCKOPT, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), 0)
- return errnoErr(errno)
-}
-
-func recvmsg(s uintptr, h *msghdr, flags int) (int, error) {
- n, errno := socketcall(sysRECVMSG, s, uintptr(unsafe.Pointer(h)), uintptr(flags), 0, 0, 0)
- return int(n), errnoErr(errno)
-}
-
-func sendmsg(s uintptr, h *msghdr, flags int) (int, error) {
- n, errno := socketcall(sysSENDMSG, s, uintptr(unsafe.Pointer(h)), uintptr(flags), 0, 0, 0)
- return int(n), errnoErr(errno)
-}
-
-func recvmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
- n, errno := socketcall(sysRECVMMSG, s, uintptr(unsafe.Pointer(&hs[0])), uintptr(len(hs)), uintptr(flags), 0, 0)
- return int(n), errnoErr(errno)
-}
-
-func sendmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
- n, errno := socketcall(sysSENDMMSG, s, uintptr(unsafe.Pointer(&hs[0])), uintptr(len(hs)), uintptr(flags), 0, 0)
- return int(n), errnoErr(errno)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/sys_linux_386.s b/vendor/golang.org/x/net/internal/socket/sys_linux_386.s
deleted file mode 100644
index 93e7d75..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_linux_386.s
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-TEXT ·socketcall(SB),NOSPLIT,$0-36
- JMP syscall·socketcall(SB)
-
-TEXT ·rawsocketcall(SB),NOSPLIT,$0-36
- JMP syscall·rawsocketcall(SB)
diff --git a/vendor/golang.org/x/net/internal/socket/sys_linux_amd64.go b/vendor/golang.org/x/net/internal/socket/sys_linux_amd64.go
deleted file mode 100644
index 9decee2..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_linux_amd64.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-const (
- sysRECVMMSG = 0x12b
- sysSENDMMSG = 0x133
-)
diff --git a/vendor/golang.org/x/net/internal/socket/sys_linux_arm.go b/vendor/golang.org/x/net/internal/socket/sys_linux_arm.go
deleted file mode 100644
index d753b43..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_linux_arm.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-const (
- sysRECVMMSG = 0x16d
- sysSENDMMSG = 0x176
-)
diff --git a/vendor/golang.org/x/net/internal/socket/sys_linux_arm64.go b/vendor/golang.org/x/net/internal/socket/sys_linux_arm64.go
deleted file mode 100644
index b670894..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_linux_arm64.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-const (
- sysRECVMMSG = 0xf3
- sysSENDMMSG = 0x10d
-)
diff --git a/vendor/golang.org/x/net/internal/socket/sys_linux_mips.go b/vendor/golang.org/x/net/internal/socket/sys_linux_mips.go
deleted file mode 100644
index 9c0d740..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_linux_mips.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-const (
- sysRECVMMSG = 0x10ef
- sysSENDMMSG = 0x10f7
-)
diff --git a/vendor/golang.org/x/net/internal/socket/sys_linux_mips64.go b/vendor/golang.org/x/net/internal/socket/sys_linux_mips64.go
deleted file mode 100644
index 071a4ab..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_linux_mips64.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-const (
- sysRECVMMSG = 0x14ae
- sysSENDMMSG = 0x14b6
-)
diff --git a/vendor/golang.org/x/net/internal/socket/sys_linux_mips64le.go b/vendor/golang.org/x/net/internal/socket/sys_linux_mips64le.go
deleted file mode 100644
index 071a4ab..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_linux_mips64le.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-const (
- sysRECVMMSG = 0x14ae
- sysSENDMMSG = 0x14b6
-)
diff --git a/vendor/golang.org/x/net/internal/socket/sys_linux_mipsle.go b/vendor/golang.org/x/net/internal/socket/sys_linux_mipsle.go
deleted file mode 100644
index 9c0d740..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_linux_mipsle.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-const (
- sysRECVMMSG = 0x10ef
- sysSENDMMSG = 0x10f7
-)
diff --git a/vendor/golang.org/x/net/internal/socket/sys_linux_ppc.go b/vendor/golang.org/x/net/internal/socket/sys_linux_ppc.go
deleted file mode 100644
index 90cfaa9..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_linux_ppc.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2021 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-const (
- sysRECVMMSG = 0x157
- sysSENDMMSG = 0x15d
-)
diff --git a/vendor/golang.org/x/net/internal/socket/sys_linux_ppc64.go b/vendor/golang.org/x/net/internal/socket/sys_linux_ppc64.go
deleted file mode 100644
index 21c1e3f..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_linux_ppc64.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-const (
- sysRECVMMSG = 0x157
- sysSENDMMSG = 0x15d
-)
diff --git a/vendor/golang.org/x/net/internal/socket/sys_linux_ppc64le.go b/vendor/golang.org/x/net/internal/socket/sys_linux_ppc64le.go
deleted file mode 100644
index 21c1e3f..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_linux_ppc64le.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-const (
- sysRECVMMSG = 0x157
- sysSENDMMSG = 0x15d
-)
diff --git a/vendor/golang.org/x/net/internal/socket/sys_linux_riscv64.go b/vendor/golang.org/x/net/internal/socket/sys_linux_riscv64.go
deleted file mode 100644
index 5b128fb..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_linux_riscv64.go
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build riscv64
-// +build riscv64
-
-package socket
-
-const (
- sysRECVMMSG = 0xf3
- sysSENDMMSG = 0x10d
-)
diff --git a/vendor/golang.org/x/net/internal/socket/sys_linux_s390x.go b/vendor/golang.org/x/net/internal/socket/sys_linux_s390x.go
deleted file mode 100644
index 6512153..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_linux_s390x.go
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-import (
- "syscall"
- "unsafe"
-)
-
-const (
- sysSETSOCKOPT = 0xe
- sysGETSOCKOPT = 0xf
- sysSENDMSG = 0x10
- sysRECVMSG = 0x11
- sysRECVMMSG = 0x13
- sysSENDMMSG = 0x14
-)
-
-func socketcall(call, a0, a1, a2, a3, a4, a5 uintptr) (uintptr, syscall.Errno)
-func rawsocketcall(call, a0, a1, a2, a3, a4, a5 uintptr) (uintptr, syscall.Errno)
-
-func getsockopt(s uintptr, level, name int, b []byte) (int, error) {
- l := uint32(len(b))
- _, errno := socketcall(sysGETSOCKOPT, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(unsafe.Pointer(&l)), 0)
- return int(l), errnoErr(errno)
-}
-
-func setsockopt(s uintptr, level, name int, b []byte) error {
- _, errno := socketcall(sysSETSOCKOPT, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), 0)
- return errnoErr(errno)
-}
-
-func recvmsg(s uintptr, h *msghdr, flags int) (int, error) {
- n, errno := socketcall(sysRECVMSG, s, uintptr(unsafe.Pointer(h)), uintptr(flags), 0, 0, 0)
- return int(n), errnoErr(errno)
-}
-
-func sendmsg(s uintptr, h *msghdr, flags int) (int, error) {
- n, errno := socketcall(sysSENDMSG, s, uintptr(unsafe.Pointer(h)), uintptr(flags), 0, 0, 0)
- return int(n), errnoErr(errno)
-}
-
-func recvmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
- n, errno := socketcall(sysRECVMMSG, s, uintptr(unsafe.Pointer(&hs[0])), uintptr(len(hs)), uintptr(flags), 0, 0)
- return int(n), errnoErr(errno)
-}
-
-func sendmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
- n, errno := socketcall(sysSENDMMSG, s, uintptr(unsafe.Pointer(&hs[0])), uintptr(len(hs)), uintptr(flags), 0, 0)
- return int(n), errnoErr(errno)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/sys_linux_s390x.s b/vendor/golang.org/x/net/internal/socket/sys_linux_s390x.s
deleted file mode 100644
index 06d7562..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_linux_s390x.s
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-TEXT ·socketcall(SB),NOSPLIT,$0-72
- JMP syscall·socketcall(SB)
-
-TEXT ·rawsocketcall(SB),NOSPLIT,$0-72
- JMP syscall·rawsocketcall(SB)
diff --git a/vendor/golang.org/x/net/internal/socket/sys_netbsd.go b/vendor/golang.org/x/net/internal/socket/sys_netbsd.go
deleted file mode 100644
index 431851c..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_netbsd.go
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-import (
- "syscall"
- "unsafe"
-)
-
-const (
- sysRECVMMSG = 0x1db
- sysSENDMMSG = 0x1dc
-)
-
-func recvmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
- n, _, errno := syscall.Syscall6(sysRECVMMSG, s, uintptr(unsafe.Pointer(&hs[0])), uintptr(len(hs)), uintptr(flags), 0, 0)
- return int(n), errnoErr(errno)
-}
-
-func sendmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
- n, _, errno := syscall.Syscall6(sysSENDMMSG, s, uintptr(unsafe.Pointer(&hs[0])), uintptr(len(hs)), uintptr(flags), 0, 0)
- return int(n), errnoErr(errno)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/sys_posix.go b/vendor/golang.org/x/net/internal/socket/sys_posix.go
deleted file mode 100644
index 42b8f23..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_posix.go
+++ /dev/null
@@ -1,185 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || windows || zos
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris windows zos
-
-package socket
-
-import (
- "encoding/binary"
- "errors"
- "net"
- "runtime"
- "strconv"
- "sync"
- "time"
-)
-
-// marshalInetAddr writes a in sockaddr format into the buffer b.
-// The buffer must be sufficiently large (sizeofSockaddrInet4/6).
-// Returns the number of bytes written.
-func marshalInetAddr(a net.Addr, b []byte) int {
- switch a := a.(type) {
- case *net.TCPAddr:
- return marshalSockaddr(a.IP, a.Port, a.Zone, b)
- case *net.UDPAddr:
- return marshalSockaddr(a.IP, a.Port, a.Zone, b)
- case *net.IPAddr:
- return marshalSockaddr(a.IP, 0, a.Zone, b)
- default:
- return 0
- }
-}
-
-func marshalSockaddr(ip net.IP, port int, zone string, b []byte) int {
- if ip4 := ip.To4(); ip4 != nil {
- switch runtime.GOOS {
- case "android", "illumos", "linux", "solaris", "windows":
- NativeEndian.PutUint16(b[:2], uint16(sysAF_INET))
- default:
- b[0] = sizeofSockaddrInet4
- b[1] = sysAF_INET
- }
- binary.BigEndian.PutUint16(b[2:4], uint16(port))
- copy(b[4:8], ip4)
- return sizeofSockaddrInet4
- }
- if ip6 := ip.To16(); ip6 != nil && ip.To4() == nil {
- switch runtime.GOOS {
- case "android", "illumos", "linux", "solaris", "windows":
- NativeEndian.PutUint16(b[:2], uint16(sysAF_INET6))
- default:
- b[0] = sizeofSockaddrInet6
- b[1] = sysAF_INET6
- }
- binary.BigEndian.PutUint16(b[2:4], uint16(port))
- copy(b[8:24], ip6)
- if zone != "" {
- NativeEndian.PutUint32(b[24:28], uint32(zoneCache.index(zone)))
- }
- return sizeofSockaddrInet6
- }
- return 0
-}
-
-func parseInetAddr(b []byte, network string) (net.Addr, error) {
- if len(b) < 2 {
- return nil, errors.New("invalid address")
- }
- var af int
- switch runtime.GOOS {
- case "android", "illumos", "linux", "solaris", "windows":
- af = int(NativeEndian.Uint16(b[:2]))
- default:
- af = int(b[1])
- }
- var ip net.IP
- var zone string
- if af == sysAF_INET {
- if len(b) < sizeofSockaddrInet4 {
- return nil, errors.New("short address")
- }
- ip = make(net.IP, net.IPv4len)
- copy(ip, b[4:8])
- }
- if af == sysAF_INET6 {
- if len(b) < sizeofSockaddrInet6 {
- return nil, errors.New("short address")
- }
- ip = make(net.IP, net.IPv6len)
- copy(ip, b[8:24])
- if id := int(NativeEndian.Uint32(b[24:28])); id > 0 {
- zone = zoneCache.name(id)
- }
- }
- switch network {
- case "tcp", "tcp4", "tcp6":
- return &net.TCPAddr{IP: ip, Port: int(binary.BigEndian.Uint16(b[2:4])), Zone: zone}, nil
- case "udp", "udp4", "udp6":
- return &net.UDPAddr{IP: ip, Port: int(binary.BigEndian.Uint16(b[2:4])), Zone: zone}, nil
- default:
- return &net.IPAddr{IP: ip, Zone: zone}, nil
- }
-}
-
-// An ipv6ZoneCache represents a cache holding partial network
-// interface information. It is used for reducing the cost of IPv6
-// addressing scope zone resolution.
-//
-// Multiple names sharing the index are managed by first-come
-// first-served basis for consistency.
-type ipv6ZoneCache struct {
- sync.RWMutex // guard the following
- lastFetched time.Time // last time routing information was fetched
- toIndex map[string]int // interface name to its index
- toName map[int]string // interface index to its name
-}
-
-var zoneCache = ipv6ZoneCache{
- toIndex: make(map[string]int),
- toName: make(map[int]string),
-}
-
-// update refreshes the network interface information if the cache was last
-// updated more than 1 minute ago, or if force is set. It returns whether the
-// cache was updated.
-func (zc *ipv6ZoneCache) update(ift []net.Interface, force bool) (updated bool) {
- zc.Lock()
- defer zc.Unlock()
- now := time.Now()
- if !force && zc.lastFetched.After(now.Add(-60*time.Second)) {
- return false
- }
- zc.lastFetched = now
- if len(ift) == 0 {
- var err error
- if ift, err = net.Interfaces(); err != nil {
- return false
- }
- }
- zc.toIndex = make(map[string]int, len(ift))
- zc.toName = make(map[int]string, len(ift))
- for _, ifi := range ift {
- zc.toIndex[ifi.Name] = ifi.Index
- if _, ok := zc.toName[ifi.Index]; !ok {
- zc.toName[ifi.Index] = ifi.Name
- }
- }
- return true
-}
-
-func (zc *ipv6ZoneCache) name(zone int) string {
- updated := zoneCache.update(nil, false)
- zoneCache.RLock()
- name, ok := zoneCache.toName[zone]
- zoneCache.RUnlock()
- if !ok && !updated {
- zoneCache.update(nil, true)
- zoneCache.RLock()
- name, ok = zoneCache.toName[zone]
- zoneCache.RUnlock()
- }
- if !ok { // last resort
- name = strconv.Itoa(zone)
- }
- return name
-}
-
-func (zc *ipv6ZoneCache) index(zone string) int {
- updated := zoneCache.update(nil, false)
- zoneCache.RLock()
- index, ok := zoneCache.toIndex[zone]
- zoneCache.RUnlock()
- if !ok && !updated {
- zoneCache.update(nil, true)
- zoneCache.RLock()
- index, ok = zoneCache.toIndex[zone]
- zoneCache.RUnlock()
- }
- if !ok { // last resort
- index, _ = strconv.Atoi(zone)
- }
- return index
-}
diff --git a/vendor/golang.org/x/net/internal/socket/sys_solaris.go b/vendor/golang.org/x/net/internal/socket/sys_solaris.go
deleted file mode 100644
index e79ca95..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_solaris.go
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-import (
- "syscall"
- "unsafe"
-)
-
-//go:cgo_import_dynamic libc___xnet_getsockopt __xnet_getsockopt "libsocket.so"
-//go:cgo_import_dynamic libc_setsockopt setsockopt "libsocket.so"
-//go:cgo_import_dynamic libc___xnet_recvmsg __xnet_recvmsg "libsocket.so"
-//go:cgo_import_dynamic libc___xnet_sendmsg __xnet_sendmsg "libsocket.so"
-
-//go:linkname procGetsockopt libc___xnet_getsockopt
-//go:linkname procSetsockopt libc_setsockopt
-//go:linkname procRecvmsg libc___xnet_recvmsg
-//go:linkname procSendmsg libc___xnet_sendmsg
-
-var (
- procGetsockopt uintptr
- procSetsockopt uintptr
- procRecvmsg uintptr
- procSendmsg uintptr
-)
-
-func sysvicall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (uintptr, uintptr, syscall.Errno)
-func rawSysvicall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (uintptr, uintptr, syscall.Errno)
-
-func getsockopt(s uintptr, level, name int, b []byte) (int, error) {
- l := uint32(len(b))
- _, _, errno := sysvicall6(uintptr(unsafe.Pointer(&procGetsockopt)), 5, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(unsafe.Pointer(&l)), 0)
- return int(l), errnoErr(errno)
-}
-
-func setsockopt(s uintptr, level, name int, b []byte) error {
- _, _, errno := sysvicall6(uintptr(unsafe.Pointer(&procSetsockopt)), 5, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), 0)
- return errnoErr(errno)
-}
-
-func recvmsg(s uintptr, h *msghdr, flags int) (int, error) {
- n, _, errno := sysvicall6(uintptr(unsafe.Pointer(&procRecvmsg)), 3, s, uintptr(unsafe.Pointer(h)), uintptr(flags), 0, 0, 0)
- return int(n), errnoErr(errno)
-}
-
-func sendmsg(s uintptr, h *msghdr, flags int) (int, error) {
- n, _, errno := sysvicall6(uintptr(unsafe.Pointer(&procSendmsg)), 3, s, uintptr(unsafe.Pointer(h)), uintptr(flags), 0, 0, 0)
- return int(n), errnoErr(errno)
-}
-
-func recvmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
- return 0, errNotImplemented
-}
-
-func sendmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
- return 0, errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/internal/socket/sys_solaris_amd64.s b/vendor/golang.org/x/net/internal/socket/sys_solaris_amd64.s
deleted file mode 100644
index a18ac5e..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_solaris_amd64.s
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-TEXT ·sysvicall6(SB),NOSPLIT,$0-88
- JMP syscall·sysvicall6(SB)
-
-TEXT ·rawSysvicall6(SB),NOSPLIT,$0-88
- JMP syscall·rawSysvicall6(SB)
diff --git a/vendor/golang.org/x/net/internal/socket/sys_stub.go b/vendor/golang.org/x/net/internal/socket/sys_stub.go
deleted file mode 100644
index 381e45e..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_stub.go
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !windows && !zos
-// +build !aix,!darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows,!zos
-
-package socket
-
-import "net"
-
-const (
- sysAF_UNSPEC = 0x0
- sysAF_INET = 0x2
- sysAF_INET6 = 0xa
-
- sysSOCK_RAW = 0x3
-
- sizeofSockaddrInet4 = 0x10
- sizeofSockaddrInet6 = 0x1c
-)
-
-func marshalInetAddr(ip net.IP, port int, zone string) []byte {
- return nil
-}
-
-func parseInetAddr(b []byte, network string) (net.Addr, error) {
- return nil, errNotImplemented
-}
-
-func getsockopt(s uintptr, level, name int, b []byte) (int, error) {
- return 0, errNotImplemented
-}
-
-func setsockopt(s uintptr, level, name int, b []byte) error {
- return errNotImplemented
-}
-
-func recvmsg(s uintptr, h *msghdr, flags int) (int, error) {
- return 0, errNotImplemented
-}
-
-func sendmsg(s uintptr, h *msghdr, flags int) (int, error) {
- return 0, errNotImplemented
-}
-
-func recvmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
- return 0, errNotImplemented
-}
-
-func sendmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
- return 0, errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/internal/socket/sys_unix.go b/vendor/golang.org/x/net/internal/socket/sys_unix.go
deleted file mode 100644
index c98ebae..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_unix.go
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build dragonfly || freebsd || (linux && !s390x && !386) || netbsd || openbsd
-// +build dragonfly freebsd linux,!s390x,!386 netbsd openbsd
-
-package socket
-
-import (
- "syscall"
- "unsafe"
-)
-
-func getsockopt(s uintptr, level, name int, b []byte) (int, error) {
- l := uint32(len(b))
- _, _, errno := syscall.Syscall6(syscall.SYS_GETSOCKOPT, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(unsafe.Pointer(&l)), 0)
- return int(l), errnoErr(errno)
-}
-
-func setsockopt(s uintptr, level, name int, b []byte) error {
- _, _, errno := syscall.Syscall6(syscall.SYS_SETSOCKOPT, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), 0)
- return errnoErr(errno)
-}
-
-func recvmsg(s uintptr, h *msghdr, flags int) (int, error) {
- n, _, errno := syscall.Syscall(syscall.SYS_RECVMSG, s, uintptr(unsafe.Pointer(h)), uintptr(flags))
- return int(n), errnoErr(errno)
-}
-
-func sendmsg(s uintptr, h *msghdr, flags int) (int, error) {
- n, _, errno := syscall.Syscall(syscall.SYS_SENDMSG, s, uintptr(unsafe.Pointer(h)), uintptr(flags))
- return int(n), errnoErr(errno)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/sys_windows.go b/vendor/golang.org/x/net/internal/socket/sys_windows.go
deleted file mode 100644
index 2de0d68..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_windows.go
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-import (
- "syscall"
- "unsafe"
-
- "golang.org/x/sys/windows"
-)
-
-func probeProtocolStack() int {
- var p uintptr
- return int(unsafe.Sizeof(p))
-}
-
-const (
- sysAF_UNSPEC = windows.AF_UNSPEC
- sysAF_INET = windows.AF_INET
- sysAF_INET6 = windows.AF_INET6
-
- sysSOCK_RAW = windows.SOCK_RAW
-
- sizeofSockaddrInet4 = 0x10
- sizeofSockaddrInet6 = 0x1c
-)
-
-func getsockopt(s uintptr, level, name int, b []byte) (int, error) {
- l := uint32(len(b))
- err := syscall.Getsockopt(syscall.Handle(s), int32(level), int32(name), (*byte)(unsafe.Pointer(&b[0])), (*int32)(unsafe.Pointer(&l)))
- return int(l), err
-}
-
-func setsockopt(s uintptr, level, name int, b []byte) error {
- return syscall.Setsockopt(syscall.Handle(s), int32(level), int32(name), (*byte)(unsafe.Pointer(&b[0])), int32(len(b)))
-}
-
-func recvmsg(s uintptr, h *msghdr, flags int) (int, error) {
- return 0, errNotImplemented
-}
-
-func sendmsg(s uintptr, h *msghdr, flags int) (int, error) {
- return 0, errNotImplemented
-}
-
-func recvmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
- return 0, errNotImplemented
-}
-
-func sendmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
- return 0, errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/internal/socket/sys_zos_s390x.go b/vendor/golang.org/x/net/internal/socket/sys_zos_s390x.go
deleted file mode 100644
index 1e38b92..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_zos_s390x.go
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-import (
- "syscall"
- "unsafe"
-)
-
-func syscall_syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno)
-func syscall_syscall6(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno)
-
-func probeProtocolStack() int {
- return 4 // sizeof(int) on GOOS=zos GOARCH=s390x
-}
-
-func getsockopt(s uintptr, level, name int, b []byte) (int, error) {
- l := uint32(len(b))
- _, _, errno := syscall_syscall6(syscall.SYS_GETSOCKOPT, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(unsafe.Pointer(&l)), 0)
- return int(l), errnoErr(errno)
-}
-
-func setsockopt(s uintptr, level, name int, b []byte) error {
- _, _, errno := syscall_syscall6(syscall.SYS_SETSOCKOPT, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), 0)
- return errnoErr(errno)
-}
-
-func recvmsg(s uintptr, h *msghdr, flags int) (int, error) {
- n, _, errno := syscall_syscall(syscall.SYS___RECVMSG_A, s, uintptr(unsafe.Pointer(h)), uintptr(flags))
- return int(n), errnoErr(errno)
-}
-
-func sendmsg(s uintptr, h *msghdr, flags int) (int, error) {
- n, _, errno := syscall_syscall(syscall.SYS___SENDMSG_A, s, uintptr(unsafe.Pointer(h)), uintptr(flags))
- return int(n), errnoErr(errno)
-}
diff --git a/vendor/golang.org/x/net/internal/socket/sys_zos_s390x.s b/vendor/golang.org/x/net/internal/socket/sys_zos_s390x.s
deleted file mode 100644
index 60d5839..0000000
--- a/vendor/golang.org/x/net/internal/socket/sys_zos_s390x.s
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-TEXT ·syscall_syscall(SB),NOSPLIT,$0
- JMP syscall·_syscall(SB)
-
-TEXT ·syscall_syscall6(SB),NOSPLIT,$0
- JMP syscall·_syscall6(SB)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_aix_ppc64.go b/vendor/golang.org/x/net/internal/socket/zsys_aix_ppc64.go
deleted file mode 100644
index 00691bd..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_aix_ppc64.go
+++ /dev/null
@@ -1,40 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_aix.go
-
-// Added for go1.11 compatibility
-//go:build aix
-// +build aix
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Iov *iovec
- Iovlen int32
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
- Pad_cgo_0 [4]byte
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x30
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_darwin_386.go b/vendor/golang.org/x/net/internal/socket/zsys_darwin_386.go
deleted file mode 100644
index 5acf6db..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_darwin_386.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_darwin.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint32
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Iov *iovec
- Iovlen int32
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x8
- sizeofMsghdr = 0x1c
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_darwin_amd64.go b/vendor/golang.org/x/net/internal/socket/zsys_darwin_amd64.go
deleted file mode 100644
index 98dcfe4..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_darwin_amd64.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_darwin.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Pad_cgo_0 [4]byte
- Iov *iovec
- Iovlen int32
- Pad_cgo_1 [4]byte
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x30
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_darwin_arm.go b/vendor/golang.org/x/net/internal/socket/zsys_darwin_arm.go
deleted file mode 100644
index 5acf6db..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_darwin_arm.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_darwin.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint32
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Iov *iovec
- Iovlen int32
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x8
- sizeofMsghdr = 0x1c
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_darwin_arm64.go b/vendor/golang.org/x/net/internal/socket/zsys_darwin_arm64.go
deleted file mode 100644
index 98dcfe4..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_darwin_arm64.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_darwin.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Pad_cgo_0 [4]byte
- Iov *iovec
- Iovlen int32
- Pad_cgo_1 [4]byte
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x30
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_dragonfly_amd64.go b/vendor/golang.org/x/net/internal/socket/zsys_dragonfly_amd64.go
deleted file mode 100644
index 636d129..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_dragonfly_amd64.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_dragonfly.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Pad_cgo_0 [4]byte
- Iov *iovec
- Iovlen int32
- Pad_cgo_1 [4]byte
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x30
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_freebsd_386.go b/vendor/golang.org/x/net/internal/socket/zsys_freebsd_386.go
deleted file mode 100644
index 87707fe..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_freebsd_386.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_freebsd.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint32
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Iov *iovec
- Iovlen int32
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x8
- sizeofMsghdr = 0x1c
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_freebsd_amd64.go b/vendor/golang.org/x/net/internal/socket/zsys_freebsd_amd64.go
deleted file mode 100644
index 7db7781..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_freebsd_amd64.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_freebsd.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Pad_cgo_0 [4]byte
- Iov *iovec
- Iovlen int32
- Pad_cgo_1 [4]byte
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x30
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_freebsd_arm.go b/vendor/golang.org/x/net/internal/socket/zsys_freebsd_arm.go
deleted file mode 100644
index 87707fe..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_freebsd_arm.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_freebsd.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint32
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Iov *iovec
- Iovlen int32
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x8
- sizeofMsghdr = 0x1c
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_freebsd_arm64.go b/vendor/golang.org/x/net/internal/socket/zsys_freebsd_arm64.go
deleted file mode 100644
index 7db7781..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_freebsd_arm64.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_freebsd.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Pad_cgo_0 [4]byte
- Iov *iovec
- Iovlen int32
- Pad_cgo_1 [4]byte
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x30
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_linux_386.go b/vendor/golang.org/x/net/internal/socket/zsys_linux_386.go
deleted file mode 100644
index 4c19269..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_linux_386.go
+++ /dev/null
@@ -1,35 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint32
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Iov *iovec
- Iovlen uint32
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x8
- sizeofMsghdr = 0x1c
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_linux_amd64.go b/vendor/golang.org/x/net/internal/socket/zsys_linux_amd64.go
deleted file mode 100644
index 3dcd5c8..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_linux_amd64.go
+++ /dev/null
@@ -1,38 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Pad_cgo_0 [4]byte
- Iov *iovec
- Iovlen uint64
- Control *byte
- Controllen uint64
- Flags int32
- Pad_cgo_1 [4]byte
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
- Pad_cgo_0 [4]byte
-}
-
-type cmsghdr struct {
- Len uint64
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x38
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_linux_arm.go b/vendor/golang.org/x/net/internal/socket/zsys_linux_arm.go
deleted file mode 100644
index 4c19269..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_linux_arm.go
+++ /dev/null
@@ -1,35 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint32
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Iov *iovec
- Iovlen uint32
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x8
- sizeofMsghdr = 0x1c
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_linux_arm64.go b/vendor/golang.org/x/net/internal/socket/zsys_linux_arm64.go
deleted file mode 100644
index 3dcd5c8..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_linux_arm64.go
+++ /dev/null
@@ -1,38 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Pad_cgo_0 [4]byte
- Iov *iovec
- Iovlen uint64
- Control *byte
- Controllen uint64
- Flags int32
- Pad_cgo_1 [4]byte
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
- Pad_cgo_0 [4]byte
-}
-
-type cmsghdr struct {
- Len uint64
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x38
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_linux_mips.go b/vendor/golang.org/x/net/internal/socket/zsys_linux_mips.go
deleted file mode 100644
index 4c19269..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_linux_mips.go
+++ /dev/null
@@ -1,35 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint32
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Iov *iovec
- Iovlen uint32
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x8
- sizeofMsghdr = 0x1c
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_linux_mips64.go b/vendor/golang.org/x/net/internal/socket/zsys_linux_mips64.go
deleted file mode 100644
index 3dcd5c8..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_linux_mips64.go
+++ /dev/null
@@ -1,38 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Pad_cgo_0 [4]byte
- Iov *iovec
- Iovlen uint64
- Control *byte
- Controllen uint64
- Flags int32
- Pad_cgo_1 [4]byte
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
- Pad_cgo_0 [4]byte
-}
-
-type cmsghdr struct {
- Len uint64
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x38
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_linux_mips64le.go b/vendor/golang.org/x/net/internal/socket/zsys_linux_mips64le.go
deleted file mode 100644
index 3dcd5c8..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_linux_mips64le.go
+++ /dev/null
@@ -1,38 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Pad_cgo_0 [4]byte
- Iov *iovec
- Iovlen uint64
- Control *byte
- Controllen uint64
- Flags int32
- Pad_cgo_1 [4]byte
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
- Pad_cgo_0 [4]byte
-}
-
-type cmsghdr struct {
- Len uint64
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x38
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_linux_mipsle.go b/vendor/golang.org/x/net/internal/socket/zsys_linux_mipsle.go
deleted file mode 100644
index 4c19269..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_linux_mipsle.go
+++ /dev/null
@@ -1,35 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint32
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Iov *iovec
- Iovlen uint32
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x8
- sizeofMsghdr = 0x1c
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_linux_ppc.go b/vendor/golang.org/x/net/internal/socket/zsys_linux_ppc.go
deleted file mode 100644
index 59b71da..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_linux_ppc.go
+++ /dev/null
@@ -1,35 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint32
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Iov *iovec
- Iovlen uint32
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x8
- sizeofMsghdr = 0x1c
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_linux_ppc64.go b/vendor/golang.org/x/net/internal/socket/zsys_linux_ppc64.go
deleted file mode 100644
index 3dcd5c8..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_linux_ppc64.go
+++ /dev/null
@@ -1,38 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Pad_cgo_0 [4]byte
- Iov *iovec
- Iovlen uint64
- Control *byte
- Controllen uint64
- Flags int32
- Pad_cgo_1 [4]byte
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
- Pad_cgo_0 [4]byte
-}
-
-type cmsghdr struct {
- Len uint64
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x38
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_linux_ppc64le.go b/vendor/golang.org/x/net/internal/socket/zsys_linux_ppc64le.go
deleted file mode 100644
index 3dcd5c8..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_linux_ppc64le.go
+++ /dev/null
@@ -1,38 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Pad_cgo_0 [4]byte
- Iov *iovec
- Iovlen uint64
- Control *byte
- Controllen uint64
- Flags int32
- Pad_cgo_1 [4]byte
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
- Pad_cgo_0 [4]byte
-}
-
-type cmsghdr struct {
- Len uint64
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x38
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_linux_riscv64.go b/vendor/golang.org/x/net/internal/socket/zsys_linux_riscv64.go
deleted file mode 100644
index c066272..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_linux_riscv64.go
+++ /dev/null
@@ -1,40 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-//go:build riscv64
-// +build riscv64
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Iov *iovec
- Iovlen uint64
- Control *byte
- Controllen uint64
- Flags int32
- Pad_cgo_0 [4]byte
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
- Pad_cgo_0 [4]byte
-}
-
-type cmsghdr struct {
- Len uint64
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x38
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_linux_s390x.go b/vendor/golang.org/x/net/internal/socket/zsys_linux_s390x.go
deleted file mode 100644
index 3dcd5c8..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_linux_s390x.go
+++ /dev/null
@@ -1,38 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Pad_cgo_0 [4]byte
- Iov *iovec
- Iovlen uint64
- Control *byte
- Controllen uint64
- Flags int32
- Pad_cgo_1 [4]byte
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
- Pad_cgo_0 [4]byte
-}
-
-type cmsghdr struct {
- Len uint64
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x38
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_netbsd_386.go b/vendor/golang.org/x/net/internal/socket/zsys_netbsd_386.go
deleted file mode 100644
index f95572d..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_netbsd_386.go
+++ /dev/null
@@ -1,35 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_netbsd.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint32
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Iov *iovec
- Iovlen int32
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x8
- sizeofMsghdr = 0x1c
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_netbsd_amd64.go b/vendor/golang.org/x/net/internal/socket/zsys_netbsd_amd64.go
deleted file mode 100644
index a92fd60..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_netbsd_amd64.go
+++ /dev/null
@@ -1,38 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_netbsd.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Pad_cgo_0 [4]byte
- Iov *iovec
- Iovlen int32
- Pad_cgo_1 [4]byte
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
- Pad_cgo_0 [4]byte
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x30
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_netbsd_arm.go b/vendor/golang.org/x/net/internal/socket/zsys_netbsd_arm.go
deleted file mode 100644
index f95572d..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_netbsd_arm.go
+++ /dev/null
@@ -1,35 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_netbsd.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint32
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Iov *iovec
- Iovlen int32
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x8
- sizeofMsghdr = 0x1c
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_netbsd_arm64.go b/vendor/golang.org/x/net/internal/socket/zsys_netbsd_arm64.go
deleted file mode 100644
index a92fd60..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_netbsd_arm64.go
+++ /dev/null
@@ -1,38 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_netbsd.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Pad_cgo_0 [4]byte
- Iov *iovec
- Iovlen int32
- Pad_cgo_1 [4]byte
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type mmsghdr struct {
- Hdr msghdr
- Len uint32
- Pad_cgo_0 [4]byte
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x30
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_openbsd_386.go b/vendor/golang.org/x/net/internal/socket/zsys_openbsd_386.go
deleted file mode 100644
index e792ec2..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_openbsd_386.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_openbsd.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint32
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Iov *iovec
- Iovlen uint32
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x8
- sizeofMsghdr = 0x1c
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_openbsd_amd64.go b/vendor/golang.org/x/net/internal/socket/zsys_openbsd_amd64.go
deleted file mode 100644
index b68ff2d..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_openbsd_amd64.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_openbsd.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Pad_cgo_0 [4]byte
- Iov *iovec
- Iovlen uint32
- Pad_cgo_1 [4]byte
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x30
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_openbsd_arm.go b/vendor/golang.org/x/net/internal/socket/zsys_openbsd_arm.go
deleted file mode 100644
index e792ec2..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_openbsd_arm.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_openbsd.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint32
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Iov *iovec
- Iovlen uint32
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x8
- sizeofMsghdr = 0x1c
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_openbsd_arm64.go b/vendor/golang.org/x/net/internal/socket/zsys_openbsd_arm64.go
deleted file mode 100644
index b68ff2d..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_openbsd_arm64.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_openbsd.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Pad_cgo_0 [4]byte
- Iov *iovec
- Iovlen uint32
- Pad_cgo_1 [4]byte
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x30
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_openbsd_mips64.go b/vendor/golang.org/x/net/internal/socket/zsys_openbsd_mips64.go
deleted file mode 100644
index 3c9576e..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_openbsd_mips64.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_openbsd.go
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Iov *iovec
- Iovlen uint32
- Control *byte
- Controllen uint32
- Flags int32
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x30
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_solaris_amd64.go b/vendor/golang.org/x/net/internal/socket/zsys_solaris_amd64.go
deleted file mode 100644
index 359cfec..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_solaris_amd64.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_solaris.go
-
-package socket
-
-type iovec struct {
- Base *int8
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Namelen uint32
- Pad_cgo_0 [4]byte
- Iov *iovec
- Iovlen int32
- Pad_cgo_1 [4]byte
- Accrights *int8
- Accrightslen int32
- Pad_cgo_2 [4]byte
-}
-
-type cmsghdr struct {
- Len uint32
- Level int32
- Type int32
-}
-
-const (
- sizeofIovec = 0x10
- sizeofMsghdr = 0x30
-)
diff --git a/vendor/golang.org/x/net/internal/socket/zsys_zos_s390x.go b/vendor/golang.org/x/net/internal/socket/zsys_zos_s390x.go
deleted file mode 100644
index 49b62c8..0000000
--- a/vendor/golang.org/x/net/internal/socket/zsys_zos_s390x.go
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package socket
-
-type iovec struct {
- Base *byte
- Len uint64
-}
-
-type msghdr struct {
- Name *byte
- Iov *iovec
- Control *byte
- Flags int32
- Namelen uint32
- Iovlen int32
- Controllen uint32
-}
-
-type cmsghdr struct {
- Len int32
- Level int32
- Type int32
-}
-
-const sizeofCmsghdr = 12
diff --git a/vendor/golang.org/x/net/ipv4/batch.go b/vendor/golang.org/x/net/ipv4/batch.go
deleted file mode 100644
index 1a3a4fc..0000000
--- a/vendor/golang.org/x/net/ipv4/batch.go
+++ /dev/null
@@ -1,194 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import (
- "net"
- "runtime"
-
- "golang.org/x/net/internal/socket"
-)
-
-// BUG(mikio): On Windows, the ReadBatch and WriteBatch methods of
-// PacketConn are not implemented.
-
-// BUG(mikio): On Windows, the ReadBatch and WriteBatch methods of
-// RawConn are not implemented.
-
-// A Message represents an IO message.
-//
-// type Message struct {
-// Buffers [][]byte
-// OOB []byte
-// Addr net.Addr
-// N int
-// NN int
-// Flags int
-// }
-//
-// The Buffers fields represents a list of contiguous buffers, which
-// can be used for vectored IO, for example, putting a header and a
-// payload in each slice.
-// When writing, the Buffers field must contain at least one byte to
-// write.
-// When reading, the Buffers field will always contain a byte to read.
-//
-// The OOB field contains protocol-specific control or miscellaneous
-// ancillary data known as out-of-band data.
-// It can be nil when not required.
-//
-// The Addr field specifies a destination address when writing.
-// It can be nil when the underlying protocol of the endpoint uses
-// connection-oriented communication.
-// After a successful read, it may contain the source address on the
-// received packet.
-//
-// The N field indicates the number of bytes read or written from/to
-// Buffers.
-//
-// The NN field indicates the number of bytes read or written from/to
-// OOB.
-//
-// The Flags field contains protocol-specific information on the
-// received message.
-type Message = socket.Message
-
-// ReadBatch reads a batch of messages.
-//
-// The provided flags is a set of platform-dependent flags, such as
-// syscall.MSG_PEEK.
-//
-// On a successful read it returns the number of messages received, up
-// to len(ms).
-//
-// On Linux, a batch read will be optimized.
-// On other platforms, this method will read only a single message.
-//
-// Unlike the ReadFrom method, it doesn't strip the IPv4 header
-// followed by option headers from the received IPv4 datagram when the
-// underlying transport is net.IPConn. Each Buffers field of Message
-// must be large enough to accommodate an IPv4 header and option
-// headers.
-func (c *payloadHandler) ReadBatch(ms []Message, flags int) (int, error) {
- if !c.ok() {
- return 0, errInvalidConn
- }
- switch runtime.GOOS {
- case "linux":
- n, err := c.RecvMsgs([]socket.Message(ms), flags)
- if err != nil {
- err = &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
- }
- return n, err
- default:
- n := 1
- err := c.RecvMsg(&ms[0], flags)
- if err != nil {
- n = 0
- err = &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
- }
- if compatFreeBSD32 && ms[0].NN > 0 {
- adjustFreeBSD32(&ms[0])
- }
- return n, err
- }
-}
-
-// WriteBatch writes a batch of messages.
-//
-// The provided flags is a set of platform-dependent flags, such as
-// syscall.MSG_DONTROUTE.
-//
-// It returns the number of messages written on a successful write.
-//
-// On Linux, a batch write will be optimized.
-// On other platforms, this method will write only a single message.
-func (c *payloadHandler) WriteBatch(ms []Message, flags int) (int, error) {
- if !c.ok() {
- return 0, errInvalidConn
- }
- switch runtime.GOOS {
- case "linux":
- n, err := c.SendMsgs([]socket.Message(ms), flags)
- if err != nil {
- err = &net.OpError{Op: "write", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
- }
- return n, err
- default:
- n := 1
- err := c.SendMsg(&ms[0], flags)
- if err != nil {
- n = 0
- err = &net.OpError{Op: "write", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
- }
- return n, err
- }
-}
-
-// ReadBatch reads a batch of messages.
-//
-// The provided flags is a set of platform-dependent flags, such as
-// syscall.MSG_PEEK.
-//
-// On a successful read it returns the number of messages received, up
-// to len(ms).
-//
-// On Linux, a batch read will be optimized.
-// On other platforms, this method will read only a single message.
-func (c *packetHandler) ReadBatch(ms []Message, flags int) (int, error) {
- if !c.ok() {
- return 0, errInvalidConn
- }
- switch runtime.GOOS {
- case "linux":
- n, err := c.RecvMsgs([]socket.Message(ms), flags)
- if err != nil {
- err = &net.OpError{Op: "read", Net: c.IPConn.LocalAddr().Network(), Source: c.IPConn.LocalAddr(), Err: err}
- }
- return n, err
- default:
- n := 1
- err := c.RecvMsg(&ms[0], flags)
- if err != nil {
- n = 0
- err = &net.OpError{Op: "read", Net: c.IPConn.LocalAddr().Network(), Source: c.IPConn.LocalAddr(), Err: err}
- }
- if compatFreeBSD32 && ms[0].NN > 0 {
- adjustFreeBSD32(&ms[0])
- }
- return n, err
- }
-}
-
-// WriteBatch writes a batch of messages.
-//
-// The provided flags is a set of platform-dependent flags, such as
-// syscall.MSG_DONTROUTE.
-//
-// It returns the number of messages written on a successful write.
-//
-// On Linux, a batch write will be optimized.
-// On other platforms, this method will write only a single message.
-func (c *packetHandler) WriteBatch(ms []Message, flags int) (int, error) {
- if !c.ok() {
- return 0, errInvalidConn
- }
- switch runtime.GOOS {
- case "linux":
- n, err := c.SendMsgs([]socket.Message(ms), flags)
- if err != nil {
- err = &net.OpError{Op: "write", Net: c.IPConn.LocalAddr().Network(), Source: c.IPConn.LocalAddr(), Err: err}
- }
- return n, err
- default:
- n := 1
- err := c.SendMsg(&ms[0], flags)
- if err != nil {
- n = 0
- err = &net.OpError{Op: "write", Net: c.IPConn.LocalAddr().Network(), Source: c.IPConn.LocalAddr(), Err: err}
- }
- return n, err
- }
-}
diff --git a/vendor/golang.org/x/net/ipv4/control.go b/vendor/golang.org/x/net/ipv4/control.go
deleted file mode 100644
index a2b02ca..0000000
--- a/vendor/golang.org/x/net/ipv4/control.go
+++ /dev/null
@@ -1,144 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import (
- "fmt"
- "net"
- "sync"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-)
-
-type rawOpt struct {
- sync.RWMutex
- cflags ControlFlags
-}
-
-func (c *rawOpt) set(f ControlFlags) { c.cflags |= f }
-func (c *rawOpt) clear(f ControlFlags) { c.cflags &^= f }
-func (c *rawOpt) isset(f ControlFlags) bool { return c.cflags&f != 0 }
-
-type ControlFlags uint
-
-const (
- FlagTTL ControlFlags = 1 << iota // pass the TTL on the received packet
- FlagSrc // pass the source address on the received packet
- FlagDst // pass the destination address on the received packet
- FlagInterface // pass the interface index on the received packet
-)
-
-// A ControlMessage represents per packet basis IP-level socket options.
-type ControlMessage struct {
- // Receiving socket options: SetControlMessage allows to
- // receive the options from the protocol stack using ReadFrom
- // method of PacketConn or RawConn.
- //
- // Specifying socket options: ControlMessage for WriteTo
- // method of PacketConn or RawConn allows to send the options
- // to the protocol stack.
- //
- TTL int // time-to-live, receiving only
- Src net.IP // source address, specifying only
- Dst net.IP // destination address, receiving only
- IfIndex int // interface index, must be 1 <= value when specifying
-}
-
-func (cm *ControlMessage) String() string {
- if cm == nil {
- return ""
- }
- return fmt.Sprintf("ttl=%d src=%v dst=%v ifindex=%d", cm.TTL, cm.Src, cm.Dst, cm.IfIndex)
-}
-
-// Marshal returns the binary encoding of cm.
-func (cm *ControlMessage) Marshal() []byte {
- if cm == nil {
- return nil
- }
- var m socket.ControlMessage
- if ctlOpts[ctlPacketInfo].name > 0 && (cm.Src.To4() != nil || cm.IfIndex > 0) {
- m = socket.NewControlMessage([]int{ctlOpts[ctlPacketInfo].length})
- }
- if len(m) > 0 {
- ctlOpts[ctlPacketInfo].marshal(m, cm)
- }
- return m
-}
-
-// Parse parses b as a control message and stores the result in cm.
-func (cm *ControlMessage) Parse(b []byte) error {
- ms, err := socket.ControlMessage(b).Parse()
- if err != nil {
- return err
- }
- for _, m := range ms {
- lvl, typ, l, err := m.ParseHeader()
- if err != nil {
- return err
- }
- if lvl != iana.ProtocolIP {
- continue
- }
- switch {
- case typ == ctlOpts[ctlTTL].name && l >= ctlOpts[ctlTTL].length:
- ctlOpts[ctlTTL].parse(cm, m.Data(l))
- case typ == ctlOpts[ctlDst].name && l >= ctlOpts[ctlDst].length:
- ctlOpts[ctlDst].parse(cm, m.Data(l))
- case typ == ctlOpts[ctlInterface].name && l >= ctlOpts[ctlInterface].length:
- ctlOpts[ctlInterface].parse(cm, m.Data(l))
- case typ == ctlOpts[ctlPacketInfo].name && l >= ctlOpts[ctlPacketInfo].length:
- ctlOpts[ctlPacketInfo].parse(cm, m.Data(l))
- }
- }
- return nil
-}
-
-// NewControlMessage returns a new control message.
-//
-// The returned message is large enough for options specified by cf.
-func NewControlMessage(cf ControlFlags) []byte {
- opt := rawOpt{cflags: cf}
- var l int
- if opt.isset(FlagTTL) && ctlOpts[ctlTTL].name > 0 {
- l += socket.ControlMessageSpace(ctlOpts[ctlTTL].length)
- }
- if ctlOpts[ctlPacketInfo].name > 0 {
- if opt.isset(FlagSrc | FlagDst | FlagInterface) {
- l += socket.ControlMessageSpace(ctlOpts[ctlPacketInfo].length)
- }
- } else {
- if opt.isset(FlagDst) && ctlOpts[ctlDst].name > 0 {
- l += socket.ControlMessageSpace(ctlOpts[ctlDst].length)
- }
- if opt.isset(FlagInterface) && ctlOpts[ctlInterface].name > 0 {
- l += socket.ControlMessageSpace(ctlOpts[ctlInterface].length)
- }
- }
- var b []byte
- if l > 0 {
- b = make([]byte, l)
- }
- return b
-}
-
-// Ancillary data socket options
-const (
- ctlTTL = iota // header field
- ctlSrc // header field
- ctlDst // header field
- ctlInterface // inbound or outbound interface
- ctlPacketInfo // inbound or outbound packet path
- ctlMax
-)
-
-// A ctlOpt represents a binding for ancillary data socket option.
-type ctlOpt struct {
- name int // option name, must be equal or greater than 1
- length int // option length
- marshal func([]byte, *ControlMessage) []byte
- parse func(*ControlMessage, []byte)
-}
diff --git a/vendor/golang.org/x/net/ipv4/control_bsd.go b/vendor/golang.org/x/net/ipv4/control_bsd.go
deleted file mode 100644
index b7385df..0000000
--- a/vendor/golang.org/x/net/ipv4/control_bsd.go
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || netbsd || openbsd
-// +build aix darwin dragonfly freebsd netbsd openbsd
-
-package ipv4
-
-import (
- "net"
- "syscall"
- "unsafe"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-func marshalDst(b []byte, cm *ControlMessage) []byte {
- m := socket.ControlMessage(b)
- m.MarshalHeader(iana.ProtocolIP, unix.IP_RECVDSTADDR, net.IPv4len)
- return m.Next(net.IPv4len)
-}
-
-func parseDst(cm *ControlMessage, b []byte) {
- if len(cm.Dst) < net.IPv4len {
- cm.Dst = make(net.IP, net.IPv4len)
- }
- copy(cm.Dst, b[:net.IPv4len])
-}
-
-func marshalInterface(b []byte, cm *ControlMessage) []byte {
- m := socket.ControlMessage(b)
- m.MarshalHeader(iana.ProtocolIP, sockoptReceiveInterface, syscall.SizeofSockaddrDatalink)
- return m.Next(syscall.SizeofSockaddrDatalink)
-}
-
-func parseInterface(cm *ControlMessage, b []byte) {
- var sadl syscall.SockaddrDatalink
- copy((*[unsafe.Sizeof(sadl)]byte)(unsafe.Pointer(&sadl))[:], b)
- cm.IfIndex = int(sadl.Index)
-}
diff --git a/vendor/golang.org/x/net/ipv4/control_pktinfo.go b/vendor/golang.org/x/net/ipv4/control_pktinfo.go
deleted file mode 100644
index 0e748db..0000000
--- a/vendor/golang.org/x/net/ipv4/control_pktinfo.go
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build darwin || linux || solaris
-// +build darwin linux solaris
-
-package ipv4
-
-import (
- "net"
- "unsafe"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-func marshalPacketInfo(b []byte, cm *ControlMessage) []byte {
- m := socket.ControlMessage(b)
- m.MarshalHeader(iana.ProtocolIP, unix.IP_PKTINFO, sizeofInetPktinfo)
- if cm != nil {
- pi := (*inetPktinfo)(unsafe.Pointer(&m.Data(sizeofInetPktinfo)[0]))
- if ip := cm.Src.To4(); ip != nil {
- copy(pi.Spec_dst[:], ip)
- }
- if cm.IfIndex > 0 {
- pi.setIfindex(cm.IfIndex)
- }
- }
- return m.Next(sizeofInetPktinfo)
-}
-
-func parsePacketInfo(cm *ControlMessage, b []byte) {
- pi := (*inetPktinfo)(unsafe.Pointer(&b[0]))
- cm.IfIndex = int(pi.Ifindex)
- if len(cm.Dst) < net.IPv4len {
- cm.Dst = make(net.IP, net.IPv4len)
- }
- copy(cm.Dst, pi.Addr[:])
-}
diff --git a/vendor/golang.org/x/net/ipv4/control_stub.go b/vendor/golang.org/x/net/ipv4/control_stub.go
deleted file mode 100644
index f27322c..0000000
--- a/vendor/golang.org/x/net/ipv4/control_stub.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !windows && !zos
-// +build !aix,!darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows,!zos
-
-package ipv4
-
-import "golang.org/x/net/internal/socket"
-
-func setControlMessage(c *socket.Conn, opt *rawOpt, cf ControlFlags, on bool) error {
- return errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/ipv4/control_unix.go b/vendor/golang.org/x/net/ipv4/control_unix.go
deleted file mode 100644
index 2413e02..0000000
--- a/vendor/golang.org/x/net/ipv4/control_unix.go
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
-
-package ipv4
-
-import (
- "unsafe"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-func setControlMessage(c *socket.Conn, opt *rawOpt, cf ControlFlags, on bool) error {
- opt.Lock()
- defer opt.Unlock()
- if so, ok := sockOpts[ssoReceiveTTL]; ok && cf&FlagTTL != 0 {
- if err := so.SetInt(c, boolint(on)); err != nil {
- return err
- }
- if on {
- opt.set(FlagTTL)
- } else {
- opt.clear(FlagTTL)
- }
- }
- if so, ok := sockOpts[ssoPacketInfo]; ok {
- if cf&(FlagSrc|FlagDst|FlagInterface) != 0 {
- if err := so.SetInt(c, boolint(on)); err != nil {
- return err
- }
- if on {
- opt.set(cf & (FlagSrc | FlagDst | FlagInterface))
- } else {
- opt.clear(cf & (FlagSrc | FlagDst | FlagInterface))
- }
- }
- } else {
- if so, ok := sockOpts[ssoReceiveDst]; ok && cf&FlagDst != 0 {
- if err := so.SetInt(c, boolint(on)); err != nil {
- return err
- }
- if on {
- opt.set(FlagDst)
- } else {
- opt.clear(FlagDst)
- }
- }
- if so, ok := sockOpts[ssoReceiveInterface]; ok && cf&FlagInterface != 0 {
- if err := so.SetInt(c, boolint(on)); err != nil {
- return err
- }
- if on {
- opt.set(FlagInterface)
- } else {
- opt.clear(FlagInterface)
- }
- }
- }
- return nil
-}
-
-func marshalTTL(b []byte, cm *ControlMessage) []byte {
- m := socket.ControlMessage(b)
- m.MarshalHeader(iana.ProtocolIP, unix.IP_RECVTTL, 1)
- return m.Next(1)
-}
-
-func parseTTL(cm *ControlMessage, b []byte) {
- cm.TTL = int(*(*byte)(unsafe.Pointer(&b[:1][0])))
-}
diff --git a/vendor/golang.org/x/net/ipv4/control_windows.go b/vendor/golang.org/x/net/ipv4/control_windows.go
deleted file mode 100644
index 82c6306..0000000
--- a/vendor/golang.org/x/net/ipv4/control_windows.go
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import "golang.org/x/net/internal/socket"
-
-func setControlMessage(c *socket.Conn, opt *rawOpt, cf ControlFlags, on bool) error {
- // TODO(mikio): implement this
- return errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/ipv4/control_zos.go b/vendor/golang.org/x/net/ipv4/control_zos.go
deleted file mode 100644
index de11c42..0000000
--- a/vendor/golang.org/x/net/ipv4/control_zos.go
+++ /dev/null
@@ -1,88 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import (
- "net"
- "unsafe"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-func marshalPacketInfo(b []byte, cm *ControlMessage) []byte {
- m := socket.ControlMessage(b)
- m.MarshalHeader(iana.ProtocolIP, unix.IP_PKTINFO, sizeofInetPktinfo)
- if cm != nil {
- pi := (*inetPktinfo)(unsafe.Pointer(&m.Data(sizeofInetPktinfo)[0]))
- if ip := cm.Src.To4(); ip != nil {
- copy(pi.Addr[:], ip)
- }
- if cm.IfIndex > 0 {
- pi.setIfindex(cm.IfIndex)
- }
- }
- return m.Next(sizeofInetPktinfo)
-}
-
-func parsePacketInfo(cm *ControlMessage, b []byte) {
- pi := (*inetPktinfo)(unsafe.Pointer(&b[0]))
- cm.IfIndex = int(pi.Ifindex)
- if len(cm.Dst) < net.IPv4len {
- cm.Dst = make(net.IP, net.IPv4len)
- }
- copy(cm.Dst, pi.Addr[:])
-}
-
-func setControlMessage(c *socket.Conn, opt *rawOpt, cf ControlFlags, on bool) error {
- opt.Lock()
- defer opt.Unlock()
- if so, ok := sockOpts[ssoReceiveTTL]; ok && cf&FlagTTL != 0 {
- if err := so.SetInt(c, boolint(on)); err != nil {
- return err
- }
- if on {
- opt.set(FlagTTL)
- } else {
- opt.clear(FlagTTL)
- }
- }
- if so, ok := sockOpts[ssoPacketInfo]; ok {
- if cf&(FlagSrc|FlagDst|FlagInterface) != 0 {
- if err := so.SetInt(c, boolint(on)); err != nil {
- return err
- }
- if on {
- opt.set(cf & (FlagSrc | FlagDst | FlagInterface))
- } else {
- opt.clear(cf & (FlagSrc | FlagDst | FlagInterface))
- }
- }
- } else {
- if so, ok := sockOpts[ssoReceiveDst]; ok && cf&FlagDst != 0 {
- if err := so.SetInt(c, boolint(on)); err != nil {
- return err
- }
- if on {
- opt.set(FlagDst)
- } else {
- opt.clear(FlagDst)
- }
- }
- if so, ok := sockOpts[ssoReceiveInterface]; ok && cf&FlagInterface != 0 {
- if err := so.SetInt(c, boolint(on)); err != nil {
- return err
- }
- if on {
- opt.set(FlagInterface)
- } else {
- opt.clear(FlagInterface)
- }
- }
- }
- return nil
-}
diff --git a/vendor/golang.org/x/net/ipv4/dgramopt.go b/vendor/golang.org/x/net/ipv4/dgramopt.go
deleted file mode 100644
index c191c22..0000000
--- a/vendor/golang.org/x/net/ipv4/dgramopt.go
+++ /dev/null
@@ -1,264 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import (
- "net"
-
- "golang.org/x/net/bpf"
-)
-
-// MulticastTTL returns the time-to-live field value for outgoing
-// multicast packets.
-func (c *dgramOpt) MulticastTTL() (int, error) {
- if !c.ok() {
- return 0, errInvalidConn
- }
- so, ok := sockOpts[ssoMulticastTTL]
- if !ok {
- return 0, errNotImplemented
- }
- return so.GetInt(c.Conn)
-}
-
-// SetMulticastTTL sets the time-to-live field value for future
-// outgoing multicast packets.
-func (c *dgramOpt) SetMulticastTTL(ttl int) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoMulticastTTL]
- if !ok {
- return errNotImplemented
- }
- return so.SetInt(c.Conn, ttl)
-}
-
-// MulticastInterface returns the default interface for multicast
-// packet transmissions.
-func (c *dgramOpt) MulticastInterface() (*net.Interface, error) {
- if !c.ok() {
- return nil, errInvalidConn
- }
- so, ok := sockOpts[ssoMulticastInterface]
- if !ok {
- return nil, errNotImplemented
- }
- return so.getMulticastInterface(c.Conn)
-}
-
-// SetMulticastInterface sets the default interface for future
-// multicast packet transmissions.
-func (c *dgramOpt) SetMulticastInterface(ifi *net.Interface) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoMulticastInterface]
- if !ok {
- return errNotImplemented
- }
- return so.setMulticastInterface(c.Conn, ifi)
-}
-
-// MulticastLoopback reports whether transmitted multicast packets
-// should be copied and send back to the originator.
-func (c *dgramOpt) MulticastLoopback() (bool, error) {
- if !c.ok() {
- return false, errInvalidConn
- }
- so, ok := sockOpts[ssoMulticastLoopback]
- if !ok {
- return false, errNotImplemented
- }
- on, err := so.GetInt(c.Conn)
- if err != nil {
- return false, err
- }
- return on == 1, nil
-}
-
-// SetMulticastLoopback sets whether transmitted multicast packets
-// should be copied and send back to the originator.
-func (c *dgramOpt) SetMulticastLoopback(on bool) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoMulticastLoopback]
- if !ok {
- return errNotImplemented
- }
- return so.SetInt(c.Conn, boolint(on))
-}
-
-// JoinGroup joins the group address group on the interface ifi.
-// By default all sources that can cast data to group are accepted.
-// It's possible to mute and unmute data transmission from a specific
-// source by using ExcludeSourceSpecificGroup and
-// IncludeSourceSpecificGroup.
-// JoinGroup uses the system assigned multicast interface when ifi is
-// nil, although this is not recommended because the assignment
-// depends on platforms and sometimes it might require routing
-// configuration.
-func (c *dgramOpt) JoinGroup(ifi *net.Interface, group net.Addr) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoJoinGroup]
- if !ok {
- return errNotImplemented
- }
- grp := netAddrToIP4(group)
- if grp == nil {
- return errMissingAddress
- }
- return so.setGroup(c.Conn, ifi, grp)
-}
-
-// LeaveGroup leaves the group address group on the interface ifi
-// regardless of whether the group is any-source group or
-// source-specific group.
-func (c *dgramOpt) LeaveGroup(ifi *net.Interface, group net.Addr) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoLeaveGroup]
- if !ok {
- return errNotImplemented
- }
- grp := netAddrToIP4(group)
- if grp == nil {
- return errMissingAddress
- }
- return so.setGroup(c.Conn, ifi, grp)
-}
-
-// JoinSourceSpecificGroup joins the source-specific group comprising
-// group and source on the interface ifi.
-// JoinSourceSpecificGroup uses the system assigned multicast
-// interface when ifi is nil, although this is not recommended because
-// the assignment depends on platforms and sometimes it might require
-// routing configuration.
-func (c *dgramOpt) JoinSourceSpecificGroup(ifi *net.Interface, group, source net.Addr) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoJoinSourceGroup]
- if !ok {
- return errNotImplemented
- }
- grp := netAddrToIP4(group)
- if grp == nil {
- return errMissingAddress
- }
- src := netAddrToIP4(source)
- if src == nil {
- return errMissingAddress
- }
- return so.setSourceGroup(c.Conn, ifi, grp, src)
-}
-
-// LeaveSourceSpecificGroup leaves the source-specific group on the
-// interface ifi.
-func (c *dgramOpt) LeaveSourceSpecificGroup(ifi *net.Interface, group, source net.Addr) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoLeaveSourceGroup]
- if !ok {
- return errNotImplemented
- }
- grp := netAddrToIP4(group)
- if grp == nil {
- return errMissingAddress
- }
- src := netAddrToIP4(source)
- if src == nil {
- return errMissingAddress
- }
- return so.setSourceGroup(c.Conn, ifi, grp, src)
-}
-
-// ExcludeSourceSpecificGroup excludes the source-specific group from
-// the already joined any-source groups by JoinGroup on the interface
-// ifi.
-func (c *dgramOpt) ExcludeSourceSpecificGroup(ifi *net.Interface, group, source net.Addr) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoBlockSourceGroup]
- if !ok {
- return errNotImplemented
- }
- grp := netAddrToIP4(group)
- if grp == nil {
- return errMissingAddress
- }
- src := netAddrToIP4(source)
- if src == nil {
- return errMissingAddress
- }
- return so.setSourceGroup(c.Conn, ifi, grp, src)
-}
-
-// IncludeSourceSpecificGroup includes the excluded source-specific
-// group by ExcludeSourceSpecificGroup again on the interface ifi.
-func (c *dgramOpt) IncludeSourceSpecificGroup(ifi *net.Interface, group, source net.Addr) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoUnblockSourceGroup]
- if !ok {
- return errNotImplemented
- }
- grp := netAddrToIP4(group)
- if grp == nil {
- return errMissingAddress
- }
- src := netAddrToIP4(source)
- if src == nil {
- return errMissingAddress
- }
- return so.setSourceGroup(c.Conn, ifi, grp, src)
-}
-
-// ICMPFilter returns an ICMP filter.
-// Currently only Linux supports this.
-func (c *dgramOpt) ICMPFilter() (*ICMPFilter, error) {
- if !c.ok() {
- return nil, errInvalidConn
- }
- so, ok := sockOpts[ssoICMPFilter]
- if !ok {
- return nil, errNotImplemented
- }
- return so.getICMPFilter(c.Conn)
-}
-
-// SetICMPFilter deploys the ICMP filter.
-// Currently only Linux supports this.
-func (c *dgramOpt) SetICMPFilter(f *ICMPFilter) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoICMPFilter]
- if !ok {
- return errNotImplemented
- }
- return so.setICMPFilter(c.Conn, f)
-}
-
-// SetBPF attaches a BPF program to the connection.
-//
-// Only supported on Linux.
-func (c *dgramOpt) SetBPF(filter []bpf.RawInstruction) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoAttachFilter]
- if !ok {
- return errNotImplemented
- }
- return so.setBPF(c.Conn, filter)
-}
diff --git a/vendor/golang.org/x/net/ipv4/doc.go b/vendor/golang.org/x/net/ipv4/doc.go
deleted file mode 100644
index 2458349..0000000
--- a/vendor/golang.org/x/net/ipv4/doc.go
+++ /dev/null
@@ -1,244 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package ipv4 implements IP-level socket options for the Internet
-// Protocol version 4.
-//
-// The package provides IP-level socket options that allow
-// manipulation of IPv4 facilities.
-//
-// The IPv4 protocol and basic host requirements for IPv4 are defined
-// in RFC 791 and RFC 1122.
-// Host extensions for multicasting and socket interface extensions
-// for multicast source filters are defined in RFC 1112 and RFC 3678.
-// IGMPv1, IGMPv2 and IGMPv3 are defined in RFC 1112, RFC 2236 and RFC
-// 3376.
-// Source-specific multicast is defined in RFC 4607.
-//
-//
-// Unicasting
-//
-// The options for unicasting are available for net.TCPConn,
-// net.UDPConn and net.IPConn which are created as network connections
-// that use the IPv4 transport. When a single TCP connection carrying
-// a data flow of multiple packets needs to indicate the flow is
-// important, Conn is used to set the type-of-service field on the
-// IPv4 header for each packet.
-//
-// ln, err := net.Listen("tcp4", "0.0.0.0:1024")
-// if err != nil {
-// // error handling
-// }
-// defer ln.Close()
-// for {
-// c, err := ln.Accept()
-// if err != nil {
-// // error handling
-// }
-// go func(c net.Conn) {
-// defer c.Close()
-//
-// The outgoing packets will be labeled DiffServ assured forwarding
-// class 1 low drop precedence, known as AF11 packets.
-//
-// if err := ipv4.NewConn(c).SetTOS(0x28); err != nil {
-// // error handling
-// }
-// if _, err := c.Write(data); err != nil {
-// // error handling
-// }
-// }(c)
-// }
-//
-//
-// Multicasting
-//
-// The options for multicasting are available for net.UDPConn and
-// net.IPConn which are created as network connections that use the
-// IPv4 transport. A few network facilities must be prepared before
-// you begin multicasting, at a minimum joining network interfaces and
-// multicast groups.
-//
-// en0, err := net.InterfaceByName("en0")
-// if err != nil {
-// // error handling
-// }
-// en1, err := net.InterfaceByIndex(911)
-// if err != nil {
-// // error handling
-// }
-// group := net.IPv4(224, 0, 0, 250)
-//
-// First, an application listens to an appropriate address with an
-// appropriate service port.
-//
-// c, err := net.ListenPacket("udp4", "0.0.0.0:1024")
-// if err != nil {
-// // error handling
-// }
-// defer c.Close()
-//
-// Second, the application joins multicast groups, starts listening to
-// the groups on the specified network interfaces. Note that the
-// service port for transport layer protocol does not matter with this
-// operation as joining groups affects only network and link layer
-// protocols, such as IPv4 and Ethernet.
-//
-// p := ipv4.NewPacketConn(c)
-// if err := p.JoinGroup(en0, &net.UDPAddr{IP: group}); err != nil {
-// // error handling
-// }
-// if err := p.JoinGroup(en1, &net.UDPAddr{IP: group}); err != nil {
-// // error handling
-// }
-//
-// The application might set per packet control message transmissions
-// between the protocol stack within the kernel. When the application
-// needs a destination address on an incoming packet,
-// SetControlMessage of PacketConn is used to enable control message
-// transmissions.
-//
-// if err := p.SetControlMessage(ipv4.FlagDst, true); err != nil {
-// // error handling
-// }
-//
-// The application could identify whether the received packets are
-// of interest by using the control message that contains the
-// destination address of the received packet.
-//
-// b := make([]byte, 1500)
-// for {
-// n, cm, src, err := p.ReadFrom(b)
-// if err != nil {
-// // error handling
-// }
-// if cm.Dst.IsMulticast() {
-// if cm.Dst.Equal(group) {
-// // joined group, do something
-// } else {
-// // unknown group, discard
-// continue
-// }
-// }
-//
-// The application can also send both unicast and multicast packets.
-//
-// p.SetTOS(0x0)
-// p.SetTTL(16)
-// if _, err := p.WriteTo(data, nil, src); err != nil {
-// // error handling
-// }
-// dst := &net.UDPAddr{IP: group, Port: 1024}
-// for _, ifi := range []*net.Interface{en0, en1} {
-// if err := p.SetMulticastInterface(ifi); err != nil {
-// // error handling
-// }
-// p.SetMulticastTTL(2)
-// if _, err := p.WriteTo(data, nil, dst); err != nil {
-// // error handling
-// }
-// }
-// }
-//
-//
-// More multicasting
-//
-// An application that uses PacketConn or RawConn may join multiple
-// multicast groups. For example, a UDP listener with port 1024 might
-// join two different groups across over two different network
-// interfaces by using:
-//
-// c, err := net.ListenPacket("udp4", "0.0.0.0:1024")
-// if err != nil {
-// // error handling
-// }
-// defer c.Close()
-// p := ipv4.NewPacketConn(c)
-// if err := p.JoinGroup(en0, &net.UDPAddr{IP: net.IPv4(224, 0, 0, 248)}); err != nil {
-// // error handling
-// }
-// if err := p.JoinGroup(en0, &net.UDPAddr{IP: net.IPv4(224, 0, 0, 249)}); err != nil {
-// // error handling
-// }
-// if err := p.JoinGroup(en1, &net.UDPAddr{IP: net.IPv4(224, 0, 0, 249)}); err != nil {
-// // error handling
-// }
-//
-// It is possible for multiple UDP listeners that listen on the same
-// UDP port to join the same multicast group. The net package will
-// provide a socket that listens to a wildcard address with reusable
-// UDP port when an appropriate multicast address prefix is passed to
-// the net.ListenPacket or net.ListenUDP.
-//
-// c1, err := net.ListenPacket("udp4", "224.0.0.0:1024")
-// if err != nil {
-// // error handling
-// }
-// defer c1.Close()
-// c2, err := net.ListenPacket("udp4", "224.0.0.0:1024")
-// if err != nil {
-// // error handling
-// }
-// defer c2.Close()
-// p1 := ipv4.NewPacketConn(c1)
-// if err := p1.JoinGroup(en0, &net.UDPAddr{IP: net.IPv4(224, 0, 0, 248)}); err != nil {
-// // error handling
-// }
-// p2 := ipv4.NewPacketConn(c2)
-// if err := p2.JoinGroup(en0, &net.UDPAddr{IP: net.IPv4(224, 0, 0, 248)}); err != nil {
-// // error handling
-// }
-//
-// Also it is possible for the application to leave or rejoin a
-// multicast group on the network interface.
-//
-// if err := p.LeaveGroup(en0, &net.UDPAddr{IP: net.IPv4(224, 0, 0, 248)}); err != nil {
-// // error handling
-// }
-// if err := p.JoinGroup(en0, &net.UDPAddr{IP: net.IPv4(224, 0, 0, 250)}); err != nil {
-// // error handling
-// }
-//
-//
-// Source-specific multicasting
-//
-// An application that uses PacketConn or RawConn on IGMPv3 supported
-// platform is able to join source-specific multicast groups.
-// The application may use JoinSourceSpecificGroup and
-// LeaveSourceSpecificGroup for the operation known as "include" mode,
-//
-// ssmgroup := net.UDPAddr{IP: net.IPv4(232, 7, 8, 9)}
-// ssmsource := net.UDPAddr{IP: net.IPv4(192, 168, 0, 1)}
-// if err := p.JoinSourceSpecificGroup(en0, &ssmgroup, &ssmsource); err != nil {
-// // error handling
-// }
-// if err := p.LeaveSourceSpecificGroup(en0, &ssmgroup, &ssmsource); err != nil {
-// // error handling
-// }
-//
-// or JoinGroup, ExcludeSourceSpecificGroup,
-// IncludeSourceSpecificGroup and LeaveGroup for the operation known
-// as "exclude" mode.
-//
-// exclsource := net.UDPAddr{IP: net.IPv4(192, 168, 0, 254)}
-// if err := p.JoinGroup(en0, &ssmgroup); err != nil {
-// // error handling
-// }
-// if err := p.ExcludeSourceSpecificGroup(en0, &ssmgroup, &exclsource); err != nil {
-// // error handling
-// }
-// if err := p.LeaveGroup(en0, &ssmgroup); err != nil {
-// // error handling
-// }
-//
-// Note that it depends on each platform implementation what happens
-// when an application which runs on IGMPv3 unsupported platform uses
-// JoinSourceSpecificGroup and LeaveSourceSpecificGroup.
-// In general the platform tries to fall back to conversations using
-// IGMPv1 or IGMPv2 and starts to listen to multicast traffic.
-// In the fallback case, ExcludeSourceSpecificGroup and
-// IncludeSourceSpecificGroup may return an error.
-package ipv4 // import "golang.org/x/net/ipv4"
-
-// BUG(mikio): This package is not implemented on JS, NaCl and Plan 9.
diff --git a/vendor/golang.org/x/net/ipv4/endpoint.go b/vendor/golang.org/x/net/ipv4/endpoint.go
deleted file mode 100644
index 4a6d7a8..0000000
--- a/vendor/golang.org/x/net/ipv4/endpoint.go
+++ /dev/null
@@ -1,186 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import (
- "net"
- "time"
-
- "golang.org/x/net/internal/socket"
-)
-
-// BUG(mikio): On Windows, the JoinSourceSpecificGroup,
-// LeaveSourceSpecificGroup, ExcludeSourceSpecificGroup and
-// IncludeSourceSpecificGroup methods of PacketConn and RawConn are
-// not implemented.
-
-// A Conn represents a network endpoint that uses the IPv4 transport.
-// It is used to control basic IP-level socket options such as TOS and
-// TTL.
-type Conn struct {
- genericOpt
-}
-
-type genericOpt struct {
- *socket.Conn
-}
-
-func (c *genericOpt) ok() bool { return c != nil && c.Conn != nil }
-
-// NewConn returns a new Conn.
-func NewConn(c net.Conn) *Conn {
- cc, _ := socket.NewConn(c)
- return &Conn{
- genericOpt: genericOpt{Conn: cc},
- }
-}
-
-// A PacketConn represents a packet network endpoint that uses the
-// IPv4 transport. It is used to control several IP-level socket
-// options including multicasting. It also provides datagram based
-// network I/O methods specific to the IPv4 and higher layer protocols
-// such as UDP.
-type PacketConn struct {
- genericOpt
- dgramOpt
- payloadHandler
-}
-
-type dgramOpt struct {
- *socket.Conn
-}
-
-func (c *dgramOpt) ok() bool { return c != nil && c.Conn != nil }
-
-// SetControlMessage sets the per packet IP-level socket options.
-func (c *PacketConn) SetControlMessage(cf ControlFlags, on bool) error {
- if !c.payloadHandler.ok() {
- return errInvalidConn
- }
- return setControlMessage(c.dgramOpt.Conn, &c.payloadHandler.rawOpt, cf, on)
-}
-
-// SetDeadline sets the read and write deadlines associated with the
-// endpoint.
-func (c *PacketConn) SetDeadline(t time.Time) error {
- if !c.payloadHandler.ok() {
- return errInvalidConn
- }
- return c.payloadHandler.PacketConn.SetDeadline(t)
-}
-
-// SetReadDeadline sets the read deadline associated with the
-// endpoint.
-func (c *PacketConn) SetReadDeadline(t time.Time) error {
- if !c.payloadHandler.ok() {
- return errInvalidConn
- }
- return c.payloadHandler.PacketConn.SetReadDeadline(t)
-}
-
-// SetWriteDeadline sets the write deadline associated with the
-// endpoint.
-func (c *PacketConn) SetWriteDeadline(t time.Time) error {
- if !c.payloadHandler.ok() {
- return errInvalidConn
- }
- return c.payloadHandler.PacketConn.SetWriteDeadline(t)
-}
-
-// Close closes the endpoint.
-func (c *PacketConn) Close() error {
- if !c.payloadHandler.ok() {
- return errInvalidConn
- }
- return c.payloadHandler.PacketConn.Close()
-}
-
-// NewPacketConn returns a new PacketConn using c as its underlying
-// transport.
-func NewPacketConn(c net.PacketConn) *PacketConn {
- cc, _ := socket.NewConn(c.(net.Conn))
- p := &PacketConn{
- genericOpt: genericOpt{Conn: cc},
- dgramOpt: dgramOpt{Conn: cc},
- payloadHandler: payloadHandler{PacketConn: c, Conn: cc},
- }
- return p
-}
-
-// A RawConn represents a packet network endpoint that uses the IPv4
-// transport. It is used to control several IP-level socket options
-// including IPv4 header manipulation. It also provides datagram
-// based network I/O methods specific to the IPv4 and higher layer
-// protocols that handle IPv4 datagram directly such as OSPF, GRE.
-type RawConn struct {
- genericOpt
- dgramOpt
- packetHandler
-}
-
-// SetControlMessage sets the per packet IP-level socket options.
-func (c *RawConn) SetControlMessage(cf ControlFlags, on bool) error {
- if !c.packetHandler.ok() {
- return errInvalidConn
- }
- return setControlMessage(c.dgramOpt.Conn, &c.packetHandler.rawOpt, cf, on)
-}
-
-// SetDeadline sets the read and write deadlines associated with the
-// endpoint.
-func (c *RawConn) SetDeadline(t time.Time) error {
- if !c.packetHandler.ok() {
- return errInvalidConn
- }
- return c.packetHandler.IPConn.SetDeadline(t)
-}
-
-// SetReadDeadline sets the read deadline associated with the
-// endpoint.
-func (c *RawConn) SetReadDeadline(t time.Time) error {
- if !c.packetHandler.ok() {
- return errInvalidConn
- }
- return c.packetHandler.IPConn.SetReadDeadline(t)
-}
-
-// SetWriteDeadline sets the write deadline associated with the
-// endpoint.
-func (c *RawConn) SetWriteDeadline(t time.Time) error {
- if !c.packetHandler.ok() {
- return errInvalidConn
- }
- return c.packetHandler.IPConn.SetWriteDeadline(t)
-}
-
-// Close closes the endpoint.
-func (c *RawConn) Close() error {
- if !c.packetHandler.ok() {
- return errInvalidConn
- }
- return c.packetHandler.IPConn.Close()
-}
-
-// NewRawConn returns a new RawConn using c as its underlying
-// transport.
-func NewRawConn(c net.PacketConn) (*RawConn, error) {
- cc, err := socket.NewConn(c.(net.Conn))
- if err != nil {
- return nil, err
- }
- r := &RawConn{
- genericOpt: genericOpt{Conn: cc},
- dgramOpt: dgramOpt{Conn: cc},
- packetHandler: packetHandler{IPConn: c.(*net.IPConn), Conn: cc},
- }
- so, ok := sockOpts[ssoHeaderPrepend]
- if !ok {
- return nil, errNotImplemented
- }
- if err := so.SetInt(r.dgramOpt.Conn, boolint(true)); err != nil {
- return nil, err
- }
- return r, nil
-}
diff --git a/vendor/golang.org/x/net/ipv4/genericopt.go b/vendor/golang.org/x/net/ipv4/genericopt.go
deleted file mode 100644
index 51c1237..0000000
--- a/vendor/golang.org/x/net/ipv4/genericopt.go
+++ /dev/null
@@ -1,55 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-// TOS returns the type-of-service field value for outgoing packets.
-func (c *genericOpt) TOS() (int, error) {
- if !c.ok() {
- return 0, errInvalidConn
- }
- so, ok := sockOpts[ssoTOS]
- if !ok {
- return 0, errNotImplemented
- }
- return so.GetInt(c.Conn)
-}
-
-// SetTOS sets the type-of-service field value for future outgoing
-// packets.
-func (c *genericOpt) SetTOS(tos int) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoTOS]
- if !ok {
- return errNotImplemented
- }
- return so.SetInt(c.Conn, tos)
-}
-
-// TTL returns the time-to-live field value for outgoing packets.
-func (c *genericOpt) TTL() (int, error) {
- if !c.ok() {
- return 0, errInvalidConn
- }
- so, ok := sockOpts[ssoTTL]
- if !ok {
- return 0, errNotImplemented
- }
- return so.GetInt(c.Conn)
-}
-
-// SetTTL sets the time-to-live field value for future outgoing
-// packets.
-func (c *genericOpt) SetTTL(ttl int) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoTTL]
- if !ok {
- return errNotImplemented
- }
- return so.SetInt(c.Conn, ttl)
-}
diff --git a/vendor/golang.org/x/net/ipv4/header.go b/vendor/golang.org/x/net/ipv4/header.go
deleted file mode 100644
index a00a3ea..0000000
--- a/vendor/golang.org/x/net/ipv4/header.go
+++ /dev/null
@@ -1,172 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import (
- "encoding/binary"
- "fmt"
- "net"
- "runtime"
-
- "golang.org/x/net/internal/socket"
-)
-
-const (
- Version = 4 // protocol version
- HeaderLen = 20 // header length without extension headers
-)
-
-type HeaderFlags int
-
-const (
- MoreFragments HeaderFlags = 1 << iota // more fragments flag
- DontFragment // don't fragment flag
-)
-
-// A Header represents an IPv4 header.
-type Header struct {
- Version int // protocol version
- Len int // header length
- TOS int // type-of-service
- TotalLen int // packet total length
- ID int // identification
- Flags HeaderFlags // flags
- FragOff int // fragment offset
- TTL int // time-to-live
- Protocol int // next protocol
- Checksum int // checksum
- Src net.IP // source address
- Dst net.IP // destination address
- Options []byte // options, extension headers
-}
-
-func (h *Header) String() string {
- if h == nil {
- return ""
- }
- return fmt.Sprintf("ver=%d hdrlen=%d tos=%#x totallen=%d id=%#x flags=%#x fragoff=%#x ttl=%d proto=%d cksum=%#x src=%v dst=%v", h.Version, h.Len, h.TOS, h.TotalLen, h.ID, h.Flags, h.FragOff, h.TTL, h.Protocol, h.Checksum, h.Src, h.Dst)
-}
-
-// Marshal returns the binary encoding of h.
-//
-// The returned slice is in the format used by a raw IP socket on the
-// local system.
-// This may differ from the wire format, depending on the system.
-func (h *Header) Marshal() ([]byte, error) {
- if h == nil {
- return nil, errNilHeader
- }
- if h.Len < HeaderLen {
- return nil, errHeaderTooShort
- }
- hdrlen := HeaderLen + len(h.Options)
- b := make([]byte, hdrlen)
- b[0] = byte(Version<<4 | (hdrlen >> 2 & 0x0f))
- b[1] = byte(h.TOS)
- flagsAndFragOff := (h.FragOff & 0x1fff) | int(h.Flags<<13)
- switch runtime.GOOS {
- case "darwin", "ios", "dragonfly", "netbsd":
- socket.NativeEndian.PutUint16(b[2:4], uint16(h.TotalLen))
- socket.NativeEndian.PutUint16(b[6:8], uint16(flagsAndFragOff))
- case "freebsd":
- if freebsdVersion < 1100000 {
- socket.NativeEndian.PutUint16(b[2:4], uint16(h.TotalLen))
- socket.NativeEndian.PutUint16(b[6:8], uint16(flagsAndFragOff))
- } else {
- binary.BigEndian.PutUint16(b[2:4], uint16(h.TotalLen))
- binary.BigEndian.PutUint16(b[6:8], uint16(flagsAndFragOff))
- }
- default:
- binary.BigEndian.PutUint16(b[2:4], uint16(h.TotalLen))
- binary.BigEndian.PutUint16(b[6:8], uint16(flagsAndFragOff))
- }
- binary.BigEndian.PutUint16(b[4:6], uint16(h.ID))
- b[8] = byte(h.TTL)
- b[9] = byte(h.Protocol)
- binary.BigEndian.PutUint16(b[10:12], uint16(h.Checksum))
- if ip := h.Src.To4(); ip != nil {
- copy(b[12:16], ip[:net.IPv4len])
- }
- if ip := h.Dst.To4(); ip != nil {
- copy(b[16:20], ip[:net.IPv4len])
- } else {
- return nil, errMissingAddress
- }
- if len(h.Options) > 0 {
- copy(b[HeaderLen:], h.Options)
- }
- return b, nil
-}
-
-// Parse parses b as an IPv4 header and stores the result in h.
-//
-// The provided b must be in the format used by a raw IP socket on the
-// local system.
-// This may differ from the wire format, depending on the system.
-func (h *Header) Parse(b []byte) error {
- if h == nil || b == nil {
- return errNilHeader
- }
- if len(b) < HeaderLen {
- return errHeaderTooShort
- }
- hdrlen := int(b[0]&0x0f) << 2
- if len(b) < hdrlen {
- return errExtHeaderTooShort
- }
- h.Version = int(b[0] >> 4)
- h.Len = hdrlen
- h.TOS = int(b[1])
- h.ID = int(binary.BigEndian.Uint16(b[4:6]))
- h.TTL = int(b[8])
- h.Protocol = int(b[9])
- h.Checksum = int(binary.BigEndian.Uint16(b[10:12]))
- h.Src = net.IPv4(b[12], b[13], b[14], b[15])
- h.Dst = net.IPv4(b[16], b[17], b[18], b[19])
- switch runtime.GOOS {
- case "darwin", "ios", "dragonfly", "netbsd":
- h.TotalLen = int(socket.NativeEndian.Uint16(b[2:4])) + hdrlen
- h.FragOff = int(socket.NativeEndian.Uint16(b[6:8]))
- case "freebsd":
- if freebsdVersion < 1100000 {
- h.TotalLen = int(socket.NativeEndian.Uint16(b[2:4]))
- if freebsdVersion < 1000000 {
- h.TotalLen += hdrlen
- }
- h.FragOff = int(socket.NativeEndian.Uint16(b[6:8]))
- } else {
- h.TotalLen = int(binary.BigEndian.Uint16(b[2:4]))
- h.FragOff = int(binary.BigEndian.Uint16(b[6:8]))
- }
- default:
- h.TotalLen = int(binary.BigEndian.Uint16(b[2:4]))
- h.FragOff = int(binary.BigEndian.Uint16(b[6:8]))
- }
- h.Flags = HeaderFlags(h.FragOff&0xe000) >> 13
- h.FragOff = h.FragOff & 0x1fff
- optlen := hdrlen - HeaderLen
- if optlen > 0 && len(b) >= hdrlen {
- if cap(h.Options) < optlen {
- h.Options = make([]byte, optlen)
- } else {
- h.Options = h.Options[:optlen]
- }
- copy(h.Options, b[HeaderLen:hdrlen])
- }
- return nil
-}
-
-// ParseHeader parses b as an IPv4 header.
-//
-// The provided b must be in the format used by a raw IP socket on the
-// local system.
-// This may differ from the wire format, depending on the system.
-func ParseHeader(b []byte) (*Header, error) {
- h := new(Header)
- if err := h.Parse(b); err != nil {
- return nil, err
- }
- return h, nil
-}
diff --git a/vendor/golang.org/x/net/ipv4/helper.go b/vendor/golang.org/x/net/ipv4/helper.go
deleted file mode 100644
index e845a73..0000000
--- a/vendor/golang.org/x/net/ipv4/helper.go
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import (
- "errors"
- "net"
- "runtime"
-
- "golang.org/x/net/internal/socket"
-)
-
-var (
- errInvalidConn = errors.New("invalid connection")
- errMissingAddress = errors.New("missing address")
- errNilHeader = errors.New("nil header")
- errHeaderTooShort = errors.New("header too short")
- errExtHeaderTooShort = errors.New("extension header too short")
- errInvalidConnType = errors.New("invalid conn type")
- errNotImplemented = errors.New("not implemented on " + runtime.GOOS + "/" + runtime.GOARCH)
-
- // See https://www.freebsd.org/doc/en/books/porters-handbook/versions.html.
- freebsdVersion uint32
- compatFreeBSD32 bool // 386 emulation on amd64
-)
-
-// See golang.org/issue/30899.
-func adjustFreeBSD32(m *socket.Message) {
- // FreeBSD 12.0-RELEASE is affected by https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=236737
- if 1200086 <= freebsdVersion && freebsdVersion < 1201000 {
- l := (m.NN + 4 - 1) &^ (4 - 1)
- if m.NN < l && l <= len(m.OOB) {
- m.NN = l
- }
- }
-}
-
-func boolint(b bool) int {
- if b {
- return 1
- }
- return 0
-}
-
-func netAddrToIP4(a net.Addr) net.IP {
- switch v := a.(type) {
- case *net.UDPAddr:
- if ip := v.IP.To4(); ip != nil {
- return ip
- }
- case *net.IPAddr:
- if ip := v.IP.To4(); ip != nil {
- return ip
- }
- }
- return nil
-}
-
-func opAddr(a net.Addr) net.Addr {
- switch a.(type) {
- case *net.TCPAddr:
- if a == nil {
- return nil
- }
- case *net.UDPAddr:
- if a == nil {
- return nil
- }
- case *net.IPAddr:
- if a == nil {
- return nil
- }
- }
- return a
-}
diff --git a/vendor/golang.org/x/net/ipv4/iana.go b/vendor/golang.org/x/net/ipv4/iana.go
deleted file mode 100644
index 4375b40..0000000
--- a/vendor/golang.org/x/net/ipv4/iana.go
+++ /dev/null
@@ -1,38 +0,0 @@
-// go generate gen.go
-// Code generated by the command above; DO NOT EDIT.
-
-package ipv4
-
-// Internet Control Message Protocol (ICMP) Parameters, Updated: 2018-02-26
-const (
- ICMPTypeEchoReply ICMPType = 0 // Echo Reply
- ICMPTypeDestinationUnreachable ICMPType = 3 // Destination Unreachable
- ICMPTypeRedirect ICMPType = 5 // Redirect
- ICMPTypeEcho ICMPType = 8 // Echo
- ICMPTypeRouterAdvertisement ICMPType = 9 // Router Advertisement
- ICMPTypeRouterSolicitation ICMPType = 10 // Router Solicitation
- ICMPTypeTimeExceeded ICMPType = 11 // Time Exceeded
- ICMPTypeParameterProblem ICMPType = 12 // Parameter Problem
- ICMPTypeTimestamp ICMPType = 13 // Timestamp
- ICMPTypeTimestampReply ICMPType = 14 // Timestamp Reply
- ICMPTypePhoturis ICMPType = 40 // Photuris
- ICMPTypeExtendedEchoRequest ICMPType = 42 // Extended Echo Request
- ICMPTypeExtendedEchoReply ICMPType = 43 // Extended Echo Reply
-)
-
-// Internet Control Message Protocol (ICMP) Parameters, Updated: 2018-02-26
-var icmpTypes = map[ICMPType]string{
- 0: "echo reply",
- 3: "destination unreachable",
- 5: "redirect",
- 8: "echo",
- 9: "router advertisement",
- 10: "router solicitation",
- 11: "time exceeded",
- 12: "parameter problem",
- 13: "timestamp",
- 14: "timestamp reply",
- 40: "photuris",
- 42: "extended echo request",
- 43: "extended echo reply",
-}
diff --git a/vendor/golang.org/x/net/ipv4/icmp.go b/vendor/golang.org/x/net/ipv4/icmp.go
deleted file mode 100644
index 9902bb3..0000000
--- a/vendor/golang.org/x/net/ipv4/icmp.go
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import "golang.org/x/net/internal/iana"
-
-// An ICMPType represents a type of ICMP message.
-type ICMPType int
-
-func (typ ICMPType) String() string {
- s, ok := icmpTypes[typ]
- if !ok {
- return ""
- }
- return s
-}
-
-// Protocol returns the ICMPv4 protocol number.
-func (typ ICMPType) Protocol() int {
- return iana.ProtocolICMP
-}
-
-// An ICMPFilter represents an ICMP message filter for incoming
-// packets. The filter belongs to a packet delivery path on a host and
-// it cannot interact with forwarding packets or tunnel-outer packets.
-//
-// Note: RFC 8200 defines a reasonable role model and it works not
-// only for IPv6 but IPv4. A node means a device that implements IP.
-// A router means a node that forwards IP packets not explicitly
-// addressed to itself, and a host means a node that is not a router.
-type ICMPFilter struct {
- icmpFilter
-}
-
-// Accept accepts incoming ICMP packets including the type field value
-// typ.
-func (f *ICMPFilter) Accept(typ ICMPType) {
- f.accept(typ)
-}
-
-// Block blocks incoming ICMP packets including the type field value
-// typ.
-func (f *ICMPFilter) Block(typ ICMPType) {
- f.block(typ)
-}
-
-// SetAll sets the filter action to the filter.
-func (f *ICMPFilter) SetAll(block bool) {
- f.setAll(block)
-}
-
-// WillBlock reports whether the ICMP type will be blocked.
-func (f *ICMPFilter) WillBlock(typ ICMPType) bool {
- return f.willBlock(typ)
-}
diff --git a/vendor/golang.org/x/net/ipv4/icmp_linux.go b/vendor/golang.org/x/net/ipv4/icmp_linux.go
deleted file mode 100644
index 6e1c5c8..0000000
--- a/vendor/golang.org/x/net/ipv4/icmp_linux.go
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-func (f *icmpFilter) accept(typ ICMPType) {
- f.Data &^= 1 << (uint32(typ) & 31)
-}
-
-func (f *icmpFilter) block(typ ICMPType) {
- f.Data |= 1 << (uint32(typ) & 31)
-}
-
-func (f *icmpFilter) setAll(block bool) {
- if block {
- f.Data = 1<<32 - 1
- } else {
- f.Data = 0
- }
-}
-
-func (f *icmpFilter) willBlock(typ ICMPType) bool {
- return f.Data&(1<<(uint32(typ)&31)) != 0
-}
diff --git a/vendor/golang.org/x/net/ipv4/icmp_stub.go b/vendor/golang.org/x/net/ipv4/icmp_stub.go
deleted file mode 100644
index cd4ee6e..0000000
--- a/vendor/golang.org/x/net/ipv4/icmp_stub.go
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !linux
-// +build !linux
-
-package ipv4
-
-const sizeofICMPFilter = 0x0
-
-type icmpFilter struct {
-}
-
-func (f *icmpFilter) accept(typ ICMPType) {
-}
-
-func (f *icmpFilter) block(typ ICMPType) {
-}
-
-func (f *icmpFilter) setAll(block bool) {
-}
-
-func (f *icmpFilter) willBlock(typ ICMPType) bool {
- return false
-}
diff --git a/vendor/golang.org/x/net/ipv4/packet.go b/vendor/golang.org/x/net/ipv4/packet.go
deleted file mode 100644
index 7d784e0..0000000
--- a/vendor/golang.org/x/net/ipv4/packet.go
+++ /dev/null
@@ -1,117 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import (
- "net"
-
- "golang.org/x/net/internal/socket"
-)
-
-// BUG(mikio): On Windows, the ReadFrom and WriteTo methods of RawConn
-// are not implemented.
-
-// A packetHandler represents the IPv4 datagram handler.
-type packetHandler struct {
- *net.IPConn
- *socket.Conn
- rawOpt
-}
-
-func (c *packetHandler) ok() bool { return c != nil && c.IPConn != nil && c.Conn != nil }
-
-// ReadFrom reads an IPv4 datagram from the endpoint c, copying the
-// datagram into b. It returns the received datagram as the IPv4
-// header h, the payload p and the control message cm.
-func (c *packetHandler) ReadFrom(b []byte) (h *Header, p []byte, cm *ControlMessage, err error) {
- if !c.ok() {
- return nil, nil, nil, errInvalidConn
- }
- c.rawOpt.RLock()
- m := socket.Message{
- Buffers: [][]byte{b},
- OOB: NewControlMessage(c.rawOpt.cflags),
- }
- c.rawOpt.RUnlock()
- if err := c.RecvMsg(&m, 0); err != nil {
- return nil, nil, nil, &net.OpError{Op: "read", Net: c.IPConn.LocalAddr().Network(), Source: c.IPConn.LocalAddr(), Err: err}
- }
- var hs []byte
- if hs, p, err = slicePacket(b[:m.N]); err != nil {
- return nil, nil, nil, &net.OpError{Op: "read", Net: c.IPConn.LocalAddr().Network(), Source: c.IPConn.LocalAddr(), Err: err}
- }
- if h, err = ParseHeader(hs); err != nil {
- return nil, nil, nil, &net.OpError{Op: "read", Net: c.IPConn.LocalAddr().Network(), Source: c.IPConn.LocalAddr(), Err: err}
- }
- if m.NN > 0 {
- if compatFreeBSD32 {
- adjustFreeBSD32(&m)
- }
- cm = new(ControlMessage)
- if err := cm.Parse(m.OOB[:m.NN]); err != nil {
- return nil, nil, nil, &net.OpError{Op: "read", Net: c.IPConn.LocalAddr().Network(), Source: c.IPConn.LocalAddr(), Err: err}
- }
- }
- if src, ok := m.Addr.(*net.IPAddr); ok && cm != nil {
- cm.Src = src.IP
- }
- return
-}
-
-func slicePacket(b []byte) (h, p []byte, err error) {
- if len(b) < HeaderLen {
- return nil, nil, errHeaderTooShort
- }
- hdrlen := int(b[0]&0x0f) << 2
- return b[:hdrlen], b[hdrlen:], nil
-}
-
-// WriteTo writes an IPv4 datagram through the endpoint c, copying the
-// datagram from the IPv4 header h and the payload p. The control
-// message cm allows the datagram path and the outgoing interface to be
-// specified. Currently only Darwin and Linux support this. The cm
-// may be nil if control of the outgoing datagram is not required.
-//
-// The IPv4 header h must contain appropriate fields that include:
-//
-// Version =
-// Len =
-// TOS =
-// TotalLen =
-// ID = platform sets an appropriate value if ID is zero
-// FragOff =
-// TTL =
-// Protocol =
-// Checksum = platform sets an appropriate value if Checksum is zero
-// Src = platform sets an appropriate value if Src is nil
-// Dst =
-// Options = optional
-func (c *packetHandler) WriteTo(h *Header, p []byte, cm *ControlMessage) error {
- if !c.ok() {
- return errInvalidConn
- }
- m := socket.Message{
- OOB: cm.Marshal(),
- }
- wh, err := h.Marshal()
- if err != nil {
- return err
- }
- m.Buffers = [][]byte{wh, p}
- dst := new(net.IPAddr)
- if cm != nil {
- if ip := cm.Dst.To4(); ip != nil {
- dst.IP = ip
- }
- }
- if dst.IP == nil {
- dst.IP = h.Dst
- }
- m.Addr = dst
- if err := c.SendMsg(&m, 0); err != nil {
- return &net.OpError{Op: "write", Net: c.IPConn.LocalAddr().Network(), Source: c.IPConn.LocalAddr(), Addr: opAddr(dst), Err: err}
- }
- return nil
-}
diff --git a/vendor/golang.org/x/net/ipv4/payload.go b/vendor/golang.org/x/net/ipv4/payload.go
deleted file mode 100644
index f95f811..0000000
--- a/vendor/golang.org/x/net/ipv4/payload.go
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import (
- "net"
-
- "golang.org/x/net/internal/socket"
-)
-
-// BUG(mikio): On Windows, the ControlMessage for ReadFrom and WriteTo
-// methods of PacketConn is not implemented.
-
-// A payloadHandler represents the IPv4 datagram payload handler.
-type payloadHandler struct {
- net.PacketConn
- *socket.Conn
- rawOpt
-}
-
-func (c *payloadHandler) ok() bool { return c != nil && c.PacketConn != nil && c.Conn != nil }
diff --git a/vendor/golang.org/x/net/ipv4/payload_cmsg.go b/vendor/golang.org/x/net/ipv4/payload_cmsg.go
deleted file mode 100644
index 1bb370e..0000000
--- a/vendor/golang.org/x/net/ipv4/payload_cmsg.go
+++ /dev/null
@@ -1,85 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris zos
-
-package ipv4
-
-import (
- "net"
-
- "golang.org/x/net/internal/socket"
-)
-
-// ReadFrom reads a payload of the received IPv4 datagram, from the
-// endpoint c, copying the payload into b. It returns the number of
-// bytes copied into b, the control message cm and the source address
-// src of the received datagram.
-func (c *payloadHandler) ReadFrom(b []byte) (n int, cm *ControlMessage, src net.Addr, err error) {
- if !c.ok() {
- return 0, nil, nil, errInvalidConn
- }
- c.rawOpt.RLock()
- m := socket.Message{
- OOB: NewControlMessage(c.rawOpt.cflags),
- }
- c.rawOpt.RUnlock()
- switch c.PacketConn.(type) {
- case *net.UDPConn:
- m.Buffers = [][]byte{b}
- if err := c.RecvMsg(&m, 0); err != nil {
- return 0, nil, nil, &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
- }
- case *net.IPConn:
- h := make([]byte, HeaderLen)
- m.Buffers = [][]byte{h, b}
- if err := c.RecvMsg(&m, 0); err != nil {
- return 0, nil, nil, &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
- }
- hdrlen := int(h[0]&0x0f) << 2
- if hdrlen > len(h) {
- d := hdrlen - len(h)
- copy(b, b[d:])
- m.N -= d
- } else {
- m.N -= hdrlen
- }
- default:
- return 0, nil, nil, &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: errInvalidConnType}
- }
- if m.NN > 0 {
- if compatFreeBSD32 {
- adjustFreeBSD32(&m)
- }
- cm = new(ControlMessage)
- if err := cm.Parse(m.OOB[:m.NN]); err != nil {
- return 0, nil, nil, &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
- }
- cm.Src = netAddrToIP4(m.Addr)
- }
- return m.N, cm, m.Addr, nil
-}
-
-// WriteTo writes a payload of the IPv4 datagram, to the destination
-// address dst through the endpoint c, copying the payload from b. It
-// returns the number of bytes written. The control message cm allows
-// the datagram path and the outgoing interface to be specified.
-// Currently only Darwin and Linux support this. The cm may be nil if
-// control of the outgoing datagram is not required.
-func (c *payloadHandler) WriteTo(b []byte, cm *ControlMessage, dst net.Addr) (n int, err error) {
- if !c.ok() {
- return 0, errInvalidConn
- }
- m := socket.Message{
- Buffers: [][]byte{b},
- OOB: cm.Marshal(),
- Addr: dst,
- }
- err = c.SendMsg(&m, 0)
- if err != nil {
- err = &net.OpError{Op: "write", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Addr: opAddr(dst), Err: err}
- }
- return m.N, err
-}
diff --git a/vendor/golang.org/x/net/ipv4/payload_nocmsg.go b/vendor/golang.org/x/net/ipv4/payload_nocmsg.go
deleted file mode 100644
index 53f0794..0000000
--- a/vendor/golang.org/x/net/ipv4/payload_nocmsg.go
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !zos
-// +build !aix,!darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!zos
-
-package ipv4
-
-import "net"
-
-// ReadFrom reads a payload of the received IPv4 datagram, from the
-// endpoint c, copying the payload into b. It returns the number of
-// bytes copied into b, the control message cm and the source address
-// src of the received datagram.
-func (c *payloadHandler) ReadFrom(b []byte) (n int, cm *ControlMessage, src net.Addr, err error) {
- if !c.ok() {
- return 0, nil, nil, errInvalidConn
- }
- if n, src, err = c.PacketConn.ReadFrom(b); err != nil {
- return 0, nil, nil, err
- }
- return
-}
-
-// WriteTo writes a payload of the IPv4 datagram, to the destination
-// address dst through the endpoint c, copying the payload from b. It
-// returns the number of bytes written. The control message cm allows
-// the datagram path and the outgoing interface to be specified.
-// Currently only Darwin and Linux support this. The cm may be nil if
-// control of the outgoing datagram is not required.
-func (c *payloadHandler) WriteTo(b []byte, cm *ControlMessage, dst net.Addr) (n int, err error) {
- if !c.ok() {
- return 0, errInvalidConn
- }
- if dst == nil {
- return 0, errMissingAddress
- }
- return c.PacketConn.WriteTo(b, dst)
-}
diff --git a/vendor/golang.org/x/net/ipv4/sockopt.go b/vendor/golang.org/x/net/ipv4/sockopt.go
deleted file mode 100644
index 22e90c0..0000000
--- a/vendor/golang.org/x/net/ipv4/sockopt.go
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import "golang.org/x/net/internal/socket"
-
-// Sticky socket options
-const (
- ssoTOS = iota // header field for unicast packet
- ssoTTL // header field for unicast packet
- ssoMulticastTTL // header field for multicast packet
- ssoMulticastInterface // outbound interface for multicast packet
- ssoMulticastLoopback // loopback for multicast packet
- ssoReceiveTTL // header field on received packet
- ssoReceiveDst // header field on received packet
- ssoReceiveInterface // inbound interface on received packet
- ssoPacketInfo // incbound or outbound packet path
- ssoHeaderPrepend // ipv4 header prepend
- ssoStripHeader // strip ipv4 header
- ssoICMPFilter // icmp filter
- ssoJoinGroup // any-source multicast
- ssoLeaveGroup // any-source multicast
- ssoJoinSourceGroup // source-specific multicast
- ssoLeaveSourceGroup // source-specific multicast
- ssoBlockSourceGroup // any-source or source-specific multicast
- ssoUnblockSourceGroup // any-source or source-specific multicast
- ssoAttachFilter // attach BPF for filtering inbound traffic
-)
-
-// Sticky socket option value types
-const (
- ssoTypeIPMreq = iota + 1
- ssoTypeIPMreqn
- ssoTypeGroupReq
- ssoTypeGroupSourceReq
-)
-
-// A sockOpt represents a binding for sticky socket option.
-type sockOpt struct {
- socket.Option
- typ int // hint for option value type; optional
-}
diff --git a/vendor/golang.org/x/net/ipv4/sockopt_posix.go b/vendor/golang.org/x/net/ipv4/sockopt_posix.go
deleted file mode 100644
index eb07c1c..0000000
--- a/vendor/golang.org/x/net/ipv4/sockopt_posix.go
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || windows || zos
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris windows zos
-
-package ipv4
-
-import (
- "net"
- "unsafe"
-
- "golang.org/x/net/bpf"
- "golang.org/x/net/internal/socket"
-)
-
-func (so *sockOpt) getMulticastInterface(c *socket.Conn) (*net.Interface, error) {
- switch so.typ {
- case ssoTypeIPMreqn:
- return so.getIPMreqn(c)
- default:
- return so.getMulticastIf(c)
- }
-}
-
-func (so *sockOpt) setMulticastInterface(c *socket.Conn, ifi *net.Interface) error {
- switch so.typ {
- case ssoTypeIPMreqn:
- return so.setIPMreqn(c, ifi, nil)
- default:
- return so.setMulticastIf(c, ifi)
- }
-}
-
-func (so *sockOpt) getICMPFilter(c *socket.Conn) (*ICMPFilter, error) {
- b := make([]byte, so.Len)
- n, err := so.Get(c, b)
- if err != nil {
- return nil, err
- }
- if n != sizeofICMPFilter {
- return nil, errNotImplemented
- }
- return (*ICMPFilter)(unsafe.Pointer(&b[0])), nil
-}
-
-func (so *sockOpt) setICMPFilter(c *socket.Conn, f *ICMPFilter) error {
- b := (*[sizeofICMPFilter]byte)(unsafe.Pointer(f))[:sizeofICMPFilter]
- return so.Set(c, b)
-}
-
-func (so *sockOpt) setGroup(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
- switch so.typ {
- case ssoTypeIPMreq:
- return so.setIPMreq(c, ifi, grp)
- case ssoTypeIPMreqn:
- return so.setIPMreqn(c, ifi, grp)
- case ssoTypeGroupReq:
- return so.setGroupReq(c, ifi, grp)
- default:
- return errNotImplemented
- }
-}
-
-func (so *sockOpt) setSourceGroup(c *socket.Conn, ifi *net.Interface, grp, src net.IP) error {
- return so.setGroupSourceReq(c, ifi, grp, src)
-}
-
-func (so *sockOpt) setBPF(c *socket.Conn, f []bpf.RawInstruction) error {
- return so.setAttachFilter(c, f)
-}
diff --git a/vendor/golang.org/x/net/ipv4/sockopt_stub.go b/vendor/golang.org/x/net/ipv4/sockopt_stub.go
deleted file mode 100644
index cf03689..0000000
--- a/vendor/golang.org/x/net/ipv4/sockopt_stub.go
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !windows && !zos
-// +build !aix,!darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows,!zos
-
-package ipv4
-
-import (
- "net"
-
- "golang.org/x/net/bpf"
- "golang.org/x/net/internal/socket"
-)
-
-func (so *sockOpt) getMulticastInterface(c *socket.Conn) (*net.Interface, error) {
- return nil, errNotImplemented
-}
-
-func (so *sockOpt) setMulticastInterface(c *socket.Conn, ifi *net.Interface) error {
- return errNotImplemented
-}
-
-func (so *sockOpt) getICMPFilter(c *socket.Conn) (*ICMPFilter, error) {
- return nil, errNotImplemented
-}
-
-func (so *sockOpt) setICMPFilter(c *socket.Conn, f *ICMPFilter) error {
- return errNotImplemented
-}
-
-func (so *sockOpt) setGroup(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
- return errNotImplemented
-}
-
-func (so *sockOpt) setSourceGroup(c *socket.Conn, ifi *net.Interface, grp, src net.IP) error {
- return errNotImplemented
-}
-
-func (so *sockOpt) setBPF(c *socket.Conn, f []bpf.RawInstruction) error {
- return errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/ipv4/sys_aix.go b/vendor/golang.org/x/net/ipv4/sys_aix.go
deleted file mode 100644
index 02730cd..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_aix.go
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Added for go1.11 compatibility
-//go:build aix
-// +build aix
-
-package ipv4
-
-import (
- "net"
- "syscall"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-// IP_RECVIF is defined on AIX but doesn't work. IP_RECVINTERFACE must be used instead.
-const sockoptReceiveInterface = unix.IP_RECVINTERFACE
-
-var (
- ctlOpts = [ctlMax]ctlOpt{
- ctlTTL: {unix.IP_RECVTTL, 1, marshalTTL, parseTTL},
- ctlDst: {unix.IP_RECVDSTADDR, net.IPv4len, marshalDst, parseDst},
- ctlInterface: {unix.IP_RECVINTERFACE, syscall.SizeofSockaddrDatalink, marshalInterface, parseInterface},
- }
-
- sockOpts = map[int]*sockOpt{
- ssoTOS: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_TOS, Len: 4}},
- ssoTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_TTL, Len: 4}},
- ssoMulticastTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_TTL, Len: 1}},
- ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_IF, Len: 4}},
- ssoMulticastLoopback: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_LOOP, Len: 1}},
- ssoReceiveTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVTTL, Len: 4}},
- ssoReceiveDst: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVDSTADDR, Len: 4}},
- ssoReceiveInterface: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVINTERFACE, Len: 4}},
- ssoHeaderPrepend: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_HDRINCL, Len: 4}},
- ssoJoinGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_ADD_MEMBERSHIP, Len: sizeofIPMreq}, typ: ssoTypeIPMreq},
- ssoLeaveGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_DROP_MEMBERSHIP, Len: sizeofIPMreq}, typ: ssoTypeIPMreq},
- }
-)
diff --git a/vendor/golang.org/x/net/ipv4/sys_asmreq.go b/vendor/golang.org/x/net/ipv4/sys_asmreq.go
deleted file mode 100644
index 22322b3..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_asmreq.go
+++ /dev/null
@@ -1,123 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || netbsd || openbsd || solaris || windows
-// +build aix darwin dragonfly freebsd netbsd openbsd solaris windows
-
-package ipv4
-
-import (
- "errors"
- "net"
- "unsafe"
-
- "golang.org/x/net/internal/socket"
-)
-
-var errNoSuchInterface = errors.New("no such interface")
-
-func (so *sockOpt) setIPMreq(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
- mreq := ipMreq{Multiaddr: [4]byte{grp[0], grp[1], grp[2], grp[3]}}
- if err := setIPMreqInterface(&mreq, ifi); err != nil {
- return err
- }
- b := (*[sizeofIPMreq]byte)(unsafe.Pointer(&mreq))[:sizeofIPMreq]
- return so.Set(c, b)
-}
-
-func (so *sockOpt) getMulticastIf(c *socket.Conn) (*net.Interface, error) {
- var b [4]byte
- if _, err := so.Get(c, b[:]); err != nil {
- return nil, err
- }
- ifi, err := netIP4ToInterface(net.IPv4(b[0], b[1], b[2], b[3]))
- if err != nil {
- return nil, err
- }
- return ifi, nil
-}
-
-func (so *sockOpt) setMulticastIf(c *socket.Conn, ifi *net.Interface) error {
- ip, err := netInterfaceToIP4(ifi)
- if err != nil {
- return err
- }
- var b [4]byte
- copy(b[:], ip)
- return so.Set(c, b[:])
-}
-
-func setIPMreqInterface(mreq *ipMreq, ifi *net.Interface) error {
- if ifi == nil {
- return nil
- }
- ifat, err := ifi.Addrs()
- if err != nil {
- return err
- }
- for _, ifa := range ifat {
- switch ifa := ifa.(type) {
- case *net.IPAddr:
- if ip := ifa.IP.To4(); ip != nil {
- copy(mreq.Interface[:], ip)
- return nil
- }
- case *net.IPNet:
- if ip := ifa.IP.To4(); ip != nil {
- copy(mreq.Interface[:], ip)
- return nil
- }
- }
- }
- return errNoSuchInterface
-}
-
-func netIP4ToInterface(ip net.IP) (*net.Interface, error) {
- ift, err := net.Interfaces()
- if err != nil {
- return nil, err
- }
- for _, ifi := range ift {
- ifat, err := ifi.Addrs()
- if err != nil {
- return nil, err
- }
- for _, ifa := range ifat {
- switch ifa := ifa.(type) {
- case *net.IPAddr:
- if ip.Equal(ifa.IP) {
- return &ifi, nil
- }
- case *net.IPNet:
- if ip.Equal(ifa.IP) {
- return &ifi, nil
- }
- }
- }
- }
- return nil, errNoSuchInterface
-}
-
-func netInterfaceToIP4(ifi *net.Interface) (net.IP, error) {
- if ifi == nil {
- return net.IPv4zero.To4(), nil
- }
- ifat, err := ifi.Addrs()
- if err != nil {
- return nil, err
- }
- for _, ifa := range ifat {
- switch ifa := ifa.(type) {
- case *net.IPAddr:
- if ip := ifa.IP.To4(); ip != nil {
- return ip, nil
- }
- case *net.IPNet:
- if ip := ifa.IP.To4(); ip != nil {
- return ip, nil
- }
- }
- }
- return nil, errNoSuchInterface
-}
diff --git a/vendor/golang.org/x/net/ipv4/sys_asmreq_stub.go b/vendor/golang.org/x/net/ipv4/sys_asmreq_stub.go
deleted file mode 100644
index fde6401..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_asmreq_stub.go
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !darwin && !dragonfly && !freebsd && !netbsd && !openbsd && !solaris && !windows
-// +build !aix,!darwin,!dragonfly,!freebsd,!netbsd,!openbsd,!solaris,!windows
-
-package ipv4
-
-import (
- "net"
-
- "golang.org/x/net/internal/socket"
-)
-
-func (so *sockOpt) setIPMreq(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
- return errNotImplemented
-}
-
-func (so *sockOpt) getMulticastIf(c *socket.Conn) (*net.Interface, error) {
- return nil, errNotImplemented
-}
-
-func (so *sockOpt) setMulticastIf(c *socket.Conn, ifi *net.Interface) error {
- return errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/ipv4/sys_asmreqn.go b/vendor/golang.org/x/net/ipv4/sys_asmreqn.go
deleted file mode 100644
index 54eb990..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_asmreqn.go
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build darwin || freebsd || linux
-// +build darwin freebsd linux
-
-package ipv4
-
-import (
- "net"
- "unsafe"
-
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-func (so *sockOpt) getIPMreqn(c *socket.Conn) (*net.Interface, error) {
- b := make([]byte, so.Len)
- if _, err := so.Get(c, b); err != nil {
- return nil, err
- }
- mreqn := (*unix.IPMreqn)(unsafe.Pointer(&b[0]))
- if mreqn.Ifindex == 0 {
- return nil, nil
- }
- ifi, err := net.InterfaceByIndex(int(mreqn.Ifindex))
- if err != nil {
- return nil, err
- }
- return ifi, nil
-}
-
-func (so *sockOpt) setIPMreqn(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
- var mreqn unix.IPMreqn
- if ifi != nil {
- mreqn.Ifindex = int32(ifi.Index)
- }
- if grp != nil {
- mreqn.Multiaddr = [4]byte{grp[0], grp[1], grp[2], grp[3]}
- }
- b := (*[unix.SizeofIPMreqn]byte)(unsafe.Pointer(&mreqn))[:unix.SizeofIPMreqn]
- return so.Set(c, b)
-}
diff --git a/vendor/golang.org/x/net/ipv4/sys_asmreqn_stub.go b/vendor/golang.org/x/net/ipv4/sys_asmreqn_stub.go
deleted file mode 100644
index dcb15f2..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_asmreqn_stub.go
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !darwin && !freebsd && !linux
-// +build !darwin,!freebsd,!linux
-
-package ipv4
-
-import (
- "net"
-
- "golang.org/x/net/internal/socket"
-)
-
-func (so *sockOpt) getIPMreqn(c *socket.Conn) (*net.Interface, error) {
- return nil, errNotImplemented
-}
-
-func (so *sockOpt) setIPMreqn(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
- return errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/ipv4/sys_bpf.go b/vendor/golang.org/x/net/ipv4/sys_bpf.go
deleted file mode 100644
index fb11e32..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_bpf.go
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build linux
-// +build linux
-
-package ipv4
-
-import (
- "unsafe"
-
- "golang.org/x/net/bpf"
- "golang.org/x/net/internal/socket"
- "golang.org/x/sys/unix"
-)
-
-func (so *sockOpt) setAttachFilter(c *socket.Conn, f []bpf.RawInstruction) error {
- prog := unix.SockFprog{
- Len: uint16(len(f)),
- Filter: (*unix.SockFilter)(unsafe.Pointer(&f[0])),
- }
- b := (*[unix.SizeofSockFprog]byte)(unsafe.Pointer(&prog))[:unix.SizeofSockFprog]
- return so.Set(c, b)
-}
diff --git a/vendor/golang.org/x/net/ipv4/sys_bpf_stub.go b/vendor/golang.org/x/net/ipv4/sys_bpf_stub.go
deleted file mode 100644
index fc53a0d..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_bpf_stub.go
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !linux
-// +build !linux
-
-package ipv4
-
-import (
- "golang.org/x/net/bpf"
- "golang.org/x/net/internal/socket"
-)
-
-func (so *sockOpt) setAttachFilter(c *socket.Conn, f []bpf.RawInstruction) error {
- return errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/ipv4/sys_bsd.go b/vendor/golang.org/x/net/ipv4/sys_bsd.go
deleted file mode 100644
index e191b2f..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_bsd.go
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build netbsd || openbsd
-// +build netbsd openbsd
-
-package ipv4
-
-import (
- "net"
- "syscall"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-const sockoptReceiveInterface = unix.IP_RECVIF
-
-var (
- ctlOpts = [ctlMax]ctlOpt{
- ctlTTL: {unix.IP_RECVTTL, 1, marshalTTL, parseTTL},
- ctlDst: {unix.IP_RECVDSTADDR, net.IPv4len, marshalDst, parseDst},
- ctlInterface: {unix.IP_RECVIF, syscall.SizeofSockaddrDatalink, marshalInterface, parseInterface},
- }
-
- sockOpts = map[int]*sockOpt{
- ssoTOS: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_TOS, Len: 4}},
- ssoTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_TTL, Len: 4}},
- ssoMulticastTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_TTL, Len: 1}},
- ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_IF, Len: 4}},
- ssoMulticastLoopback: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_LOOP, Len: 1}},
- ssoReceiveTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVTTL, Len: 4}},
- ssoReceiveDst: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVDSTADDR, Len: 4}},
- ssoReceiveInterface: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVIF, Len: 4}},
- ssoHeaderPrepend: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_HDRINCL, Len: 4}},
- ssoJoinGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_ADD_MEMBERSHIP, Len: sizeofIPMreq}, typ: ssoTypeIPMreq},
- ssoLeaveGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_DROP_MEMBERSHIP, Len: sizeofIPMreq}, typ: ssoTypeIPMreq},
- }
-)
diff --git a/vendor/golang.org/x/net/ipv4/sys_darwin.go b/vendor/golang.org/x/net/ipv4/sys_darwin.go
deleted file mode 100644
index cac6f3c..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_darwin.go
+++ /dev/null
@@ -1,69 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import (
- "net"
- "syscall"
- "unsafe"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-const sockoptReceiveInterface = unix.IP_RECVIF
-
-var (
- ctlOpts = [ctlMax]ctlOpt{
- ctlTTL: {unix.IP_RECVTTL, 1, marshalTTL, parseTTL},
- ctlDst: {unix.IP_RECVDSTADDR, net.IPv4len, marshalDst, parseDst},
- ctlInterface: {unix.IP_RECVIF, syscall.SizeofSockaddrDatalink, marshalInterface, parseInterface},
- ctlPacketInfo: {unix.IP_PKTINFO, sizeofInetPktinfo, marshalPacketInfo, parsePacketInfo},
- }
-
- sockOpts = map[int]*sockOpt{
- ssoTOS: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_TOS, Len: 4}},
- ssoTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_TTL, Len: 4}},
- ssoMulticastTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_TTL, Len: 1}},
- ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_IF, Len: unix.SizeofIPMreqn}, typ: ssoTypeIPMreqn},
- ssoMulticastLoopback: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_LOOP, Len: 4}},
- ssoReceiveTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVTTL, Len: 4}},
- ssoReceiveDst: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVDSTADDR, Len: 4}},
- ssoReceiveInterface: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVIF, Len: 4}},
- ssoHeaderPrepend: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_HDRINCL, Len: 4}},
- ssoStripHeader: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_STRIPHDR, Len: 4}},
- ssoJoinGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_JOIN_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoLeaveGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_LEAVE_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoJoinSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_JOIN_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoLeaveSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_LEAVE_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoBlockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_BLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoUnblockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_UNBLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoPacketInfo: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVPKTINFO, Len: 4}},
- }
-)
-
-func (pi *inetPktinfo) setIfindex(i int) {
- pi.Ifindex = uint32(i)
-}
-
-func (gr *groupReq) setGroup(grp net.IP) {
- sa := (*sockaddrInet)(unsafe.Pointer(uintptr(unsafe.Pointer(gr)) + 4))
- sa.Len = sizeofSockaddrInet
- sa.Family = syscall.AF_INET
- copy(sa.Addr[:], grp)
-}
-
-func (gsr *groupSourceReq) setSourceGroup(grp, src net.IP) {
- sa := (*sockaddrInet)(unsafe.Pointer(uintptr(unsafe.Pointer(gsr)) + 4))
- sa.Len = sizeofSockaddrInet
- sa.Family = syscall.AF_INET
- copy(sa.Addr[:], grp)
- sa = (*sockaddrInet)(unsafe.Pointer(uintptr(unsafe.Pointer(gsr)) + 132))
- sa.Len = sizeofSockaddrInet
- sa.Family = syscall.AF_INET
- copy(sa.Addr[:], src)
-}
diff --git a/vendor/golang.org/x/net/ipv4/sys_dragonfly.go b/vendor/golang.org/x/net/ipv4/sys_dragonfly.go
deleted file mode 100644
index 0620d0e..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_dragonfly.go
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import (
- "net"
- "syscall"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-const sockoptReceiveInterface = unix.IP_RECVIF
-
-var (
- ctlOpts = [ctlMax]ctlOpt{
- ctlTTL: {unix.IP_RECVTTL, 1, marshalTTL, parseTTL},
- ctlDst: {unix.IP_RECVDSTADDR, net.IPv4len, marshalDst, parseDst},
- ctlInterface: {unix.IP_RECVIF, syscall.SizeofSockaddrDatalink, marshalInterface, parseInterface},
- }
-
- sockOpts = map[int]*sockOpt{
- ssoTOS: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_TOS, Len: 4}},
- ssoTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_TTL, Len: 4}},
- ssoMulticastTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_TTL, Len: 1}},
- ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_IF, Len: 4}},
- ssoMulticastLoopback: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_LOOP, Len: 4}},
- ssoReceiveTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVTTL, Len: 4}},
- ssoReceiveDst: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVDSTADDR, Len: 4}},
- ssoReceiveInterface: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVIF, Len: 4}},
- ssoHeaderPrepend: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_HDRINCL, Len: 4}},
- ssoJoinGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_ADD_MEMBERSHIP, Len: sizeofIPMreq}, typ: ssoTypeIPMreq},
- ssoLeaveGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_DROP_MEMBERSHIP, Len: sizeofIPMreq}, typ: ssoTypeIPMreq},
- }
-)
diff --git a/vendor/golang.org/x/net/ipv4/sys_freebsd.go b/vendor/golang.org/x/net/ipv4/sys_freebsd.go
deleted file mode 100644
index 8961228..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_freebsd.go
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import (
- "net"
- "runtime"
- "strings"
- "syscall"
- "unsafe"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-const sockoptReceiveInterface = unix.IP_RECVIF
-
-var (
- ctlOpts = [ctlMax]ctlOpt{
- ctlTTL: {unix.IP_RECVTTL, 1, marshalTTL, parseTTL},
- ctlDst: {unix.IP_RECVDSTADDR, net.IPv4len, marshalDst, parseDst},
- ctlInterface: {unix.IP_RECVIF, syscall.SizeofSockaddrDatalink, marshalInterface, parseInterface},
- }
-
- sockOpts = map[int]*sockOpt{
- ssoTOS: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_TOS, Len: 4}},
- ssoTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_TTL, Len: 4}},
- ssoMulticastTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_TTL, Len: 1}},
- ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_IF, Len: 4}},
- ssoMulticastLoopback: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_LOOP, Len: 4}},
- ssoReceiveTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVTTL, Len: 4}},
- ssoReceiveDst: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVDSTADDR, Len: 4}},
- ssoReceiveInterface: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVIF, Len: 4}},
- ssoHeaderPrepend: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_HDRINCL, Len: 4}},
- ssoJoinGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_JOIN_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoLeaveGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_LEAVE_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoJoinSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_JOIN_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoLeaveSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_LEAVE_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoBlockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_BLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoUnblockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_UNBLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- }
-)
-
-func init() {
- freebsdVersion, _ = syscall.SysctlUint32("kern.osreldate")
- if freebsdVersion >= 1000000 {
- sockOpts[ssoMulticastInterface] = &sockOpt{Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_IF, Len: unix.SizeofIPMreqn}, typ: ssoTypeIPMreqn}
- }
- if runtime.GOOS == "freebsd" && runtime.GOARCH == "386" {
- archs, _ := syscall.Sysctl("kern.supported_archs")
- for _, s := range strings.Fields(archs) {
- if s == "amd64" {
- compatFreeBSD32 = true
- break
- }
- }
- }
-}
-
-func (gr *groupReq) setGroup(grp net.IP) {
- sa := (*sockaddrInet)(unsafe.Pointer(&gr.Group))
- sa.Len = sizeofSockaddrInet
- sa.Family = syscall.AF_INET
- copy(sa.Addr[:], grp)
-}
-
-func (gsr *groupSourceReq) setSourceGroup(grp, src net.IP) {
- sa := (*sockaddrInet)(unsafe.Pointer(&gsr.Group))
- sa.Len = sizeofSockaddrInet
- sa.Family = syscall.AF_INET
- copy(sa.Addr[:], grp)
- sa = (*sockaddrInet)(unsafe.Pointer(&gsr.Source))
- sa.Len = sizeofSockaddrInet
- sa.Family = syscall.AF_INET
- copy(sa.Addr[:], src)
-}
diff --git a/vendor/golang.org/x/net/ipv4/sys_linux.go b/vendor/golang.org/x/net/ipv4/sys_linux.go
deleted file mode 100644
index 4588a5f..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_linux.go
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import (
- "net"
- "syscall"
- "unsafe"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-var (
- ctlOpts = [ctlMax]ctlOpt{
- ctlTTL: {unix.IP_TTL, 1, marshalTTL, parseTTL},
- ctlPacketInfo: {unix.IP_PKTINFO, sizeofInetPktinfo, marshalPacketInfo, parsePacketInfo},
- }
-
- sockOpts = map[int]*sockOpt{
- ssoTOS: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_TOS, Len: 4}},
- ssoTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_TTL, Len: 4}},
- ssoMulticastTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_TTL, Len: 4}},
- ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_IF, Len: unix.SizeofIPMreqn}, typ: ssoTypeIPMreqn},
- ssoMulticastLoopback: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_LOOP, Len: 4}},
- ssoReceiveTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVTTL, Len: 4}},
- ssoPacketInfo: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_PKTINFO, Len: 4}},
- ssoHeaderPrepend: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_HDRINCL, Len: 4}},
- ssoICMPFilter: {Option: socket.Option{Level: iana.ProtocolReserved, Name: unix.ICMP_FILTER, Len: sizeofICMPFilter}},
- ssoJoinGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_JOIN_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoLeaveGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_LEAVE_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoJoinSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_JOIN_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoLeaveSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_LEAVE_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoBlockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_BLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoUnblockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_UNBLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoAttachFilter: {Option: socket.Option{Level: unix.SOL_SOCKET, Name: unix.SO_ATTACH_FILTER, Len: unix.SizeofSockFprog}},
- }
-)
-
-func (pi *inetPktinfo) setIfindex(i int) {
- pi.Ifindex = int32(i)
-}
-
-func (gr *groupReq) setGroup(grp net.IP) {
- sa := (*sockaddrInet)(unsafe.Pointer(&gr.Group))
- sa.Family = syscall.AF_INET
- copy(sa.Addr[:], grp)
-}
-
-func (gsr *groupSourceReq) setSourceGroup(grp, src net.IP) {
- sa := (*sockaddrInet)(unsafe.Pointer(&gsr.Group))
- sa.Family = syscall.AF_INET
- copy(sa.Addr[:], grp)
- sa = (*sockaddrInet)(unsafe.Pointer(&gsr.Source))
- sa.Family = syscall.AF_INET
- copy(sa.Addr[:], src)
-}
diff --git a/vendor/golang.org/x/net/ipv4/sys_solaris.go b/vendor/golang.org/x/net/ipv4/sys_solaris.go
deleted file mode 100644
index 0bb9f3e..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_solaris.go
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import (
- "net"
- "syscall"
- "unsafe"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-const sockoptReceiveInterface = unix.IP_RECVIF
-
-var (
- ctlOpts = [ctlMax]ctlOpt{
- ctlTTL: {unix.IP_RECVTTL, 4, marshalTTL, parseTTL},
- ctlPacketInfo: {unix.IP_PKTINFO, sizeofInetPktinfo, marshalPacketInfo, parsePacketInfo},
- }
-
- sockOpts = map[int]sockOpt{
- ssoTOS: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_TOS, Len: 4}},
- ssoTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_TTL, Len: 4}},
- ssoMulticastTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_TTL, Len: 1}},
- ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_IF, Len: 4}},
- ssoMulticastLoopback: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_LOOP, Len: 1}},
- ssoReceiveTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVTTL, Len: 4}},
- ssoPacketInfo: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVPKTINFO, Len: 4}},
- ssoHeaderPrepend: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_HDRINCL, Len: 4}},
- ssoJoinGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_JOIN_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoLeaveGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_LEAVE_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoJoinSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_JOIN_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoLeaveSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_LEAVE_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoBlockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_BLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoUnblockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_UNBLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- }
-)
-
-func (pi *inetPktinfo) setIfindex(i int) {
- pi.Ifindex = uint32(i)
-}
-
-func (gr *groupReq) setGroup(grp net.IP) {
- sa := (*sockaddrInet)(unsafe.Pointer(uintptr(unsafe.Pointer(gr)) + 4))
- sa.Family = syscall.AF_INET
- copy(sa.Addr[:], grp)
-}
-
-func (gsr *groupSourceReq) setSourceGroup(grp, src net.IP) {
- sa := (*sockaddrInet)(unsafe.Pointer(uintptr(unsafe.Pointer(gsr)) + 4))
- sa.Family = syscall.AF_INET
- copy(sa.Addr[:], grp)
- sa = (*sockaddrInet)(unsafe.Pointer(uintptr(unsafe.Pointer(gsr)) + 260))
- sa.Family = syscall.AF_INET
- copy(sa.Addr[:], src)
-}
diff --git a/vendor/golang.org/x/net/ipv4/sys_ssmreq.go b/vendor/golang.org/x/net/ipv4/sys_ssmreq.go
deleted file mode 100644
index 6a4e7ab..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_ssmreq.go
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build darwin || freebsd || linux || solaris
-// +build darwin freebsd linux solaris
-
-package ipv4
-
-import (
- "net"
- "unsafe"
-
- "golang.org/x/net/internal/socket"
-)
-
-func (so *sockOpt) setGroupReq(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
- var gr groupReq
- if ifi != nil {
- gr.Interface = uint32(ifi.Index)
- }
- gr.setGroup(grp)
- var b []byte
- if compatFreeBSD32 {
- var d [sizeofGroupReq + 4]byte
- s := (*[sizeofGroupReq]byte)(unsafe.Pointer(&gr))
- copy(d[:4], s[:4])
- copy(d[8:], s[4:])
- b = d[:]
- } else {
- b = (*[sizeofGroupReq]byte)(unsafe.Pointer(&gr))[:sizeofGroupReq]
- }
- return so.Set(c, b)
-}
-
-func (so *sockOpt) setGroupSourceReq(c *socket.Conn, ifi *net.Interface, grp, src net.IP) error {
- var gsr groupSourceReq
- if ifi != nil {
- gsr.Interface = uint32(ifi.Index)
- }
- gsr.setSourceGroup(grp, src)
- var b []byte
- if compatFreeBSD32 {
- var d [sizeofGroupSourceReq + 4]byte
- s := (*[sizeofGroupSourceReq]byte)(unsafe.Pointer(&gsr))
- copy(d[:4], s[:4])
- copy(d[8:], s[4:])
- b = d[:]
- } else {
- b = (*[sizeofGroupSourceReq]byte)(unsafe.Pointer(&gsr))[:sizeofGroupSourceReq]
- }
- return so.Set(c, b)
-}
diff --git a/vendor/golang.org/x/net/ipv4/sys_ssmreq_stub.go b/vendor/golang.org/x/net/ipv4/sys_ssmreq_stub.go
deleted file mode 100644
index 157159f..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_ssmreq_stub.go
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !darwin && !freebsd && !linux && !solaris
-// +build !darwin,!freebsd,!linux,!solaris
-
-package ipv4
-
-import (
- "net"
-
- "golang.org/x/net/internal/socket"
-)
-
-func (so *sockOpt) setGroupReq(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
- return errNotImplemented
-}
-
-func (so *sockOpt) setGroupSourceReq(c *socket.Conn, ifi *net.Interface, grp, src net.IP) error {
- return errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/ipv4/sys_stub.go b/vendor/golang.org/x/net/ipv4/sys_stub.go
deleted file mode 100644
index d550851..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_stub.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !windows && !zos
-// +build !aix,!darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows,!zos
-
-package ipv4
-
-var (
- ctlOpts = [ctlMax]ctlOpt{}
-
- sockOpts = map[int]*sockOpt{}
-)
diff --git a/vendor/golang.org/x/net/ipv4/sys_windows.go b/vendor/golang.org/x/net/ipv4/sys_windows.go
deleted file mode 100644
index c5e9506..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_windows.go
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import (
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/windows"
-)
-
-const (
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
-)
-
-type ipMreq struct {
- Multiaddr [4]byte
- Interface [4]byte
-}
-
-type ipMreqSource struct {
- Multiaddr [4]byte
- Sourceaddr [4]byte
- Interface [4]byte
-}
-
-// See http://msdn.microsoft.com/en-us/library/windows/desktop/ms738586(v=vs.85).aspx
-var (
- ctlOpts = [ctlMax]ctlOpt{}
-
- sockOpts = map[int]*sockOpt{
- ssoTOS: {Option: socket.Option{Level: iana.ProtocolIP, Name: windows.IP_TOS, Len: 4}},
- ssoTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: windows.IP_TTL, Len: 4}},
- ssoMulticastTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: windows.IP_MULTICAST_TTL, Len: 4}},
- ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIP, Name: windows.IP_MULTICAST_IF, Len: 4}},
- ssoMulticastLoopback: {Option: socket.Option{Level: iana.ProtocolIP, Name: windows.IP_MULTICAST_LOOP, Len: 4}},
- ssoHeaderPrepend: {Option: socket.Option{Level: iana.ProtocolIP, Name: windows.IP_HDRINCL, Len: 4}},
- ssoJoinGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: windows.IP_ADD_MEMBERSHIP, Len: sizeofIPMreq}, typ: ssoTypeIPMreq},
- ssoLeaveGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: windows.IP_DROP_MEMBERSHIP, Len: sizeofIPMreq}, typ: ssoTypeIPMreq},
- }
-)
diff --git a/vendor/golang.org/x/net/ipv4/sys_zos.go b/vendor/golang.org/x/net/ipv4/sys_zos.go
deleted file mode 100644
index be20640..0000000
--- a/vendor/golang.org/x/net/ipv4/sys_zos.go
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv4
-
-import (
- "net"
- "syscall"
- "unsafe"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-var (
- ctlOpts = [ctlMax]ctlOpt{
- ctlPacketInfo: {unix.IP_PKTINFO, sizeofInetPktinfo, marshalPacketInfo, parsePacketInfo},
- }
-
- sockOpts = map[int]*sockOpt{
- ssoMulticastTTL: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_TTL, Len: 1}},
- ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_IF, Len: 4}},
- ssoMulticastLoopback: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_MULTICAST_LOOP, Len: 1}},
- ssoPacketInfo: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.IP_RECVPKTINFO, Len: 4}},
- ssoJoinGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_JOIN_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoLeaveGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_LEAVE_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoJoinSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_JOIN_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoLeaveSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_LEAVE_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoBlockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_BLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoUnblockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIP, Name: unix.MCAST_UNBLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- }
-)
-
-func (pi *inetPktinfo) setIfindex(i int) {
- pi.Ifindex = uint32(i)
-}
-
-func (gr *groupReq) setGroup(grp net.IP) {
- sa := (*sockaddrInet4)(unsafe.Pointer(&gr.Group))
- sa.Family = syscall.AF_INET
- sa.Len = sizeofSockaddrInet4
- copy(sa.Addr[:], grp)
-}
-
-func (gsr *groupSourceReq) setSourceGroup(grp, src net.IP) {
- sa := (*sockaddrInet4)(unsafe.Pointer(&gsr.Group))
- sa.Family = syscall.AF_INET
- sa.Len = sizeofSockaddrInet4
- copy(sa.Addr[:], grp)
- sa = (*sockaddrInet4)(unsafe.Pointer(&gsr.Source))
- sa.Family = syscall.AF_INET
- sa.Len = sizeofSockaddrInet4
- copy(sa.Addr[:], src)
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_aix_ppc64.go b/vendor/golang.org/x/net/ipv4/zsys_aix_ppc64.go
deleted file mode 100644
index b7f2d6e..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_aix_ppc64.go
+++ /dev/null
@@ -1,17 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_aix.go
-
-// Added for go1.11 compatibility
-//go:build aix
-// +build aix
-
-package ipv4
-
-const (
- sizeofIPMreq = 0x8
-)
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_darwin.go b/vendor/golang.org/x/net/ipv4/zsys_darwin.go
deleted file mode 100644
index 6c1b705..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_darwin.go
+++ /dev/null
@@ -1,59 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_darwin.go
-
-package ipv4
-
-const (
- sizeofSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
- sizeofInetPktinfo = 0xc
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x84
- sizeofGroupSourceReq = 0x104
-)
-
-type sockaddrStorage struct {
- Len uint8
- Family uint8
- X__ss_pad1 [6]int8
- X__ss_align int64
- X__ss_pad2 [112]int8
-}
-
-type sockaddrInet struct {
- Len uint8
- Family uint8
- Port uint16
- Addr [4]byte /* in_addr */
- Zero [8]int8
-}
-
-type inetPktinfo struct {
- Ifindex uint32
- Spec_dst [4]byte /* in_addr */
- Addr [4]byte /* in_addr */
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr [4]byte /* in_addr */
- Sourceaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [128]byte
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [128]byte
- Pad_cgo_1 [128]byte
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_dragonfly.go b/vendor/golang.org/x/net/ipv4/zsys_dragonfly.go
deleted file mode 100644
index 2155df1..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_dragonfly.go
+++ /dev/null
@@ -1,13 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_dragonfly.go
-
-package ipv4
-
-const (
- sizeofIPMreq = 0x8
-)
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_freebsd_386.go b/vendor/golang.org/x/net/ipv4/zsys_freebsd_386.go
deleted file mode 100644
index ae40482..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_freebsd_386.go
+++ /dev/null
@@ -1,52 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_freebsd.go
-
-package ipv4
-
-const (
- sizeofSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x84
- sizeofGroupSourceReq = 0x104
-)
-
-type sockaddrStorage struct {
- Len uint8
- Family uint8
- X__ss_pad1 [6]int8
- X__ss_align int64
- X__ss_pad2 [112]int8
-}
-
-type sockaddrInet struct {
- Len uint8
- Family uint8
- Port uint16
- Addr [4]byte /* in_addr */
- Zero [8]int8
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr [4]byte /* in_addr */
- Sourceaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type groupReq struct {
- Interface uint32
- Group sockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Group sockaddrStorage
- Source sockaddrStorage
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_freebsd_amd64.go b/vendor/golang.org/x/net/ipv4/zsys_freebsd_amd64.go
deleted file mode 100644
index 9018186..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_freebsd_amd64.go
+++ /dev/null
@@ -1,54 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_freebsd.go
-
-package ipv4
-
-const (
- sizeofSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-)
-
-type sockaddrStorage struct {
- Len uint8
- Family uint8
- X__ss_pad1 [6]int8
- X__ss_align int64
- X__ss_pad2 [112]int8
-}
-
-type sockaddrInet struct {
- Len uint8
- Family uint8
- Port uint16
- Addr [4]byte /* in_addr */
- Zero [8]int8
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr [4]byte /* in_addr */
- Sourceaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group sockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group sockaddrStorage
- Source sockaddrStorage
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_freebsd_arm.go b/vendor/golang.org/x/net/ipv4/zsys_freebsd_arm.go
deleted file mode 100644
index 9018186..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_freebsd_arm.go
+++ /dev/null
@@ -1,54 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_freebsd.go
-
-package ipv4
-
-const (
- sizeofSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-)
-
-type sockaddrStorage struct {
- Len uint8
- Family uint8
- X__ss_pad1 [6]int8
- X__ss_align int64
- X__ss_pad2 [112]int8
-}
-
-type sockaddrInet struct {
- Len uint8
- Family uint8
- Port uint16
- Addr [4]byte /* in_addr */
- Zero [8]int8
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr [4]byte /* in_addr */
- Sourceaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group sockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group sockaddrStorage
- Source sockaddrStorage
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_freebsd_arm64.go b/vendor/golang.org/x/net/ipv4/zsys_freebsd_arm64.go
deleted file mode 100644
index 0feb9a7..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_freebsd_arm64.go
+++ /dev/null
@@ -1,52 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_freebsd.go
-
-package ipv4
-
-const (
- sizeofSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-)
-
-type sockaddrStorage struct {
- Len uint8
- Family uint8
- X__ss_pad1 [6]uint8
- X__ss_align int64
- X__ss_pad2 [112]uint8
-}
-
-type sockaddrInet struct {
- Len uint8
- Family uint8
- Port uint16
- Addr [4]byte /* in_addr */
- Zero [8]uint8
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr [4]byte /* in_addr */
- Sourceaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type groupReq struct {
- Interface uint32
- Group sockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Group sockaddrStorage
- Source sockaddrStorage
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_linux_386.go b/vendor/golang.org/x/net/ipv4/zsys_linux_386.go
deleted file mode 100644
index d510357..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_linux_386.go
+++ /dev/null
@@ -1,72 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv4
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
- sizeofInetPktinfo = 0xc
- sizeofSockExtendedErr = 0x10
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x84
- sizeofGroupSourceReq = 0x104
-
- sizeofICMPFilter = 0x4
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet struct {
- Family uint16
- Port uint16
- Addr [4]byte /* in_addr */
- X__pad [8]uint8
-}
-
-type inetPktinfo struct {
- Ifindex int32
- Spec_dst [4]byte /* in_addr */
- Addr [4]byte /* in_addr */
-}
-
-type sockExtendedErr struct {
- Errno uint32
- Origin uint8
- Type uint8
- Code uint8
- Pad uint8
- Info uint32
- Data uint32
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr uint32
- Interface uint32
- Sourceaddr uint32
-}
-
-type groupReq struct {
- Interface uint32
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpFilter struct {
- Data uint32
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_linux_amd64.go b/vendor/golang.org/x/net/ipv4/zsys_linux_amd64.go
deleted file mode 100644
index eb10cc7..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_linux_amd64.go
+++ /dev/null
@@ -1,74 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv4
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
- sizeofInetPktinfo = 0xc
- sizeofSockExtendedErr = 0x10
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPFilter = 0x4
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet struct {
- Family uint16
- Port uint16
- Addr [4]byte /* in_addr */
- X__pad [8]uint8
-}
-
-type inetPktinfo struct {
- Ifindex int32
- Spec_dst [4]byte /* in_addr */
- Addr [4]byte /* in_addr */
-}
-
-type sockExtendedErr struct {
- Errno uint32
- Origin uint8
- Type uint8
- Code uint8
- Pad uint8
- Info uint32
- Data uint32
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr uint32
- Interface uint32
- Sourceaddr uint32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpFilter struct {
- Data uint32
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_linux_arm.go b/vendor/golang.org/x/net/ipv4/zsys_linux_arm.go
deleted file mode 100644
index d510357..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_linux_arm.go
+++ /dev/null
@@ -1,72 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv4
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
- sizeofInetPktinfo = 0xc
- sizeofSockExtendedErr = 0x10
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x84
- sizeofGroupSourceReq = 0x104
-
- sizeofICMPFilter = 0x4
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet struct {
- Family uint16
- Port uint16
- Addr [4]byte /* in_addr */
- X__pad [8]uint8
-}
-
-type inetPktinfo struct {
- Ifindex int32
- Spec_dst [4]byte /* in_addr */
- Addr [4]byte /* in_addr */
-}
-
-type sockExtendedErr struct {
- Errno uint32
- Origin uint8
- Type uint8
- Code uint8
- Pad uint8
- Info uint32
- Data uint32
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr uint32
- Interface uint32
- Sourceaddr uint32
-}
-
-type groupReq struct {
- Interface uint32
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpFilter struct {
- Data uint32
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_linux_arm64.go b/vendor/golang.org/x/net/ipv4/zsys_linux_arm64.go
deleted file mode 100644
index eb10cc7..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_linux_arm64.go
+++ /dev/null
@@ -1,74 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv4
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
- sizeofInetPktinfo = 0xc
- sizeofSockExtendedErr = 0x10
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPFilter = 0x4
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet struct {
- Family uint16
- Port uint16
- Addr [4]byte /* in_addr */
- X__pad [8]uint8
-}
-
-type inetPktinfo struct {
- Ifindex int32
- Spec_dst [4]byte /* in_addr */
- Addr [4]byte /* in_addr */
-}
-
-type sockExtendedErr struct {
- Errno uint32
- Origin uint8
- Type uint8
- Code uint8
- Pad uint8
- Info uint32
- Data uint32
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr uint32
- Interface uint32
- Sourceaddr uint32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpFilter struct {
- Data uint32
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_linux_mips.go b/vendor/golang.org/x/net/ipv4/zsys_linux_mips.go
deleted file mode 100644
index d510357..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_linux_mips.go
+++ /dev/null
@@ -1,72 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv4
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
- sizeofInetPktinfo = 0xc
- sizeofSockExtendedErr = 0x10
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x84
- sizeofGroupSourceReq = 0x104
-
- sizeofICMPFilter = 0x4
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet struct {
- Family uint16
- Port uint16
- Addr [4]byte /* in_addr */
- X__pad [8]uint8
-}
-
-type inetPktinfo struct {
- Ifindex int32
- Spec_dst [4]byte /* in_addr */
- Addr [4]byte /* in_addr */
-}
-
-type sockExtendedErr struct {
- Errno uint32
- Origin uint8
- Type uint8
- Code uint8
- Pad uint8
- Info uint32
- Data uint32
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr uint32
- Interface uint32
- Sourceaddr uint32
-}
-
-type groupReq struct {
- Interface uint32
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpFilter struct {
- Data uint32
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_linux_mips64.go b/vendor/golang.org/x/net/ipv4/zsys_linux_mips64.go
deleted file mode 100644
index eb10cc7..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_linux_mips64.go
+++ /dev/null
@@ -1,74 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv4
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
- sizeofInetPktinfo = 0xc
- sizeofSockExtendedErr = 0x10
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPFilter = 0x4
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet struct {
- Family uint16
- Port uint16
- Addr [4]byte /* in_addr */
- X__pad [8]uint8
-}
-
-type inetPktinfo struct {
- Ifindex int32
- Spec_dst [4]byte /* in_addr */
- Addr [4]byte /* in_addr */
-}
-
-type sockExtendedErr struct {
- Errno uint32
- Origin uint8
- Type uint8
- Code uint8
- Pad uint8
- Info uint32
- Data uint32
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr uint32
- Interface uint32
- Sourceaddr uint32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpFilter struct {
- Data uint32
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_linux_mips64le.go b/vendor/golang.org/x/net/ipv4/zsys_linux_mips64le.go
deleted file mode 100644
index eb10cc7..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_linux_mips64le.go
+++ /dev/null
@@ -1,74 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv4
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
- sizeofInetPktinfo = 0xc
- sizeofSockExtendedErr = 0x10
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPFilter = 0x4
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet struct {
- Family uint16
- Port uint16
- Addr [4]byte /* in_addr */
- X__pad [8]uint8
-}
-
-type inetPktinfo struct {
- Ifindex int32
- Spec_dst [4]byte /* in_addr */
- Addr [4]byte /* in_addr */
-}
-
-type sockExtendedErr struct {
- Errno uint32
- Origin uint8
- Type uint8
- Code uint8
- Pad uint8
- Info uint32
- Data uint32
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr uint32
- Interface uint32
- Sourceaddr uint32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpFilter struct {
- Data uint32
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_linux_mipsle.go b/vendor/golang.org/x/net/ipv4/zsys_linux_mipsle.go
deleted file mode 100644
index d510357..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_linux_mipsle.go
+++ /dev/null
@@ -1,72 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv4
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
- sizeofInetPktinfo = 0xc
- sizeofSockExtendedErr = 0x10
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x84
- sizeofGroupSourceReq = 0x104
-
- sizeofICMPFilter = 0x4
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet struct {
- Family uint16
- Port uint16
- Addr [4]byte /* in_addr */
- X__pad [8]uint8
-}
-
-type inetPktinfo struct {
- Ifindex int32
- Spec_dst [4]byte /* in_addr */
- Addr [4]byte /* in_addr */
-}
-
-type sockExtendedErr struct {
- Errno uint32
- Origin uint8
- Type uint8
- Code uint8
- Pad uint8
- Info uint32
- Data uint32
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr uint32
- Interface uint32
- Sourceaddr uint32
-}
-
-type groupReq struct {
- Interface uint32
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpFilter struct {
- Data uint32
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_linux_ppc.go b/vendor/golang.org/x/net/ipv4/zsys_linux_ppc.go
deleted file mode 100644
index 29202e4..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_linux_ppc.go
+++ /dev/null
@@ -1,72 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv4
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
- sizeofInetPktinfo = 0xc
- sizeofSockExtendedErr = 0x10
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x84
- sizeofGroupSourceReq = 0x104
-
- sizeofICMPFilter = 0x4
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]uint8
-}
-
-type sockaddrInet struct {
- Family uint16
- Port uint16
- Addr [4]byte /* in_addr */
- X__pad [8]uint8
-}
-
-type inetPktinfo struct {
- Ifindex int32
- Spec_dst [4]byte /* in_addr */
- Addr [4]byte /* in_addr */
-}
-
-type sockExtendedErr struct {
- Errno uint32
- Origin uint8
- Type uint8
- Code uint8
- Pad uint8
- Info uint32
- Data uint32
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr uint32
- Interface uint32
- Sourceaddr uint32
-}
-
-type groupReq struct {
- Interface uint32
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpFilter struct {
- Data uint32
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_linux_ppc64.go b/vendor/golang.org/x/net/ipv4/zsys_linux_ppc64.go
deleted file mode 100644
index eb10cc7..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_linux_ppc64.go
+++ /dev/null
@@ -1,74 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv4
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
- sizeofInetPktinfo = 0xc
- sizeofSockExtendedErr = 0x10
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPFilter = 0x4
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet struct {
- Family uint16
- Port uint16
- Addr [4]byte /* in_addr */
- X__pad [8]uint8
-}
-
-type inetPktinfo struct {
- Ifindex int32
- Spec_dst [4]byte /* in_addr */
- Addr [4]byte /* in_addr */
-}
-
-type sockExtendedErr struct {
- Errno uint32
- Origin uint8
- Type uint8
- Code uint8
- Pad uint8
- Info uint32
- Data uint32
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr uint32
- Interface uint32
- Sourceaddr uint32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpFilter struct {
- Data uint32
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_linux_ppc64le.go b/vendor/golang.org/x/net/ipv4/zsys_linux_ppc64le.go
deleted file mode 100644
index eb10cc7..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_linux_ppc64le.go
+++ /dev/null
@@ -1,74 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv4
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
- sizeofInetPktinfo = 0xc
- sizeofSockExtendedErr = 0x10
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPFilter = 0x4
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet struct {
- Family uint16
- Port uint16
- Addr [4]byte /* in_addr */
- X__pad [8]uint8
-}
-
-type inetPktinfo struct {
- Ifindex int32
- Spec_dst [4]byte /* in_addr */
- Addr [4]byte /* in_addr */
-}
-
-type sockExtendedErr struct {
- Errno uint32
- Origin uint8
- Type uint8
- Code uint8
- Pad uint8
- Info uint32
- Data uint32
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr uint32
- Interface uint32
- Sourceaddr uint32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpFilter struct {
- Data uint32
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_linux_riscv64.go b/vendor/golang.org/x/net/ipv4/zsys_linux_riscv64.go
deleted file mode 100644
index e2edebd..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_linux_riscv64.go
+++ /dev/null
@@ -1,77 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-//go:build riscv64
-// +build riscv64
-
-package ipv4
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
- sizeofInetPktinfo = 0xc
- sizeofSockExtendedErr = 0x10
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPFilter = 0x4
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet struct {
- Family uint16
- Port uint16
- Addr [4]byte /* in_addr */
- X__pad [8]uint8
-}
-
-type inetPktinfo struct {
- Ifindex int32
- Spec_dst [4]byte /* in_addr */
- Addr [4]byte /* in_addr */
-}
-
-type sockExtendedErr struct {
- Errno uint32
- Origin uint8
- Type uint8
- Code uint8
- Pad uint8
- Info uint32
- Data uint32
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr uint32
- Interface uint32
- Sourceaddr uint32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpFilter struct {
- Data uint32
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_linux_s390x.go b/vendor/golang.org/x/net/ipv4/zsys_linux_s390x.go
deleted file mode 100644
index eb10cc7..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_linux_s390x.go
+++ /dev/null
@@ -1,74 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv4
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet = 0x10
- sizeofInetPktinfo = 0xc
- sizeofSockExtendedErr = 0x10
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPFilter = 0x4
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet struct {
- Family uint16
- Port uint16
- Addr [4]byte /* in_addr */
- X__pad [8]uint8
-}
-
-type inetPktinfo struct {
- Ifindex int32
- Spec_dst [4]byte /* in_addr */
- Addr [4]byte /* in_addr */
-}
-
-type sockExtendedErr struct {
- Errno uint32
- Origin uint8
- Type uint8
- Code uint8
- Pad uint8
- Info uint32
- Data uint32
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr uint32
- Interface uint32
- Sourceaddr uint32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpFilter struct {
- Data uint32
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_netbsd.go b/vendor/golang.org/x/net/ipv4/zsys_netbsd.go
deleted file mode 100644
index a2ef2f6..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_netbsd.go
+++ /dev/null
@@ -1,13 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_netbsd.go
-
-package ipv4
-
-const (
- sizeofIPMreq = 0x8
-)
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_openbsd.go b/vendor/golang.org/x/net/ipv4/zsys_openbsd.go
deleted file mode 100644
index b293a33..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_openbsd.go
+++ /dev/null
@@ -1,13 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_openbsd.go
-
-package ipv4
-
-const (
- sizeofIPMreq = 0x8
-)
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_solaris.go b/vendor/golang.org/x/net/ipv4/zsys_solaris.go
deleted file mode 100644
index e1a961b..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_solaris.go
+++ /dev/null
@@ -1,57 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_solaris.go
-
-package ipv4
-
-const (
- sizeofSockaddrStorage = 0x100
- sizeofSockaddrInet = 0x10
- sizeofInetPktinfo = 0xc
-
- sizeofIPMreq = 0x8
- sizeofIPMreqSource = 0xc
- sizeofGroupReq = 0x104
- sizeofGroupSourceReq = 0x204
-)
-
-type sockaddrStorage struct {
- Family uint16
- X_ss_pad1 [6]int8
- X_ss_align float64
- X_ss_pad2 [240]int8
-}
-
-type sockaddrInet struct {
- Family uint16
- Port uint16
- Addr [4]byte /* in_addr */
- Zero [8]int8
-}
-
-type inetPktinfo struct {
- Ifindex uint32
- Spec_dst [4]byte /* in_addr */
- Addr [4]byte /* in_addr */
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type ipMreqSource struct {
- Multiaddr [4]byte /* in_addr */
- Sourceaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [256]byte
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [256]byte
- Pad_cgo_1 [256]byte
-}
diff --git a/vendor/golang.org/x/net/ipv4/zsys_zos_s390x.go b/vendor/golang.org/x/net/ipv4/zsys_zos_s390x.go
deleted file mode 100644
index 692abf6..0000000
--- a/vendor/golang.org/x/net/ipv4/zsys_zos_s390x.go
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Hand edited based on zerrors_zos_s390x.go
-// TODO(Bill O'Farrell): auto-generate.
-
-package ipv4
-
-const (
- sizeofIPMreq = 8
- sizeofSockaddrInet4 = 16
- sizeofSockaddrStorage = 128
- sizeofGroupReq = 136
- sizeofGroupSourceReq = 264
- sizeofInetPktinfo = 8
-)
-
-type sockaddrInet4 struct {
- Len uint8
- Family uint8
- Port uint16
- Addr [4]byte
- Zero [8]uint8
-}
-
-type inetPktinfo struct {
- Addr [4]byte
- Ifindex uint32
-}
-
-type sockaddrStorage struct {
- Len uint8
- Family byte
- ss_pad1 [6]byte
- ss_align int64
- ss_pad2 [112]byte
-}
-
-type groupReq struct {
- Interface uint32
- reserved uint32
- Group sockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- reserved uint32
- Group sockaddrStorage
- Source sockaddrStorage
-}
-
-type ipMreq struct {
- Multiaddr [4]byte /* in_addr */
- Interface [4]byte /* in_addr */
-}
diff --git a/vendor/golang.org/x/net/ipv6/batch.go b/vendor/golang.org/x/net/ipv6/batch.go
deleted file mode 100644
index 2ccb984..0000000
--- a/vendor/golang.org/x/net/ipv6/batch.go
+++ /dev/null
@@ -1,116 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-import (
- "net"
- "runtime"
-
- "golang.org/x/net/internal/socket"
-)
-
-// BUG(mikio): On Windows, the ReadBatch and WriteBatch methods of
-// PacketConn are not implemented.
-
-// A Message represents an IO message.
-//
-// type Message struct {
-// Buffers [][]byte
-// OOB []byte
-// Addr net.Addr
-// N int
-// NN int
-// Flags int
-// }
-//
-// The Buffers fields represents a list of contiguous buffers, which
-// can be used for vectored IO, for example, putting a header and a
-// payload in each slice.
-// When writing, the Buffers field must contain at least one byte to
-// write.
-// When reading, the Buffers field will always contain a byte to read.
-//
-// The OOB field contains protocol-specific control or miscellaneous
-// ancillary data known as out-of-band data.
-// It can be nil when not required.
-//
-// The Addr field specifies a destination address when writing.
-// It can be nil when the underlying protocol of the endpoint uses
-// connection-oriented communication.
-// After a successful read, it may contain the source address on the
-// received packet.
-//
-// The N field indicates the number of bytes read or written from/to
-// Buffers.
-//
-// The NN field indicates the number of bytes read or written from/to
-// OOB.
-//
-// The Flags field contains protocol-specific information on the
-// received message.
-type Message = socket.Message
-
-// ReadBatch reads a batch of messages.
-//
-// The provided flags is a set of platform-dependent flags, such as
-// syscall.MSG_PEEK.
-//
-// On a successful read it returns the number of messages received, up
-// to len(ms).
-//
-// On Linux, a batch read will be optimized.
-// On other platforms, this method will read only a single message.
-func (c *payloadHandler) ReadBatch(ms []Message, flags int) (int, error) {
- if !c.ok() {
- return 0, errInvalidConn
- }
- switch runtime.GOOS {
- case "linux":
- n, err := c.RecvMsgs([]socket.Message(ms), flags)
- if err != nil {
- err = &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
- }
- return n, err
- default:
- n := 1
- err := c.RecvMsg(&ms[0], flags)
- if err != nil {
- n = 0
- err = &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
- }
- return n, err
- }
-}
-
-// WriteBatch writes a batch of messages.
-//
-// The provided flags is a set of platform-dependent flags, such as
-// syscall.MSG_DONTROUTE.
-//
-// It returns the number of messages written on a successful write.
-//
-// On Linux, a batch write will be optimized.
-// On other platforms, this method will write only a single message.
-func (c *payloadHandler) WriteBatch(ms []Message, flags int) (int, error) {
- if !c.ok() {
- return 0, errInvalidConn
- }
- switch runtime.GOOS {
- case "linux":
- n, err := c.SendMsgs([]socket.Message(ms), flags)
- if err != nil {
- err = &net.OpError{Op: "write", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
- }
- return n, err
- default:
- n := 1
- err := c.SendMsg(&ms[0], flags)
- if err != nil {
- n = 0
- err = &net.OpError{Op: "write", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
- }
- return n, err
- }
-}
diff --git a/vendor/golang.org/x/net/ipv6/control.go b/vendor/golang.org/x/net/ipv6/control.go
deleted file mode 100644
index 2da6444..0000000
--- a/vendor/golang.org/x/net/ipv6/control.go
+++ /dev/null
@@ -1,187 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-import (
- "fmt"
- "net"
- "sync"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-)
-
-// Note that RFC 3542 obsoletes RFC 2292 but OS X Snow Leopard and the
-// former still support RFC 2292 only. Please be aware that almost
-// all protocol implementations prohibit using a combination of RFC
-// 2292 and RFC 3542 for some practical reasons.
-
-type rawOpt struct {
- sync.RWMutex
- cflags ControlFlags
-}
-
-func (c *rawOpt) set(f ControlFlags) { c.cflags |= f }
-func (c *rawOpt) clear(f ControlFlags) { c.cflags &^= f }
-func (c *rawOpt) isset(f ControlFlags) bool { return c.cflags&f != 0 }
-
-// A ControlFlags represents per packet basis IP-level socket option
-// control flags.
-type ControlFlags uint
-
-const (
- FlagTrafficClass ControlFlags = 1 << iota // pass the traffic class on the received packet
- FlagHopLimit // pass the hop limit on the received packet
- FlagSrc // pass the source address on the received packet
- FlagDst // pass the destination address on the received packet
- FlagInterface // pass the interface index on the received packet
- FlagPathMTU // pass the path MTU on the received packet path
-)
-
-const flagPacketInfo = FlagDst | FlagInterface
-
-// A ControlMessage represents per packet basis IP-level socket
-// options.
-type ControlMessage struct {
- // Receiving socket options: SetControlMessage allows to
- // receive the options from the protocol stack using ReadFrom
- // method of PacketConn.
- //
- // Specifying socket options: ControlMessage for WriteTo
- // method of PacketConn allows to send the options to the
- // protocol stack.
- //
- TrafficClass int // traffic class, must be 1 <= value <= 255 when specifying
- HopLimit int // hop limit, must be 1 <= value <= 255 when specifying
- Src net.IP // source address, specifying only
- Dst net.IP // destination address, receiving only
- IfIndex int // interface index, must be 1 <= value when specifying
- NextHop net.IP // next hop address, specifying only
- MTU int // path MTU, receiving only
-}
-
-func (cm *ControlMessage) String() string {
- if cm == nil {
- return ""
- }
- return fmt.Sprintf("tclass=%#x hoplim=%d src=%v dst=%v ifindex=%d nexthop=%v mtu=%d", cm.TrafficClass, cm.HopLimit, cm.Src, cm.Dst, cm.IfIndex, cm.NextHop, cm.MTU)
-}
-
-// Marshal returns the binary encoding of cm.
-func (cm *ControlMessage) Marshal() []byte {
- if cm == nil {
- return nil
- }
- var l int
- tclass := false
- if ctlOpts[ctlTrafficClass].name > 0 && cm.TrafficClass > 0 {
- tclass = true
- l += socket.ControlMessageSpace(ctlOpts[ctlTrafficClass].length)
- }
- hoplimit := false
- if ctlOpts[ctlHopLimit].name > 0 && cm.HopLimit > 0 {
- hoplimit = true
- l += socket.ControlMessageSpace(ctlOpts[ctlHopLimit].length)
- }
- pktinfo := false
- if ctlOpts[ctlPacketInfo].name > 0 && (cm.Src.To16() != nil && cm.Src.To4() == nil || cm.IfIndex > 0) {
- pktinfo = true
- l += socket.ControlMessageSpace(ctlOpts[ctlPacketInfo].length)
- }
- nexthop := false
- if ctlOpts[ctlNextHop].name > 0 && cm.NextHop.To16() != nil && cm.NextHop.To4() == nil {
- nexthop = true
- l += socket.ControlMessageSpace(ctlOpts[ctlNextHop].length)
- }
- var b []byte
- if l > 0 {
- b = make([]byte, l)
- bb := b
- if tclass {
- bb = ctlOpts[ctlTrafficClass].marshal(bb, cm)
- }
- if hoplimit {
- bb = ctlOpts[ctlHopLimit].marshal(bb, cm)
- }
- if pktinfo {
- bb = ctlOpts[ctlPacketInfo].marshal(bb, cm)
- }
- if nexthop {
- bb = ctlOpts[ctlNextHop].marshal(bb, cm)
- }
- }
- return b
-}
-
-// Parse parses b as a control message and stores the result in cm.
-func (cm *ControlMessage) Parse(b []byte) error {
- ms, err := socket.ControlMessage(b).Parse()
- if err != nil {
- return err
- }
- for _, m := range ms {
- lvl, typ, l, err := m.ParseHeader()
- if err != nil {
- return err
- }
- if lvl != iana.ProtocolIPv6 {
- continue
- }
- switch {
- case typ == ctlOpts[ctlTrafficClass].name && l >= ctlOpts[ctlTrafficClass].length:
- ctlOpts[ctlTrafficClass].parse(cm, m.Data(l))
- case typ == ctlOpts[ctlHopLimit].name && l >= ctlOpts[ctlHopLimit].length:
- ctlOpts[ctlHopLimit].parse(cm, m.Data(l))
- case typ == ctlOpts[ctlPacketInfo].name && l >= ctlOpts[ctlPacketInfo].length:
- ctlOpts[ctlPacketInfo].parse(cm, m.Data(l))
- case typ == ctlOpts[ctlPathMTU].name && l >= ctlOpts[ctlPathMTU].length:
- ctlOpts[ctlPathMTU].parse(cm, m.Data(l))
- }
- }
- return nil
-}
-
-// NewControlMessage returns a new control message.
-//
-// The returned message is large enough for options specified by cf.
-func NewControlMessage(cf ControlFlags) []byte {
- opt := rawOpt{cflags: cf}
- var l int
- if opt.isset(FlagTrafficClass) && ctlOpts[ctlTrafficClass].name > 0 {
- l += socket.ControlMessageSpace(ctlOpts[ctlTrafficClass].length)
- }
- if opt.isset(FlagHopLimit) && ctlOpts[ctlHopLimit].name > 0 {
- l += socket.ControlMessageSpace(ctlOpts[ctlHopLimit].length)
- }
- if opt.isset(flagPacketInfo) && ctlOpts[ctlPacketInfo].name > 0 {
- l += socket.ControlMessageSpace(ctlOpts[ctlPacketInfo].length)
- }
- if opt.isset(FlagPathMTU) && ctlOpts[ctlPathMTU].name > 0 {
- l += socket.ControlMessageSpace(ctlOpts[ctlPathMTU].length)
- }
- var b []byte
- if l > 0 {
- b = make([]byte, l)
- }
- return b
-}
-
-// Ancillary data socket options
-const (
- ctlTrafficClass = iota // header field
- ctlHopLimit // header field
- ctlPacketInfo // inbound or outbound packet path
- ctlNextHop // nexthop
- ctlPathMTU // path mtu
- ctlMax
-)
-
-// A ctlOpt represents a binding for ancillary data socket option.
-type ctlOpt struct {
- name int // option name, must be equal or greater than 1
- length int // option length
- marshal func([]byte, *ControlMessage) []byte
- parse func(*ControlMessage, []byte)
-}
diff --git a/vendor/golang.org/x/net/ipv6/control_rfc2292_unix.go b/vendor/golang.org/x/net/ipv6/control_rfc2292_unix.go
deleted file mode 100644
index 2733ddb..0000000
--- a/vendor/golang.org/x/net/ipv6/control_rfc2292_unix.go
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build darwin
-// +build darwin
-
-package ipv6
-
-import (
- "unsafe"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-func marshal2292HopLimit(b []byte, cm *ControlMessage) []byte {
- m := socket.ControlMessage(b)
- m.MarshalHeader(iana.ProtocolIPv6, unix.IPV6_2292HOPLIMIT, 4)
- if cm != nil {
- socket.NativeEndian.PutUint32(m.Data(4), uint32(cm.HopLimit))
- }
- return m.Next(4)
-}
-
-func marshal2292PacketInfo(b []byte, cm *ControlMessage) []byte {
- m := socket.ControlMessage(b)
- m.MarshalHeader(iana.ProtocolIPv6, unix.IPV6_2292PKTINFO, sizeofInet6Pktinfo)
- if cm != nil {
- pi := (*inet6Pktinfo)(unsafe.Pointer(&m.Data(sizeofInet6Pktinfo)[0]))
- if ip := cm.Src.To16(); ip != nil && ip.To4() == nil {
- copy(pi.Addr[:], ip)
- }
- if cm.IfIndex > 0 {
- pi.setIfindex(cm.IfIndex)
- }
- }
- return m.Next(sizeofInet6Pktinfo)
-}
-
-func marshal2292NextHop(b []byte, cm *ControlMessage) []byte {
- m := socket.ControlMessage(b)
- m.MarshalHeader(iana.ProtocolIPv6, unix.IPV6_2292NEXTHOP, sizeofSockaddrInet6)
- if cm != nil {
- sa := (*sockaddrInet6)(unsafe.Pointer(&m.Data(sizeofSockaddrInet6)[0]))
- sa.setSockaddr(cm.NextHop, cm.IfIndex)
- }
- return m.Next(sizeofSockaddrInet6)
-}
diff --git a/vendor/golang.org/x/net/ipv6/control_rfc3542_unix.go b/vendor/golang.org/x/net/ipv6/control_rfc3542_unix.go
deleted file mode 100644
index 9c90844..0000000
--- a/vendor/golang.org/x/net/ipv6/control_rfc3542_unix.go
+++ /dev/null
@@ -1,97 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris zos
-
-package ipv6
-
-import (
- "net"
- "unsafe"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-func marshalTrafficClass(b []byte, cm *ControlMessage) []byte {
- m := socket.ControlMessage(b)
- m.MarshalHeader(iana.ProtocolIPv6, unix.IPV6_TCLASS, 4)
- if cm != nil {
- socket.NativeEndian.PutUint32(m.Data(4), uint32(cm.TrafficClass))
- }
- return m.Next(4)
-}
-
-func parseTrafficClass(cm *ControlMessage, b []byte) {
- cm.TrafficClass = int(socket.NativeEndian.Uint32(b[:4]))
-}
-
-func marshalHopLimit(b []byte, cm *ControlMessage) []byte {
- m := socket.ControlMessage(b)
- m.MarshalHeader(iana.ProtocolIPv6, unix.IPV6_HOPLIMIT, 4)
- if cm != nil {
- socket.NativeEndian.PutUint32(m.Data(4), uint32(cm.HopLimit))
- }
- return m.Next(4)
-}
-
-func parseHopLimit(cm *ControlMessage, b []byte) {
- cm.HopLimit = int(socket.NativeEndian.Uint32(b[:4]))
-}
-
-func marshalPacketInfo(b []byte, cm *ControlMessage) []byte {
- m := socket.ControlMessage(b)
- m.MarshalHeader(iana.ProtocolIPv6, unix.IPV6_PKTINFO, sizeofInet6Pktinfo)
- if cm != nil {
- pi := (*inet6Pktinfo)(unsafe.Pointer(&m.Data(sizeofInet6Pktinfo)[0]))
- if ip := cm.Src.To16(); ip != nil && ip.To4() == nil {
- copy(pi.Addr[:], ip)
- }
- if cm.IfIndex > 0 {
- pi.setIfindex(cm.IfIndex)
- }
- }
- return m.Next(sizeofInet6Pktinfo)
-}
-
-func parsePacketInfo(cm *ControlMessage, b []byte) {
- pi := (*inet6Pktinfo)(unsafe.Pointer(&b[0]))
- if len(cm.Dst) < net.IPv6len {
- cm.Dst = make(net.IP, net.IPv6len)
- }
- copy(cm.Dst, pi.Addr[:])
- cm.IfIndex = int(pi.Ifindex)
-}
-
-func marshalNextHop(b []byte, cm *ControlMessage) []byte {
- m := socket.ControlMessage(b)
- m.MarshalHeader(iana.ProtocolIPv6, unix.IPV6_NEXTHOP, sizeofSockaddrInet6)
- if cm != nil {
- sa := (*sockaddrInet6)(unsafe.Pointer(&m.Data(sizeofSockaddrInet6)[0]))
- sa.setSockaddr(cm.NextHop, cm.IfIndex)
- }
- return m.Next(sizeofSockaddrInet6)
-}
-
-func parseNextHop(cm *ControlMessage, b []byte) {
-}
-
-func marshalPathMTU(b []byte, cm *ControlMessage) []byte {
- m := socket.ControlMessage(b)
- m.MarshalHeader(iana.ProtocolIPv6, unix.IPV6_PATHMTU, sizeofIPv6Mtuinfo)
- return m.Next(sizeofIPv6Mtuinfo)
-}
-
-func parsePathMTU(cm *ControlMessage, b []byte) {
- mi := (*ipv6Mtuinfo)(unsafe.Pointer(&b[0]))
- if len(cm.Dst) < net.IPv6len {
- cm.Dst = make(net.IP, net.IPv6len)
- }
- copy(cm.Dst, mi.Addr.Addr[:])
- cm.IfIndex = int(mi.Addr.Scope_id)
- cm.MTU = int(mi.Mtu)
-}
diff --git a/vendor/golang.org/x/net/ipv6/control_stub.go b/vendor/golang.org/x/net/ipv6/control_stub.go
deleted file mode 100644
index b7e8643..0000000
--- a/vendor/golang.org/x/net/ipv6/control_stub.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !windows && !zos
-// +build !aix,!darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows,!zos
-
-package ipv6
-
-import "golang.org/x/net/internal/socket"
-
-func setControlMessage(c *socket.Conn, opt *rawOpt, cf ControlFlags, on bool) error {
- return errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/ipv6/control_unix.go b/vendor/golang.org/x/net/ipv6/control_unix.go
deleted file mode 100644
index 63e475d..0000000
--- a/vendor/golang.org/x/net/ipv6/control_unix.go
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris zos
-
-package ipv6
-
-import "golang.org/x/net/internal/socket"
-
-func setControlMessage(c *socket.Conn, opt *rawOpt, cf ControlFlags, on bool) error {
- opt.Lock()
- defer opt.Unlock()
- if so, ok := sockOpts[ssoReceiveTrafficClass]; ok && cf&FlagTrafficClass != 0 {
- if err := so.SetInt(c, boolint(on)); err != nil {
- return err
- }
- if on {
- opt.set(FlagTrafficClass)
- } else {
- opt.clear(FlagTrafficClass)
- }
- }
- if so, ok := sockOpts[ssoReceiveHopLimit]; ok && cf&FlagHopLimit != 0 {
- if err := so.SetInt(c, boolint(on)); err != nil {
- return err
- }
- if on {
- opt.set(FlagHopLimit)
- } else {
- opt.clear(FlagHopLimit)
- }
- }
- if so, ok := sockOpts[ssoReceivePacketInfo]; ok && cf&flagPacketInfo != 0 {
- if err := so.SetInt(c, boolint(on)); err != nil {
- return err
- }
- if on {
- opt.set(cf & flagPacketInfo)
- } else {
- opt.clear(cf & flagPacketInfo)
- }
- }
- if so, ok := sockOpts[ssoReceivePathMTU]; ok && cf&FlagPathMTU != 0 {
- if err := so.SetInt(c, boolint(on)); err != nil {
- return err
- }
- if on {
- opt.set(FlagPathMTU)
- } else {
- opt.clear(FlagPathMTU)
- }
- }
- return nil
-}
diff --git a/vendor/golang.org/x/net/ipv6/control_windows.go b/vendor/golang.org/x/net/ipv6/control_windows.go
deleted file mode 100644
index 8882d81..0000000
--- a/vendor/golang.org/x/net/ipv6/control_windows.go
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-import "golang.org/x/net/internal/socket"
-
-func setControlMessage(c *socket.Conn, opt *rawOpt, cf ControlFlags, on bool) error {
- // TODO(mikio): implement this
- return errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/ipv6/dgramopt.go b/vendor/golang.org/x/net/ipv6/dgramopt.go
deleted file mode 100644
index 1f422e7..0000000
--- a/vendor/golang.org/x/net/ipv6/dgramopt.go
+++ /dev/null
@@ -1,301 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-import (
- "net"
-
- "golang.org/x/net/bpf"
-)
-
-// MulticastHopLimit returns the hop limit field value for outgoing
-// multicast packets.
-func (c *dgramOpt) MulticastHopLimit() (int, error) {
- if !c.ok() {
- return 0, errInvalidConn
- }
- so, ok := sockOpts[ssoMulticastHopLimit]
- if !ok {
- return 0, errNotImplemented
- }
- return so.GetInt(c.Conn)
-}
-
-// SetMulticastHopLimit sets the hop limit field value for future
-// outgoing multicast packets.
-func (c *dgramOpt) SetMulticastHopLimit(hoplim int) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoMulticastHopLimit]
- if !ok {
- return errNotImplemented
- }
- return so.SetInt(c.Conn, hoplim)
-}
-
-// MulticastInterface returns the default interface for multicast
-// packet transmissions.
-func (c *dgramOpt) MulticastInterface() (*net.Interface, error) {
- if !c.ok() {
- return nil, errInvalidConn
- }
- so, ok := sockOpts[ssoMulticastInterface]
- if !ok {
- return nil, errNotImplemented
- }
- return so.getMulticastInterface(c.Conn)
-}
-
-// SetMulticastInterface sets the default interface for future
-// multicast packet transmissions.
-func (c *dgramOpt) SetMulticastInterface(ifi *net.Interface) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoMulticastInterface]
- if !ok {
- return errNotImplemented
- }
- return so.setMulticastInterface(c.Conn, ifi)
-}
-
-// MulticastLoopback reports whether transmitted multicast packets
-// should be copied and send back to the originator.
-func (c *dgramOpt) MulticastLoopback() (bool, error) {
- if !c.ok() {
- return false, errInvalidConn
- }
- so, ok := sockOpts[ssoMulticastLoopback]
- if !ok {
- return false, errNotImplemented
- }
- on, err := so.GetInt(c.Conn)
- if err != nil {
- return false, err
- }
- return on == 1, nil
-}
-
-// SetMulticastLoopback sets whether transmitted multicast packets
-// should be copied and send back to the originator.
-func (c *dgramOpt) SetMulticastLoopback(on bool) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoMulticastLoopback]
- if !ok {
- return errNotImplemented
- }
- return so.SetInt(c.Conn, boolint(on))
-}
-
-// JoinGroup joins the group address group on the interface ifi.
-// By default all sources that can cast data to group are accepted.
-// It's possible to mute and unmute data transmission from a specific
-// source by using ExcludeSourceSpecificGroup and
-// IncludeSourceSpecificGroup.
-// JoinGroup uses the system assigned multicast interface when ifi is
-// nil, although this is not recommended because the assignment
-// depends on platforms and sometimes it might require routing
-// configuration.
-func (c *dgramOpt) JoinGroup(ifi *net.Interface, group net.Addr) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoJoinGroup]
- if !ok {
- return errNotImplemented
- }
- grp := netAddrToIP16(group)
- if grp == nil {
- return errMissingAddress
- }
- return so.setGroup(c.Conn, ifi, grp)
-}
-
-// LeaveGroup leaves the group address group on the interface ifi
-// regardless of whether the group is any-source group or
-// source-specific group.
-func (c *dgramOpt) LeaveGroup(ifi *net.Interface, group net.Addr) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoLeaveGroup]
- if !ok {
- return errNotImplemented
- }
- grp := netAddrToIP16(group)
- if grp == nil {
- return errMissingAddress
- }
- return so.setGroup(c.Conn, ifi, grp)
-}
-
-// JoinSourceSpecificGroup joins the source-specific group comprising
-// group and source on the interface ifi.
-// JoinSourceSpecificGroup uses the system assigned multicast
-// interface when ifi is nil, although this is not recommended because
-// the assignment depends on platforms and sometimes it might require
-// routing configuration.
-func (c *dgramOpt) JoinSourceSpecificGroup(ifi *net.Interface, group, source net.Addr) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoJoinSourceGroup]
- if !ok {
- return errNotImplemented
- }
- grp := netAddrToIP16(group)
- if grp == nil {
- return errMissingAddress
- }
- src := netAddrToIP16(source)
- if src == nil {
- return errMissingAddress
- }
- return so.setSourceGroup(c.Conn, ifi, grp, src)
-}
-
-// LeaveSourceSpecificGroup leaves the source-specific group on the
-// interface ifi.
-func (c *dgramOpt) LeaveSourceSpecificGroup(ifi *net.Interface, group, source net.Addr) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoLeaveSourceGroup]
- if !ok {
- return errNotImplemented
- }
- grp := netAddrToIP16(group)
- if grp == nil {
- return errMissingAddress
- }
- src := netAddrToIP16(source)
- if src == nil {
- return errMissingAddress
- }
- return so.setSourceGroup(c.Conn, ifi, grp, src)
-}
-
-// ExcludeSourceSpecificGroup excludes the source-specific group from
-// the already joined any-source groups by JoinGroup on the interface
-// ifi.
-func (c *dgramOpt) ExcludeSourceSpecificGroup(ifi *net.Interface, group, source net.Addr) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoBlockSourceGroup]
- if !ok {
- return errNotImplemented
- }
- grp := netAddrToIP16(group)
- if grp == nil {
- return errMissingAddress
- }
- src := netAddrToIP16(source)
- if src == nil {
- return errMissingAddress
- }
- return so.setSourceGroup(c.Conn, ifi, grp, src)
-}
-
-// IncludeSourceSpecificGroup includes the excluded source-specific
-// group by ExcludeSourceSpecificGroup again on the interface ifi.
-func (c *dgramOpt) IncludeSourceSpecificGroup(ifi *net.Interface, group, source net.Addr) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoUnblockSourceGroup]
- if !ok {
- return errNotImplemented
- }
- grp := netAddrToIP16(group)
- if grp == nil {
- return errMissingAddress
- }
- src := netAddrToIP16(source)
- if src == nil {
- return errMissingAddress
- }
- return so.setSourceGroup(c.Conn, ifi, grp, src)
-}
-
-// Checksum reports whether the kernel will compute, store or verify a
-// checksum for both incoming and outgoing packets. If on is true, it
-// returns an offset in bytes into the data of where the checksum
-// field is located.
-func (c *dgramOpt) Checksum() (on bool, offset int, err error) {
- if !c.ok() {
- return false, 0, errInvalidConn
- }
- so, ok := sockOpts[ssoChecksum]
- if !ok {
- return false, 0, errNotImplemented
- }
- offset, err = so.GetInt(c.Conn)
- if err != nil {
- return false, 0, err
- }
- if offset < 0 {
- return false, 0, nil
- }
- return true, offset, nil
-}
-
-// SetChecksum enables the kernel checksum processing. If on is ture,
-// the offset should be an offset in bytes into the data of where the
-// checksum field is located.
-func (c *dgramOpt) SetChecksum(on bool, offset int) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoChecksum]
- if !ok {
- return errNotImplemented
- }
- if !on {
- offset = -1
- }
- return so.SetInt(c.Conn, offset)
-}
-
-// ICMPFilter returns an ICMP filter.
-func (c *dgramOpt) ICMPFilter() (*ICMPFilter, error) {
- if !c.ok() {
- return nil, errInvalidConn
- }
- so, ok := sockOpts[ssoICMPFilter]
- if !ok {
- return nil, errNotImplemented
- }
- return so.getICMPFilter(c.Conn)
-}
-
-// SetICMPFilter deploys the ICMP filter.
-func (c *dgramOpt) SetICMPFilter(f *ICMPFilter) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoICMPFilter]
- if !ok {
- return errNotImplemented
- }
- return so.setICMPFilter(c.Conn, f)
-}
-
-// SetBPF attaches a BPF program to the connection.
-//
-// Only supported on Linux.
-func (c *dgramOpt) SetBPF(filter []bpf.RawInstruction) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoAttachFilter]
- if !ok {
- return errNotImplemented
- }
- return so.setBPF(c.Conn, filter)
-}
diff --git a/vendor/golang.org/x/net/ipv6/doc.go b/vendor/golang.org/x/net/ipv6/doc.go
deleted file mode 100644
index e0be9d5..0000000
--- a/vendor/golang.org/x/net/ipv6/doc.go
+++ /dev/null
@@ -1,243 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package ipv6 implements IP-level socket options for the Internet
-// Protocol version 6.
-//
-// The package provides IP-level socket options that allow
-// manipulation of IPv6 facilities.
-//
-// The IPv6 protocol is defined in RFC 8200.
-// Socket interface extensions are defined in RFC 3493, RFC 3542 and
-// RFC 3678.
-// MLDv1 and MLDv2 are defined in RFC 2710 and RFC 3810.
-// Source-specific multicast is defined in RFC 4607.
-//
-// On Darwin, this package requires OS X Mavericks version 10.9 or
-// above, or equivalent.
-//
-//
-// Unicasting
-//
-// The options for unicasting are available for net.TCPConn,
-// net.UDPConn and net.IPConn which are created as network connections
-// that use the IPv6 transport. When a single TCP connection carrying
-// a data flow of multiple packets needs to indicate the flow is
-// important, Conn is used to set the traffic class field on the IPv6
-// header for each packet.
-//
-// ln, err := net.Listen("tcp6", "[::]:1024")
-// if err != nil {
-// // error handling
-// }
-// defer ln.Close()
-// for {
-// c, err := ln.Accept()
-// if err != nil {
-// // error handling
-// }
-// go func(c net.Conn) {
-// defer c.Close()
-//
-// The outgoing packets will be labeled DiffServ assured forwarding
-// class 1 low drop precedence, known as AF11 packets.
-//
-// if err := ipv6.NewConn(c).SetTrafficClass(0x28); err != nil {
-// // error handling
-// }
-// if _, err := c.Write(data); err != nil {
-// // error handling
-// }
-// }(c)
-// }
-//
-//
-// Multicasting
-//
-// The options for multicasting are available for net.UDPConn and
-// net.IPConn which are created as network connections that use the
-// IPv6 transport. A few network facilities must be prepared before
-// you begin multicasting, at a minimum joining network interfaces and
-// multicast groups.
-//
-// en0, err := net.InterfaceByName("en0")
-// if err != nil {
-// // error handling
-// }
-// en1, err := net.InterfaceByIndex(911)
-// if err != nil {
-// // error handling
-// }
-// group := net.ParseIP("ff02::114")
-//
-// First, an application listens to an appropriate address with an
-// appropriate service port.
-//
-// c, err := net.ListenPacket("udp6", "[::]:1024")
-// if err != nil {
-// // error handling
-// }
-// defer c.Close()
-//
-// Second, the application joins multicast groups, starts listening to
-// the groups on the specified network interfaces. Note that the
-// service port for transport layer protocol does not matter with this
-// operation as joining groups affects only network and link layer
-// protocols, such as IPv6 and Ethernet.
-//
-// p := ipv6.NewPacketConn(c)
-// if err := p.JoinGroup(en0, &net.UDPAddr{IP: group}); err != nil {
-// // error handling
-// }
-// if err := p.JoinGroup(en1, &net.UDPAddr{IP: group}); err != nil {
-// // error handling
-// }
-//
-// The application might set per packet control message transmissions
-// between the protocol stack within the kernel. When the application
-// needs a destination address on an incoming packet,
-// SetControlMessage of PacketConn is used to enable control message
-// transmissions.
-//
-// if err := p.SetControlMessage(ipv6.FlagDst, true); err != nil {
-// // error handling
-// }
-//
-// The application could identify whether the received packets are
-// of interest by using the control message that contains the
-// destination address of the received packet.
-//
-// b := make([]byte, 1500)
-// for {
-// n, rcm, src, err := p.ReadFrom(b)
-// if err != nil {
-// // error handling
-// }
-// if rcm.Dst.IsMulticast() {
-// if rcm.Dst.Equal(group) {
-// // joined group, do something
-// } else {
-// // unknown group, discard
-// continue
-// }
-// }
-//
-// The application can also send both unicast and multicast packets.
-//
-// p.SetTrafficClass(0x0)
-// p.SetHopLimit(16)
-// if _, err := p.WriteTo(data[:n], nil, src); err != nil {
-// // error handling
-// }
-// dst := &net.UDPAddr{IP: group, Port: 1024}
-// wcm := ipv6.ControlMessage{TrafficClass: 0xe0, HopLimit: 1}
-// for _, ifi := range []*net.Interface{en0, en1} {
-// wcm.IfIndex = ifi.Index
-// if _, err := p.WriteTo(data[:n], &wcm, dst); err != nil {
-// // error handling
-// }
-// }
-// }
-//
-//
-// More multicasting
-//
-// An application that uses PacketConn may join multiple multicast
-// groups. For example, a UDP listener with port 1024 might join two
-// different groups across over two different network interfaces by
-// using:
-//
-// c, err := net.ListenPacket("udp6", "[::]:1024")
-// if err != nil {
-// // error handling
-// }
-// defer c.Close()
-// p := ipv6.NewPacketConn(c)
-// if err := p.JoinGroup(en0, &net.UDPAddr{IP: net.ParseIP("ff02::1:114")}); err != nil {
-// // error handling
-// }
-// if err := p.JoinGroup(en0, &net.UDPAddr{IP: net.ParseIP("ff02::2:114")}); err != nil {
-// // error handling
-// }
-// if err := p.JoinGroup(en1, &net.UDPAddr{IP: net.ParseIP("ff02::2:114")}); err != nil {
-// // error handling
-// }
-//
-// It is possible for multiple UDP listeners that listen on the same
-// UDP port to join the same multicast group. The net package will
-// provide a socket that listens to a wildcard address with reusable
-// UDP port when an appropriate multicast address prefix is passed to
-// the net.ListenPacket or net.ListenUDP.
-//
-// c1, err := net.ListenPacket("udp6", "[ff02::]:1024")
-// if err != nil {
-// // error handling
-// }
-// defer c1.Close()
-// c2, err := net.ListenPacket("udp6", "[ff02::]:1024")
-// if err != nil {
-// // error handling
-// }
-// defer c2.Close()
-// p1 := ipv6.NewPacketConn(c1)
-// if err := p1.JoinGroup(en0, &net.UDPAddr{IP: net.ParseIP("ff02::114")}); err != nil {
-// // error handling
-// }
-// p2 := ipv6.NewPacketConn(c2)
-// if err := p2.JoinGroup(en0, &net.UDPAddr{IP: net.ParseIP("ff02::114")}); err != nil {
-// // error handling
-// }
-//
-// Also it is possible for the application to leave or rejoin a
-// multicast group on the network interface.
-//
-// if err := p.LeaveGroup(en0, &net.UDPAddr{IP: net.ParseIP("ff02::114")}); err != nil {
-// // error handling
-// }
-// if err := p.JoinGroup(en0, &net.UDPAddr{IP: net.ParseIP("ff01::114")}); err != nil {
-// // error handling
-// }
-//
-//
-// Source-specific multicasting
-//
-// An application that uses PacketConn on MLDv2 supported platform is
-// able to join source-specific multicast groups.
-// The application may use JoinSourceSpecificGroup and
-// LeaveSourceSpecificGroup for the operation known as "include" mode,
-//
-// ssmgroup := net.UDPAddr{IP: net.ParseIP("ff32::8000:9")}
-// ssmsource := net.UDPAddr{IP: net.ParseIP("fe80::cafe")}
-// if err := p.JoinSourceSpecificGroup(en0, &ssmgroup, &ssmsource); err != nil {
-// // error handling
-// }
-// if err := p.LeaveSourceSpecificGroup(en0, &ssmgroup, &ssmsource); err != nil {
-// // error handling
-// }
-//
-// or JoinGroup, ExcludeSourceSpecificGroup,
-// IncludeSourceSpecificGroup and LeaveGroup for the operation known
-// as "exclude" mode.
-//
-// exclsource := net.UDPAddr{IP: net.ParseIP("fe80::dead")}
-// if err := p.JoinGroup(en0, &ssmgroup); err != nil {
-// // error handling
-// }
-// if err := p.ExcludeSourceSpecificGroup(en0, &ssmgroup, &exclsource); err != nil {
-// // error handling
-// }
-// if err := p.LeaveGroup(en0, &ssmgroup); err != nil {
-// // error handling
-// }
-//
-// Note that it depends on each platform implementation what happens
-// when an application which runs on MLDv2 unsupported platform uses
-// JoinSourceSpecificGroup and LeaveSourceSpecificGroup.
-// In general the platform tries to fall back to conversations using
-// MLDv1 and starts to listen to multicast traffic.
-// In the fallback case, ExcludeSourceSpecificGroup and
-// IncludeSourceSpecificGroup may return an error.
-package ipv6 // import "golang.org/x/net/ipv6"
-
-// BUG(mikio): This package is not implemented on JS, NaCl and Plan 9.
diff --git a/vendor/golang.org/x/net/ipv6/endpoint.go b/vendor/golang.org/x/net/ipv6/endpoint.go
deleted file mode 100644
index f534a0b..0000000
--- a/vendor/golang.org/x/net/ipv6/endpoint.go
+++ /dev/null
@@ -1,127 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-import (
- "net"
- "time"
-
- "golang.org/x/net/internal/socket"
-)
-
-// BUG(mikio): On Windows, the JoinSourceSpecificGroup,
-// LeaveSourceSpecificGroup, ExcludeSourceSpecificGroup and
-// IncludeSourceSpecificGroup methods of PacketConn are not
-// implemented.
-
-// A Conn represents a network endpoint that uses IPv6 transport.
-// It allows to set basic IP-level socket options such as traffic
-// class and hop limit.
-type Conn struct {
- genericOpt
-}
-
-type genericOpt struct {
- *socket.Conn
-}
-
-func (c *genericOpt) ok() bool { return c != nil && c.Conn != nil }
-
-// PathMTU returns a path MTU value for the destination associated
-// with the endpoint.
-func (c *Conn) PathMTU() (int, error) {
- if !c.ok() {
- return 0, errInvalidConn
- }
- so, ok := sockOpts[ssoPathMTU]
- if !ok {
- return 0, errNotImplemented
- }
- _, mtu, err := so.getMTUInfo(c.Conn)
- if err != nil {
- return 0, err
- }
- return mtu, nil
-}
-
-// NewConn returns a new Conn.
-func NewConn(c net.Conn) *Conn {
- cc, _ := socket.NewConn(c)
- return &Conn{
- genericOpt: genericOpt{Conn: cc},
- }
-}
-
-// A PacketConn represents a packet network endpoint that uses IPv6
-// transport. It is used to control several IP-level socket options
-// including IPv6 header manipulation. It also provides datagram
-// based network I/O methods specific to the IPv6 and higher layer
-// protocols such as OSPF, GRE, and UDP.
-type PacketConn struct {
- genericOpt
- dgramOpt
- payloadHandler
-}
-
-type dgramOpt struct {
- *socket.Conn
-}
-
-func (c *dgramOpt) ok() bool { return c != nil && c.Conn != nil }
-
-// SetControlMessage allows to receive the per packet basis IP-level
-// socket options.
-func (c *PacketConn) SetControlMessage(cf ControlFlags, on bool) error {
- if !c.payloadHandler.ok() {
- return errInvalidConn
- }
- return setControlMessage(c.dgramOpt.Conn, &c.payloadHandler.rawOpt, cf, on)
-}
-
-// SetDeadline sets the read and write deadlines associated with the
-// endpoint.
-func (c *PacketConn) SetDeadline(t time.Time) error {
- if !c.payloadHandler.ok() {
- return errInvalidConn
- }
- return c.payloadHandler.SetDeadline(t)
-}
-
-// SetReadDeadline sets the read deadline associated with the
-// endpoint.
-func (c *PacketConn) SetReadDeadline(t time.Time) error {
- if !c.payloadHandler.ok() {
- return errInvalidConn
- }
- return c.payloadHandler.SetReadDeadline(t)
-}
-
-// SetWriteDeadline sets the write deadline associated with the
-// endpoint.
-func (c *PacketConn) SetWriteDeadline(t time.Time) error {
- if !c.payloadHandler.ok() {
- return errInvalidConn
- }
- return c.payloadHandler.SetWriteDeadline(t)
-}
-
-// Close closes the endpoint.
-func (c *PacketConn) Close() error {
- if !c.payloadHandler.ok() {
- return errInvalidConn
- }
- return c.payloadHandler.Close()
-}
-
-// NewPacketConn returns a new PacketConn using c as its underlying
-// transport.
-func NewPacketConn(c net.PacketConn) *PacketConn {
- cc, _ := socket.NewConn(c.(net.Conn))
- return &PacketConn{
- genericOpt: genericOpt{Conn: cc},
- dgramOpt: dgramOpt{Conn: cc},
- payloadHandler: payloadHandler{PacketConn: c, Conn: cc},
- }
-}
diff --git a/vendor/golang.org/x/net/ipv6/genericopt.go b/vendor/golang.org/x/net/ipv6/genericopt.go
deleted file mode 100644
index 0326aed..0000000
--- a/vendor/golang.org/x/net/ipv6/genericopt.go
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-// TrafficClass returns the traffic class field value for outgoing
-// packets.
-func (c *genericOpt) TrafficClass() (int, error) {
- if !c.ok() {
- return 0, errInvalidConn
- }
- so, ok := sockOpts[ssoTrafficClass]
- if !ok {
- return 0, errNotImplemented
- }
- return so.GetInt(c.Conn)
-}
-
-// SetTrafficClass sets the traffic class field value for future
-// outgoing packets.
-func (c *genericOpt) SetTrafficClass(tclass int) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoTrafficClass]
- if !ok {
- return errNotImplemented
- }
- return so.SetInt(c.Conn, tclass)
-}
-
-// HopLimit returns the hop limit field value for outgoing packets.
-func (c *genericOpt) HopLimit() (int, error) {
- if !c.ok() {
- return 0, errInvalidConn
- }
- so, ok := sockOpts[ssoHopLimit]
- if !ok {
- return 0, errNotImplemented
- }
- return so.GetInt(c.Conn)
-}
-
-// SetHopLimit sets the hop limit field value for future outgoing
-// packets.
-func (c *genericOpt) SetHopLimit(hoplim int) error {
- if !c.ok() {
- return errInvalidConn
- }
- so, ok := sockOpts[ssoHopLimit]
- if !ok {
- return errNotImplemented
- }
- return so.SetInt(c.Conn, hoplim)
-}
diff --git a/vendor/golang.org/x/net/ipv6/header.go b/vendor/golang.org/x/net/ipv6/header.go
deleted file mode 100644
index e05cb08..0000000
--- a/vendor/golang.org/x/net/ipv6/header.go
+++ /dev/null
@@ -1,55 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-import (
- "encoding/binary"
- "fmt"
- "net"
-)
-
-const (
- Version = 6 // protocol version
- HeaderLen = 40 // header length
-)
-
-// A Header represents an IPv6 base header.
-type Header struct {
- Version int // protocol version
- TrafficClass int // traffic class
- FlowLabel int // flow label
- PayloadLen int // payload length
- NextHeader int // next header
- HopLimit int // hop limit
- Src net.IP // source address
- Dst net.IP // destination address
-}
-
-func (h *Header) String() string {
- if h == nil {
- return ""
- }
- return fmt.Sprintf("ver=%d tclass=%#x flowlbl=%#x payloadlen=%d nxthdr=%d hoplim=%d src=%v dst=%v", h.Version, h.TrafficClass, h.FlowLabel, h.PayloadLen, h.NextHeader, h.HopLimit, h.Src, h.Dst)
-}
-
-// ParseHeader parses b as an IPv6 base header.
-func ParseHeader(b []byte) (*Header, error) {
- if len(b) < HeaderLen {
- return nil, errHeaderTooShort
- }
- h := &Header{
- Version: int(b[0]) >> 4,
- TrafficClass: int(b[0]&0x0f)<<4 | int(b[1])>>4,
- FlowLabel: int(b[1]&0x0f)<<16 | int(b[2])<<8 | int(b[3]),
- PayloadLen: int(binary.BigEndian.Uint16(b[4:6])),
- NextHeader: int(b[6]),
- HopLimit: int(b[7]),
- }
- h.Src = make(net.IP, net.IPv6len)
- copy(h.Src, b[8:24])
- h.Dst = make(net.IP, net.IPv6len)
- copy(h.Dst, b[24:40])
- return h, nil
-}
diff --git a/vendor/golang.org/x/net/ipv6/helper.go b/vendor/golang.org/x/net/ipv6/helper.go
deleted file mode 100644
index c2d508f..0000000
--- a/vendor/golang.org/x/net/ipv6/helper.go
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-import (
- "errors"
- "net"
- "runtime"
-)
-
-var (
- errInvalidConn = errors.New("invalid connection")
- errMissingAddress = errors.New("missing address")
- errHeaderTooShort = errors.New("header too short")
- errInvalidConnType = errors.New("invalid conn type")
- errNotImplemented = errors.New("not implemented on " + runtime.GOOS + "/" + runtime.GOARCH)
-)
-
-func boolint(b bool) int {
- if b {
- return 1
- }
- return 0
-}
-
-func netAddrToIP16(a net.Addr) net.IP {
- switch v := a.(type) {
- case *net.UDPAddr:
- if ip := v.IP.To16(); ip != nil && ip.To4() == nil {
- return ip
- }
- case *net.IPAddr:
- if ip := v.IP.To16(); ip != nil && ip.To4() == nil {
- return ip
- }
- }
- return nil
-}
-
-func opAddr(a net.Addr) net.Addr {
- switch a.(type) {
- case *net.TCPAddr:
- if a == nil {
- return nil
- }
- case *net.UDPAddr:
- if a == nil {
- return nil
- }
- case *net.IPAddr:
- if a == nil {
- return nil
- }
- }
- return a
-}
diff --git a/vendor/golang.org/x/net/ipv6/iana.go b/vendor/golang.org/x/net/ipv6/iana.go
deleted file mode 100644
index 32db1aa..0000000
--- a/vendor/golang.org/x/net/ipv6/iana.go
+++ /dev/null
@@ -1,86 +0,0 @@
-// go generate gen.go
-// Code generated by the command above; DO NOT EDIT.
-
-package ipv6
-
-// Internet Control Message Protocol version 6 (ICMPv6) Parameters, Updated: 2018-03-09
-const (
- ICMPTypeDestinationUnreachable ICMPType = 1 // Destination Unreachable
- ICMPTypePacketTooBig ICMPType = 2 // Packet Too Big
- ICMPTypeTimeExceeded ICMPType = 3 // Time Exceeded
- ICMPTypeParameterProblem ICMPType = 4 // Parameter Problem
- ICMPTypeEchoRequest ICMPType = 128 // Echo Request
- ICMPTypeEchoReply ICMPType = 129 // Echo Reply
- ICMPTypeMulticastListenerQuery ICMPType = 130 // Multicast Listener Query
- ICMPTypeMulticastListenerReport ICMPType = 131 // Multicast Listener Report
- ICMPTypeMulticastListenerDone ICMPType = 132 // Multicast Listener Done
- ICMPTypeRouterSolicitation ICMPType = 133 // Router Solicitation
- ICMPTypeRouterAdvertisement ICMPType = 134 // Router Advertisement
- ICMPTypeNeighborSolicitation ICMPType = 135 // Neighbor Solicitation
- ICMPTypeNeighborAdvertisement ICMPType = 136 // Neighbor Advertisement
- ICMPTypeRedirect ICMPType = 137 // Redirect Message
- ICMPTypeRouterRenumbering ICMPType = 138 // Router Renumbering
- ICMPTypeNodeInformationQuery ICMPType = 139 // ICMP Node Information Query
- ICMPTypeNodeInformationResponse ICMPType = 140 // ICMP Node Information Response
- ICMPTypeInverseNeighborDiscoverySolicitation ICMPType = 141 // Inverse Neighbor Discovery Solicitation Message
- ICMPTypeInverseNeighborDiscoveryAdvertisement ICMPType = 142 // Inverse Neighbor Discovery Advertisement Message
- ICMPTypeVersion2MulticastListenerReport ICMPType = 143 // Version 2 Multicast Listener Report
- ICMPTypeHomeAgentAddressDiscoveryRequest ICMPType = 144 // Home Agent Address Discovery Request Message
- ICMPTypeHomeAgentAddressDiscoveryReply ICMPType = 145 // Home Agent Address Discovery Reply Message
- ICMPTypeMobilePrefixSolicitation ICMPType = 146 // Mobile Prefix Solicitation
- ICMPTypeMobilePrefixAdvertisement ICMPType = 147 // Mobile Prefix Advertisement
- ICMPTypeCertificationPathSolicitation ICMPType = 148 // Certification Path Solicitation Message
- ICMPTypeCertificationPathAdvertisement ICMPType = 149 // Certification Path Advertisement Message
- ICMPTypeMulticastRouterAdvertisement ICMPType = 151 // Multicast Router Advertisement
- ICMPTypeMulticastRouterSolicitation ICMPType = 152 // Multicast Router Solicitation
- ICMPTypeMulticastRouterTermination ICMPType = 153 // Multicast Router Termination
- ICMPTypeFMIPv6 ICMPType = 154 // FMIPv6 Messages
- ICMPTypeRPLControl ICMPType = 155 // RPL Control Message
- ICMPTypeILNPv6LocatorUpdate ICMPType = 156 // ILNPv6 Locator Update Message
- ICMPTypeDuplicateAddressRequest ICMPType = 157 // Duplicate Address Request
- ICMPTypeDuplicateAddressConfirmation ICMPType = 158 // Duplicate Address Confirmation
- ICMPTypeMPLControl ICMPType = 159 // MPL Control Message
- ICMPTypeExtendedEchoRequest ICMPType = 160 // Extended Echo Request
- ICMPTypeExtendedEchoReply ICMPType = 161 // Extended Echo Reply
-)
-
-// Internet Control Message Protocol version 6 (ICMPv6) Parameters, Updated: 2018-03-09
-var icmpTypes = map[ICMPType]string{
- 1: "destination unreachable",
- 2: "packet too big",
- 3: "time exceeded",
- 4: "parameter problem",
- 128: "echo request",
- 129: "echo reply",
- 130: "multicast listener query",
- 131: "multicast listener report",
- 132: "multicast listener done",
- 133: "router solicitation",
- 134: "router advertisement",
- 135: "neighbor solicitation",
- 136: "neighbor advertisement",
- 137: "redirect message",
- 138: "router renumbering",
- 139: "icmp node information query",
- 140: "icmp node information response",
- 141: "inverse neighbor discovery solicitation message",
- 142: "inverse neighbor discovery advertisement message",
- 143: "version 2 multicast listener report",
- 144: "home agent address discovery request message",
- 145: "home agent address discovery reply message",
- 146: "mobile prefix solicitation",
- 147: "mobile prefix advertisement",
- 148: "certification path solicitation message",
- 149: "certification path advertisement message",
- 151: "multicast router advertisement",
- 152: "multicast router solicitation",
- 153: "multicast router termination",
- 154: "fmipv6 messages",
- 155: "rpl control message",
- 156: "ilnpv6 locator update message",
- 157: "duplicate address request",
- 158: "duplicate address confirmation",
- 159: "mpl control message",
- 160: "extended echo request",
- 161: "extended echo reply",
-}
diff --git a/vendor/golang.org/x/net/ipv6/icmp.go b/vendor/golang.org/x/net/ipv6/icmp.go
deleted file mode 100644
index b7f48e2..0000000
--- a/vendor/golang.org/x/net/ipv6/icmp.go
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-import "golang.org/x/net/internal/iana"
-
-// BUG(mikio): On Windows, methods related to ICMPFilter are not
-// implemented.
-
-// An ICMPType represents a type of ICMP message.
-type ICMPType int
-
-func (typ ICMPType) String() string {
- s, ok := icmpTypes[typ]
- if !ok {
- return ""
- }
- return s
-}
-
-// Protocol returns the ICMPv6 protocol number.
-func (typ ICMPType) Protocol() int {
- return iana.ProtocolIPv6ICMP
-}
-
-// An ICMPFilter represents an ICMP message filter for incoming
-// packets. The filter belongs to a packet delivery path on a host and
-// it cannot interact with forwarding packets or tunnel-outer packets.
-//
-// Note: RFC 8200 defines a reasonable role model. A node means a
-// device that implements IP. A router means a node that forwards IP
-// packets not explicitly addressed to itself, and a host means a node
-// that is not a router.
-type ICMPFilter struct {
- icmpv6Filter
-}
-
-// Accept accepts incoming ICMP packets including the type field value
-// typ.
-func (f *ICMPFilter) Accept(typ ICMPType) {
- f.accept(typ)
-}
-
-// Block blocks incoming ICMP packets including the type field value
-// typ.
-func (f *ICMPFilter) Block(typ ICMPType) {
- f.block(typ)
-}
-
-// SetAll sets the filter action to the filter.
-func (f *ICMPFilter) SetAll(block bool) {
- f.setAll(block)
-}
-
-// WillBlock reports whether the ICMP type will be blocked.
-func (f *ICMPFilter) WillBlock(typ ICMPType) bool {
- return f.willBlock(typ)
-}
diff --git a/vendor/golang.org/x/net/ipv6/icmp_bsd.go b/vendor/golang.org/x/net/ipv6/icmp_bsd.go
deleted file mode 100644
index 120bf87..0000000
--- a/vendor/golang.org/x/net/ipv6/icmp_bsd.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || netbsd || openbsd
-// +build aix darwin dragonfly freebsd netbsd openbsd
-
-package ipv6
-
-func (f *icmpv6Filter) accept(typ ICMPType) {
- f.Filt[typ>>5] |= 1 << (uint32(typ) & 31)
-}
-
-func (f *icmpv6Filter) block(typ ICMPType) {
- f.Filt[typ>>5] &^= 1 << (uint32(typ) & 31)
-}
-
-func (f *icmpv6Filter) setAll(block bool) {
- for i := range f.Filt {
- if block {
- f.Filt[i] = 0
- } else {
- f.Filt[i] = 1<<32 - 1
- }
- }
-}
-
-func (f *icmpv6Filter) willBlock(typ ICMPType) bool {
- return f.Filt[typ>>5]&(1<<(uint32(typ)&31)) == 0
-}
diff --git a/vendor/golang.org/x/net/ipv6/icmp_linux.go b/vendor/golang.org/x/net/ipv6/icmp_linux.go
deleted file mode 100644
index 647f6b4..0000000
--- a/vendor/golang.org/x/net/ipv6/icmp_linux.go
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-func (f *icmpv6Filter) accept(typ ICMPType) {
- f.Data[typ>>5] &^= 1 << (uint32(typ) & 31)
-}
-
-func (f *icmpv6Filter) block(typ ICMPType) {
- f.Data[typ>>5] |= 1 << (uint32(typ) & 31)
-}
-
-func (f *icmpv6Filter) setAll(block bool) {
- for i := range f.Data {
- if block {
- f.Data[i] = 1<<32 - 1
- } else {
- f.Data[i] = 0
- }
- }
-}
-
-func (f *icmpv6Filter) willBlock(typ ICMPType) bool {
- return f.Data[typ>>5]&(1<<(uint32(typ)&31)) != 0
-}
diff --git a/vendor/golang.org/x/net/ipv6/icmp_solaris.go b/vendor/golang.org/x/net/ipv6/icmp_solaris.go
deleted file mode 100644
index 7c23bb1..0000000
--- a/vendor/golang.org/x/net/ipv6/icmp_solaris.go
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-func (f *icmpv6Filter) accept(typ ICMPType) {
- f.X__icmp6_filt[typ>>5] |= 1 << (uint32(typ) & 31)
-}
-
-func (f *icmpv6Filter) block(typ ICMPType) {
- f.X__icmp6_filt[typ>>5] &^= 1 << (uint32(typ) & 31)
-}
-
-func (f *icmpv6Filter) setAll(block bool) {
- for i := range f.X__icmp6_filt {
- if block {
- f.X__icmp6_filt[i] = 0
- } else {
- f.X__icmp6_filt[i] = 1<<32 - 1
- }
- }
-}
-
-func (f *icmpv6Filter) willBlock(typ ICMPType) bool {
- return f.X__icmp6_filt[typ>>5]&(1<<(uint32(typ)&31)) == 0
-}
diff --git a/vendor/golang.org/x/net/ipv6/icmp_stub.go b/vendor/golang.org/x/net/ipv6/icmp_stub.go
deleted file mode 100644
index d60136a..0000000
--- a/vendor/golang.org/x/net/ipv6/icmp_stub.go
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !windows && !zos
-// +build !aix,!darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows,!zos
-
-package ipv6
-
-type icmpv6Filter struct {
-}
-
-func (f *icmpv6Filter) accept(typ ICMPType) {
-}
-
-func (f *icmpv6Filter) block(typ ICMPType) {
-}
-
-func (f *icmpv6Filter) setAll(block bool) {
-}
-
-func (f *icmpv6Filter) willBlock(typ ICMPType) bool {
- return false
-}
diff --git a/vendor/golang.org/x/net/ipv6/icmp_windows.go b/vendor/golang.org/x/net/ipv6/icmp_windows.go
deleted file mode 100644
index 443cd07..0000000
--- a/vendor/golang.org/x/net/ipv6/icmp_windows.go
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-func (f *icmpv6Filter) accept(typ ICMPType) {
- // TODO(mikio): implement this
-}
-
-func (f *icmpv6Filter) block(typ ICMPType) {
- // TODO(mikio): implement this
-}
-
-func (f *icmpv6Filter) setAll(block bool) {
- // TODO(mikio): implement this
-}
-
-func (f *icmpv6Filter) willBlock(typ ICMPType) bool {
- // TODO(mikio): implement this
- return false
-}
diff --git a/vendor/golang.org/x/net/ipv6/icmp_zos.go b/vendor/golang.org/x/net/ipv6/icmp_zos.go
deleted file mode 100644
index ddf8f09..0000000
--- a/vendor/golang.org/x/net/ipv6/icmp_zos.go
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-func (f *icmpv6Filter) accept(typ ICMPType) {
- f.Filt[typ>>5] |= 1 << (uint32(typ) & 31)
-
-}
-
-func (f *icmpv6Filter) block(typ ICMPType) {
- f.Filt[typ>>5] &^= 1 << (uint32(typ) & 31)
-
-}
-
-func (f *icmpv6Filter) setAll(block bool) {
- for i := range f.Filt {
- if block {
- f.Filt[i] = 0
- } else {
- f.Filt[i] = 1<<32 - 1
- }
- }
-}
-
-func (f *icmpv6Filter) willBlock(typ ICMPType) bool {
- return f.Filt[typ>>5]&(1<<(uint32(typ)&31)) == 0
-}
diff --git a/vendor/golang.org/x/net/ipv6/payload.go b/vendor/golang.org/x/net/ipv6/payload.go
deleted file mode 100644
index a8197f1..0000000
--- a/vendor/golang.org/x/net/ipv6/payload.go
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-import (
- "net"
-
- "golang.org/x/net/internal/socket"
-)
-
-// BUG(mikio): On Windows, the ControlMessage for ReadFrom and WriteTo
-// methods of PacketConn is not implemented.
-
-// A payloadHandler represents the IPv6 datagram payload handler.
-type payloadHandler struct {
- net.PacketConn
- *socket.Conn
- rawOpt
-}
-
-func (c *payloadHandler) ok() bool { return c != nil && c.PacketConn != nil && c.Conn != nil }
diff --git a/vendor/golang.org/x/net/ipv6/payload_cmsg.go b/vendor/golang.org/x/net/ipv6/payload_cmsg.go
deleted file mode 100644
index b0692e4..0000000
--- a/vendor/golang.org/x/net/ipv6/payload_cmsg.go
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris zos
-
-package ipv6
-
-import (
- "net"
-
- "golang.org/x/net/internal/socket"
-)
-
-// ReadFrom reads a payload of the received IPv6 datagram, from the
-// endpoint c, copying the payload into b. It returns the number of
-// bytes copied into b, the control message cm and the source address
-// src of the received datagram.
-func (c *payloadHandler) ReadFrom(b []byte) (n int, cm *ControlMessage, src net.Addr, err error) {
- if !c.ok() {
- return 0, nil, nil, errInvalidConn
- }
- c.rawOpt.RLock()
- m := socket.Message{
- Buffers: [][]byte{b},
- OOB: NewControlMessage(c.rawOpt.cflags),
- }
- c.rawOpt.RUnlock()
- switch c.PacketConn.(type) {
- case *net.UDPConn:
- if err := c.RecvMsg(&m, 0); err != nil {
- return 0, nil, nil, &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
- }
- case *net.IPConn:
- if err := c.RecvMsg(&m, 0); err != nil {
- return 0, nil, nil, &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
- }
- default:
- return 0, nil, nil, &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: errInvalidConnType}
- }
- if m.NN > 0 {
- cm = new(ControlMessage)
- if err := cm.Parse(m.OOB[:m.NN]); err != nil {
- return 0, nil, nil, &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
- }
- cm.Src = netAddrToIP16(m.Addr)
- }
- return m.N, cm, m.Addr, nil
-}
-
-// WriteTo writes a payload of the IPv6 datagram, to the destination
-// address dst through the endpoint c, copying the payload from b. It
-// returns the number of bytes written. The control message cm allows
-// the IPv6 header fields and the datagram path to be specified. The
-// cm may be nil if control of the outgoing datagram is not required.
-func (c *payloadHandler) WriteTo(b []byte, cm *ControlMessage, dst net.Addr) (n int, err error) {
- if !c.ok() {
- return 0, errInvalidConn
- }
- m := socket.Message{
- Buffers: [][]byte{b},
- OOB: cm.Marshal(),
- Addr: dst,
- }
- err = c.SendMsg(&m, 0)
- if err != nil {
- err = &net.OpError{Op: "write", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Addr: opAddr(dst), Err: err}
- }
- return m.N, err
-}
diff --git a/vendor/golang.org/x/net/ipv6/payload_nocmsg.go b/vendor/golang.org/x/net/ipv6/payload_nocmsg.go
deleted file mode 100644
index cd0ff50..0000000
--- a/vendor/golang.org/x/net/ipv6/payload_nocmsg.go
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !zos
-// +build !aix,!darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!zos
-
-package ipv6
-
-import "net"
-
-// ReadFrom reads a payload of the received IPv6 datagram, from the
-// endpoint c, copying the payload into b. It returns the number of
-// bytes copied into b, the control message cm and the source address
-// src of the received datagram.
-func (c *payloadHandler) ReadFrom(b []byte) (n int, cm *ControlMessage, src net.Addr, err error) {
- if !c.ok() {
- return 0, nil, nil, errInvalidConn
- }
- if n, src, err = c.PacketConn.ReadFrom(b); err != nil {
- return 0, nil, nil, err
- }
- return
-}
-
-// WriteTo writes a payload of the IPv6 datagram, to the destination
-// address dst through the endpoint c, copying the payload from b. It
-// returns the number of bytes written. The control message cm allows
-// the IPv6 header fields and the datagram path to be specified. The
-// cm may be nil if control of the outgoing datagram is not required.
-func (c *payloadHandler) WriteTo(b []byte, cm *ControlMessage, dst net.Addr) (n int, err error) {
- if !c.ok() {
- return 0, errInvalidConn
- }
- if dst == nil {
- return 0, errMissingAddress
- }
- return c.PacketConn.WriteTo(b, dst)
-}
diff --git a/vendor/golang.org/x/net/ipv6/sockopt.go b/vendor/golang.org/x/net/ipv6/sockopt.go
deleted file mode 100644
index cc3907d..0000000
--- a/vendor/golang.org/x/net/ipv6/sockopt.go
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-import "golang.org/x/net/internal/socket"
-
-// Sticky socket options
-const (
- ssoTrafficClass = iota // header field for unicast packet, RFC 3542
- ssoHopLimit // header field for unicast packet, RFC 3493
- ssoMulticastInterface // outbound interface for multicast packet, RFC 3493
- ssoMulticastHopLimit // header field for multicast packet, RFC 3493
- ssoMulticastLoopback // loopback for multicast packet, RFC 3493
- ssoReceiveTrafficClass // header field on received packet, RFC 3542
- ssoReceiveHopLimit // header field on received packet, RFC 2292 or 3542
- ssoReceivePacketInfo // incbound or outbound packet path, RFC 2292 or 3542
- ssoReceivePathMTU // path mtu, RFC 3542
- ssoPathMTU // path mtu, RFC 3542
- ssoChecksum // packet checksum, RFC 2292 or 3542
- ssoICMPFilter // icmp filter, RFC 2292 or 3542
- ssoJoinGroup // any-source multicast, RFC 3493
- ssoLeaveGroup // any-source multicast, RFC 3493
- ssoJoinSourceGroup // source-specific multicast
- ssoLeaveSourceGroup // source-specific multicast
- ssoBlockSourceGroup // any-source or source-specific multicast
- ssoUnblockSourceGroup // any-source or source-specific multicast
- ssoAttachFilter // attach BPF for filtering inbound traffic
-)
-
-// Sticky socket option value types
-const (
- ssoTypeIPMreq = iota + 1
- ssoTypeGroupReq
- ssoTypeGroupSourceReq
-)
-
-// A sockOpt represents a binding for sticky socket option.
-type sockOpt struct {
- socket.Option
- typ int // hint for option value type; optional
-}
diff --git a/vendor/golang.org/x/net/ipv6/sockopt_posix.go b/vendor/golang.org/x/net/ipv6/sockopt_posix.go
deleted file mode 100644
index 37c6287..0000000
--- a/vendor/golang.org/x/net/ipv6/sockopt_posix.go
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || windows || zos
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris windows zos
-
-package ipv6
-
-import (
- "net"
- "runtime"
- "unsafe"
-
- "golang.org/x/net/bpf"
- "golang.org/x/net/internal/socket"
-)
-
-func (so *sockOpt) getMulticastInterface(c *socket.Conn) (*net.Interface, error) {
- n, err := so.GetInt(c)
- if err != nil {
- return nil, err
- }
- return net.InterfaceByIndex(n)
-}
-
-func (so *sockOpt) setMulticastInterface(c *socket.Conn, ifi *net.Interface) error {
- var n int
- if ifi != nil {
- n = ifi.Index
- }
- return so.SetInt(c, n)
-}
-
-func (so *sockOpt) getICMPFilter(c *socket.Conn) (*ICMPFilter, error) {
- b := make([]byte, so.Len)
- n, err := so.Get(c, b)
- if err != nil {
- return nil, err
- }
- if n != sizeofICMPv6Filter {
- return nil, errNotImplemented
- }
- return (*ICMPFilter)(unsafe.Pointer(&b[0])), nil
-}
-
-func (so *sockOpt) setICMPFilter(c *socket.Conn, f *ICMPFilter) error {
- b := (*[sizeofICMPv6Filter]byte)(unsafe.Pointer(f))[:sizeofICMPv6Filter]
- return so.Set(c, b)
-}
-
-func (so *sockOpt) getMTUInfo(c *socket.Conn) (*net.Interface, int, error) {
- b := make([]byte, so.Len)
- n, err := so.Get(c, b)
- if err != nil {
- return nil, 0, err
- }
- if n != sizeofIPv6Mtuinfo {
- return nil, 0, errNotImplemented
- }
- mi := (*ipv6Mtuinfo)(unsafe.Pointer(&b[0]))
- if mi.Addr.Scope_id == 0 || runtime.GOOS == "aix" {
- // AIX kernel might return a wrong address.
- return nil, int(mi.Mtu), nil
- }
- ifi, err := net.InterfaceByIndex(int(mi.Addr.Scope_id))
- if err != nil {
- return nil, 0, err
- }
- return ifi, int(mi.Mtu), nil
-}
-
-func (so *sockOpt) setGroup(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
- switch so.typ {
- case ssoTypeIPMreq:
- return so.setIPMreq(c, ifi, grp)
- case ssoTypeGroupReq:
- return so.setGroupReq(c, ifi, grp)
- default:
- return errNotImplemented
- }
-}
-
-func (so *sockOpt) setSourceGroup(c *socket.Conn, ifi *net.Interface, grp, src net.IP) error {
- return so.setGroupSourceReq(c, ifi, grp, src)
-}
-
-func (so *sockOpt) setBPF(c *socket.Conn, f []bpf.RawInstruction) error {
- return so.setAttachFilter(c, f)
-}
diff --git a/vendor/golang.org/x/net/ipv6/sockopt_stub.go b/vendor/golang.org/x/net/ipv6/sockopt_stub.go
deleted file mode 100644
index 32fd866..0000000
--- a/vendor/golang.org/x/net/ipv6/sockopt_stub.go
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !windows && !zos
-// +build !aix,!darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows,!zos
-
-package ipv6
-
-import (
- "net"
-
- "golang.org/x/net/bpf"
- "golang.org/x/net/internal/socket"
-)
-
-func (so *sockOpt) getMulticastInterface(c *socket.Conn) (*net.Interface, error) {
- return nil, errNotImplemented
-}
-
-func (so *sockOpt) setMulticastInterface(c *socket.Conn, ifi *net.Interface) error {
- return errNotImplemented
-}
-
-func (so *sockOpt) getICMPFilter(c *socket.Conn) (*ICMPFilter, error) {
- return nil, errNotImplemented
-}
-
-func (so *sockOpt) setICMPFilter(c *socket.Conn, f *ICMPFilter) error {
- return errNotImplemented
-}
-
-func (so *sockOpt) getMTUInfo(c *socket.Conn) (*net.Interface, int, error) {
- return nil, 0, errNotImplemented
-}
-
-func (so *sockOpt) setGroup(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
- return errNotImplemented
-}
-
-func (so *sockOpt) setSourceGroup(c *socket.Conn, ifi *net.Interface, grp, src net.IP) error {
- return errNotImplemented
-}
-
-func (so *sockOpt) setBPF(c *socket.Conn, f []bpf.RawInstruction) error {
- return errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/ipv6/sys_aix.go b/vendor/golang.org/x/net/ipv6/sys_aix.go
deleted file mode 100644
index a47182a..0000000
--- a/vendor/golang.org/x/net/ipv6/sys_aix.go
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Added for go1.11 compatibility
-//go:build aix
-// +build aix
-
-package ipv6
-
-import (
- "net"
- "syscall"
- "unsafe"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-var (
- ctlOpts = [ctlMax]ctlOpt{
- ctlTrafficClass: {unix.IPV6_TCLASS, 4, marshalTrafficClass, parseTrafficClass},
- ctlHopLimit: {unix.IPV6_HOPLIMIT, 4, marshalHopLimit, parseHopLimit},
- ctlPacketInfo: {unix.IPV6_PKTINFO, sizeofInet6Pktinfo, marshalPacketInfo, parsePacketInfo},
- ctlNextHop: {unix.IPV6_NEXTHOP, sizeofSockaddrInet6, marshalNextHop, parseNextHop},
- ctlPathMTU: {unix.IPV6_PATHMTU, sizeofIPv6Mtuinfo, marshalPathMTU, parsePathMTU},
- }
-
- sockOpts = map[int]*sockOpt{
- ssoTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_TCLASS, Len: 4}},
- ssoHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_UNICAST_HOPS, Len: 4}},
- ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_IF, Len: 4}},
- ssoMulticastHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_HOPS, Len: 4}},
- ssoMulticastLoopback: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_LOOP, Len: 4}},
- ssoReceiveTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVTCLASS, Len: 4}},
- ssoReceiveHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVHOPLIMIT, Len: 4}},
- ssoReceivePacketInfo: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVPKTINFO, Len: 4}},
- ssoReceivePathMTU: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVPATHMTU, Len: 4}},
- ssoPathMTU: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_PATHMTU, Len: sizeofIPv6Mtuinfo}},
- ssoChecksum: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_CHECKSUM, Len: 4}},
- ssoICMPFilter: {Option: socket.Option{Level: iana.ProtocolIPv6ICMP, Name: unix.ICMP6_FILTER, Len: sizeofICMPv6Filter}},
- ssoJoinGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_JOIN_GROUP, Len: sizeofIPv6Mreq}, typ: ssoTypeIPMreq},
- ssoLeaveGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_LEAVE_GROUP, Len: sizeofIPv6Mreq}, typ: ssoTypeIPMreq},
- }
-)
-
-func (sa *sockaddrInet6) setSockaddr(ip net.IP, i int) {
- sa.Len = sizeofSockaddrInet6
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], ip)
- sa.Scope_id = uint32(i)
-}
-
-func (pi *inet6Pktinfo) setIfindex(i int) {
- pi.Ifindex = int32(i)
-}
-
-func (mreq *ipv6Mreq) setIfindex(i int) {
- mreq.Interface = uint32(i)
-}
-
-func (gr *groupReq) setGroup(grp net.IP) {
- sa := (*sockaddrInet6)(unsafe.Pointer(uintptr(unsafe.Pointer(gr)) + 4))
- sa.Len = sizeofSockaddrInet6
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], grp)
-}
-
-func (gsr *groupSourceReq) setSourceGroup(grp, src net.IP) {
- sa := (*sockaddrInet6)(unsafe.Pointer(uintptr(unsafe.Pointer(gsr)) + 4))
- sa.Len = sizeofSockaddrInet6
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], grp)
- sa = (*sockaddrInet6)(unsafe.Pointer(uintptr(unsafe.Pointer(gsr)) + 132))
- sa.Len = sizeofSockaddrInet6
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], src)
-}
diff --git a/vendor/golang.org/x/net/ipv6/sys_asmreq.go b/vendor/golang.org/x/net/ipv6/sys_asmreq.go
deleted file mode 100644
index 6ff9950..0000000
--- a/vendor/golang.org/x/net/ipv6/sys_asmreq.go
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || windows
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris windows
-
-package ipv6
-
-import (
- "net"
- "unsafe"
-
- "golang.org/x/net/internal/socket"
-)
-
-func (so *sockOpt) setIPMreq(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
- var mreq ipv6Mreq
- copy(mreq.Multiaddr[:], grp)
- if ifi != nil {
- mreq.setIfindex(ifi.Index)
- }
- b := (*[sizeofIPv6Mreq]byte)(unsafe.Pointer(&mreq))[:sizeofIPv6Mreq]
- return so.Set(c, b)
-}
diff --git a/vendor/golang.org/x/net/ipv6/sys_asmreq_stub.go b/vendor/golang.org/x/net/ipv6/sys_asmreq_stub.go
deleted file mode 100644
index 485290c..0000000
--- a/vendor/golang.org/x/net/ipv6/sys_asmreq_stub.go
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !windows
-// +build !aix,!darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows
-
-package ipv6
-
-import (
- "net"
-
- "golang.org/x/net/internal/socket"
-)
-
-func (so *sockOpt) setIPMreq(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
- return errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/ipv6/sys_bpf.go b/vendor/golang.org/x/net/ipv6/sys_bpf.go
deleted file mode 100644
index b5661fb..0000000
--- a/vendor/golang.org/x/net/ipv6/sys_bpf.go
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build linux
-// +build linux
-
-package ipv6
-
-import (
- "unsafe"
-
- "golang.org/x/net/bpf"
- "golang.org/x/net/internal/socket"
- "golang.org/x/sys/unix"
-)
-
-func (so *sockOpt) setAttachFilter(c *socket.Conn, f []bpf.RawInstruction) error {
- prog := unix.SockFprog{
- Len: uint16(len(f)),
- Filter: (*unix.SockFilter)(unsafe.Pointer(&f[0])),
- }
- b := (*[unix.SizeofSockFprog]byte)(unsafe.Pointer(&prog))[:unix.SizeofSockFprog]
- return so.Set(c, b)
-}
diff --git a/vendor/golang.org/x/net/ipv6/sys_bpf_stub.go b/vendor/golang.org/x/net/ipv6/sys_bpf_stub.go
deleted file mode 100644
index cb00661..0000000
--- a/vendor/golang.org/x/net/ipv6/sys_bpf_stub.go
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !linux
-// +build !linux
-
-package ipv6
-
-import (
- "golang.org/x/net/bpf"
- "golang.org/x/net/internal/socket"
-)
-
-func (so *sockOpt) setAttachFilter(c *socket.Conn, f []bpf.RawInstruction) error {
- return errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/ipv6/sys_bsd.go b/vendor/golang.org/x/net/ipv6/sys_bsd.go
deleted file mode 100644
index bde41a6..0000000
--- a/vendor/golang.org/x/net/ipv6/sys_bsd.go
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build dragonfly || netbsd || openbsd
-// +build dragonfly netbsd openbsd
-
-package ipv6
-
-import (
- "net"
- "syscall"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-var (
- ctlOpts = [ctlMax]ctlOpt{
- ctlTrafficClass: {unix.IPV6_TCLASS, 4, marshalTrafficClass, parseTrafficClass},
- ctlHopLimit: {unix.IPV6_HOPLIMIT, 4, marshalHopLimit, parseHopLimit},
- ctlPacketInfo: {unix.IPV6_PKTINFO, sizeofInet6Pktinfo, marshalPacketInfo, parsePacketInfo},
- ctlNextHop: {unix.IPV6_NEXTHOP, sizeofSockaddrInet6, marshalNextHop, parseNextHop},
- ctlPathMTU: {unix.IPV6_PATHMTU, sizeofIPv6Mtuinfo, marshalPathMTU, parsePathMTU},
- }
-
- sockOpts = map[int]*sockOpt{
- ssoTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_TCLASS, Len: 4}},
- ssoHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_UNICAST_HOPS, Len: 4}},
- ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_IF, Len: 4}},
- ssoMulticastHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_HOPS, Len: 4}},
- ssoMulticastLoopback: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_LOOP, Len: 4}},
- ssoReceiveTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVTCLASS, Len: 4}},
- ssoReceiveHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVHOPLIMIT, Len: 4}},
- ssoReceivePacketInfo: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVPKTINFO, Len: 4}},
- ssoReceivePathMTU: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVPATHMTU, Len: 4}},
- ssoPathMTU: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_PATHMTU, Len: sizeofIPv6Mtuinfo}},
- ssoChecksum: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_CHECKSUM, Len: 4}},
- ssoICMPFilter: {Option: socket.Option{Level: iana.ProtocolIPv6ICMP, Name: unix.ICMP6_FILTER, Len: sizeofICMPv6Filter}},
- ssoJoinGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_JOIN_GROUP, Len: sizeofIPv6Mreq}, typ: ssoTypeIPMreq},
- ssoLeaveGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_LEAVE_GROUP, Len: sizeofIPv6Mreq}, typ: ssoTypeIPMreq},
- }
-)
-
-func (sa *sockaddrInet6) setSockaddr(ip net.IP, i int) {
- sa.Len = sizeofSockaddrInet6
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], ip)
- sa.Scope_id = uint32(i)
-}
-
-func (pi *inet6Pktinfo) setIfindex(i int) {
- pi.Ifindex = uint32(i)
-}
-
-func (mreq *ipv6Mreq) setIfindex(i int) {
- mreq.Interface = uint32(i)
-}
diff --git a/vendor/golang.org/x/net/ipv6/sys_darwin.go b/vendor/golang.org/x/net/ipv6/sys_darwin.go
deleted file mode 100644
index b80ec80..0000000
--- a/vendor/golang.org/x/net/ipv6/sys_darwin.go
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-import (
- "net"
- "syscall"
- "unsafe"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-var (
- ctlOpts = [ctlMax]ctlOpt{
- ctlTrafficClass: {unix.IPV6_TCLASS, 4, marshalTrafficClass, parseTrafficClass},
- ctlHopLimit: {unix.IPV6_HOPLIMIT, 4, marshalHopLimit, parseHopLimit},
- ctlPacketInfo: {unix.IPV6_PKTINFO, sizeofInet6Pktinfo, marshalPacketInfo, parsePacketInfo},
- ctlNextHop: {unix.IPV6_NEXTHOP, sizeofSockaddrInet6, marshalNextHop, parseNextHop},
- ctlPathMTU: {unix.IPV6_PATHMTU, sizeofIPv6Mtuinfo, marshalPathMTU, parsePathMTU},
- }
-
- sockOpts = map[int]*sockOpt{
- ssoHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_UNICAST_HOPS, Len: 4}},
- ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_IF, Len: 4}},
- ssoMulticastHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_HOPS, Len: 4}},
- ssoMulticastLoopback: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_LOOP, Len: 4}},
- ssoTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_TCLASS, Len: 4}},
- ssoReceiveTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVTCLASS, Len: 4}},
- ssoReceiveHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVHOPLIMIT, Len: 4}},
- ssoReceivePacketInfo: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVPKTINFO, Len: 4}},
- ssoReceivePathMTU: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVPATHMTU, Len: 4}},
- ssoPathMTU: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_PATHMTU, Len: sizeofIPv6Mtuinfo}},
- ssoChecksum: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_CHECKSUM, Len: 4}},
- ssoICMPFilter: {Option: socket.Option{Level: iana.ProtocolIPv6ICMP, Name: unix.ICMP6_FILTER, Len: sizeofICMPv6Filter}},
- ssoJoinGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_JOIN_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoLeaveGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_LEAVE_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoJoinSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_JOIN_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoLeaveSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_LEAVE_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoBlockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_BLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoUnblockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_UNBLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- }
-)
-
-func (sa *sockaddrInet6) setSockaddr(ip net.IP, i int) {
- sa.Len = sizeofSockaddrInet6
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], ip)
- sa.Scope_id = uint32(i)
-}
-
-func (pi *inet6Pktinfo) setIfindex(i int) {
- pi.Ifindex = uint32(i)
-}
-
-func (mreq *ipv6Mreq) setIfindex(i int) {
- mreq.Interface = uint32(i)
-}
-
-func (gr *groupReq) setGroup(grp net.IP) {
- sa := (*sockaddrInet6)(unsafe.Pointer(uintptr(unsafe.Pointer(gr)) + 4))
- sa.Len = sizeofSockaddrInet6
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], grp)
-}
-
-func (gsr *groupSourceReq) setSourceGroup(grp, src net.IP) {
- sa := (*sockaddrInet6)(unsafe.Pointer(uintptr(unsafe.Pointer(gsr)) + 4))
- sa.Len = sizeofSockaddrInet6
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], grp)
- sa = (*sockaddrInet6)(unsafe.Pointer(uintptr(unsafe.Pointer(gsr)) + 132))
- sa.Len = sizeofSockaddrInet6
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], src)
-}
diff --git a/vendor/golang.org/x/net/ipv6/sys_freebsd.go b/vendor/golang.org/x/net/ipv6/sys_freebsd.go
deleted file mode 100644
index 6282cf9..0000000
--- a/vendor/golang.org/x/net/ipv6/sys_freebsd.go
+++ /dev/null
@@ -1,94 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-import (
- "net"
- "runtime"
- "strings"
- "syscall"
- "unsafe"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-var (
- ctlOpts = [ctlMax]ctlOpt{
- ctlTrafficClass: {unix.IPV6_TCLASS, 4, marshalTrafficClass, parseTrafficClass},
- ctlHopLimit: {unix.IPV6_HOPLIMIT, 4, marshalHopLimit, parseHopLimit},
- ctlPacketInfo: {unix.IPV6_PKTINFO, sizeofInet6Pktinfo, marshalPacketInfo, parsePacketInfo},
- ctlNextHop: {unix.IPV6_NEXTHOP, sizeofSockaddrInet6, marshalNextHop, parseNextHop},
- ctlPathMTU: {unix.IPV6_PATHMTU, sizeofIPv6Mtuinfo, marshalPathMTU, parsePathMTU},
- }
-
- sockOpts = map[int]sockOpt{
- ssoTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_TCLASS, Len: 4}},
- ssoHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_UNICAST_HOPS, Len: 4}},
- ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_IF, Len: 4}},
- ssoMulticastHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_HOPS, Len: 4}},
- ssoMulticastLoopback: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_LOOP, Len: 4}},
- ssoReceiveTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVTCLASS, Len: 4}},
- ssoReceiveHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVHOPLIMIT, Len: 4}},
- ssoReceivePacketInfo: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVPKTINFO, Len: 4}},
- ssoReceivePathMTU: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVPATHMTU, Len: 4}},
- ssoPathMTU: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_PATHMTU, Len: sizeofIPv6Mtuinfo}},
- ssoChecksum: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_CHECKSUM, Len: 4}},
- ssoICMPFilter: {Option: socket.Option{Level: iana.ProtocolIPv6ICMP, Name: unix.ICMP6_FILTER, Len: sizeofICMPv6Filter}},
- ssoJoinGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_JOIN_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoLeaveGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_LEAVE_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoJoinSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_JOIN_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoLeaveSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_LEAVE_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoBlockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_BLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoUnblockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_UNBLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- }
-)
-
-func init() {
- if runtime.GOOS == "freebsd" && runtime.GOARCH == "386" {
- archs, _ := syscall.Sysctl("kern.supported_archs")
- for _, s := range strings.Fields(archs) {
- if s == "amd64" {
- compatFreeBSD32 = true
- break
- }
- }
- }
-}
-
-func (sa *sockaddrInet6) setSockaddr(ip net.IP, i int) {
- sa.Len = sizeofSockaddrInet6
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], ip)
- sa.Scope_id = uint32(i)
-}
-
-func (pi *inet6Pktinfo) setIfindex(i int) {
- pi.Ifindex = uint32(i)
-}
-
-func (mreq *ipv6Mreq) setIfindex(i int) {
- mreq.Interface = uint32(i)
-}
-
-func (gr *groupReq) setGroup(grp net.IP) {
- sa := (*sockaddrInet6)(unsafe.Pointer(&gr.Group))
- sa.Len = sizeofSockaddrInet6
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], grp)
-}
-
-func (gsr *groupSourceReq) setSourceGroup(grp, src net.IP) {
- sa := (*sockaddrInet6)(unsafe.Pointer(&gsr.Group))
- sa.Len = sizeofSockaddrInet6
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], grp)
- sa = (*sockaddrInet6)(unsafe.Pointer(&gsr.Source))
- sa.Len = sizeofSockaddrInet6
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], src)
-}
diff --git a/vendor/golang.org/x/net/ipv6/sys_linux.go b/vendor/golang.org/x/net/ipv6/sys_linux.go
deleted file mode 100644
index 82e2121..0000000
--- a/vendor/golang.org/x/net/ipv6/sys_linux.go
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-import (
- "net"
- "syscall"
- "unsafe"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-var (
- ctlOpts = [ctlMax]ctlOpt{
- ctlTrafficClass: {unix.IPV6_TCLASS, 4, marshalTrafficClass, parseTrafficClass},
- ctlHopLimit: {unix.IPV6_HOPLIMIT, 4, marshalHopLimit, parseHopLimit},
- ctlPacketInfo: {unix.IPV6_PKTINFO, sizeofInet6Pktinfo, marshalPacketInfo, parsePacketInfo},
- ctlPathMTU: {unix.IPV6_PATHMTU, sizeofIPv6Mtuinfo, marshalPathMTU, parsePathMTU},
- }
-
- sockOpts = map[int]*sockOpt{
- ssoTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_TCLASS, Len: 4}},
- ssoHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_UNICAST_HOPS, Len: 4}},
- ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_IF, Len: 4}},
- ssoMulticastHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_HOPS, Len: 4}},
- ssoMulticastLoopback: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_LOOP, Len: 4}},
- ssoReceiveTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVTCLASS, Len: 4}},
- ssoReceiveHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVHOPLIMIT, Len: 4}},
- ssoReceivePacketInfo: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVPKTINFO, Len: 4}},
- ssoReceivePathMTU: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVPATHMTU, Len: 4}},
- ssoPathMTU: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_PATHMTU, Len: sizeofIPv6Mtuinfo}},
- ssoChecksum: {Option: socket.Option{Level: iana.ProtocolReserved, Name: unix.IPV6_CHECKSUM, Len: 4}},
- ssoICMPFilter: {Option: socket.Option{Level: iana.ProtocolIPv6ICMP, Name: unix.ICMPV6_FILTER, Len: sizeofICMPv6Filter}},
- ssoJoinGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_JOIN_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoLeaveGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_LEAVE_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoJoinSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_JOIN_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoLeaveSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_LEAVE_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoBlockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_BLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoUnblockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_UNBLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoAttachFilter: {Option: socket.Option{Level: unix.SOL_SOCKET, Name: unix.SO_ATTACH_FILTER, Len: unix.SizeofSockFprog}},
- }
-)
-
-func (sa *sockaddrInet6) setSockaddr(ip net.IP, i int) {
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], ip)
- sa.Scope_id = uint32(i)
-}
-
-func (pi *inet6Pktinfo) setIfindex(i int) {
- pi.Ifindex = int32(i)
-}
-
-func (mreq *ipv6Mreq) setIfindex(i int) {
- mreq.Ifindex = int32(i)
-}
-
-func (gr *groupReq) setGroup(grp net.IP) {
- sa := (*sockaddrInet6)(unsafe.Pointer(&gr.Group))
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], grp)
-}
-
-func (gsr *groupSourceReq) setSourceGroup(grp, src net.IP) {
- sa := (*sockaddrInet6)(unsafe.Pointer(&gsr.Group))
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], grp)
- sa = (*sockaddrInet6)(unsafe.Pointer(&gsr.Source))
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], src)
-}
diff --git a/vendor/golang.org/x/net/ipv6/sys_solaris.go b/vendor/golang.org/x/net/ipv6/sys_solaris.go
deleted file mode 100644
index 1fc30ad..0000000
--- a/vendor/golang.org/x/net/ipv6/sys_solaris.go
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-import (
- "net"
- "syscall"
- "unsafe"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-var (
- ctlOpts = [ctlMax]ctlOpt{
- ctlTrafficClass: {unix.IPV6_TCLASS, 4, marshalTrafficClass, parseTrafficClass},
- ctlHopLimit: {unix.IPV6_HOPLIMIT, 4, marshalHopLimit, parseHopLimit},
- ctlPacketInfo: {unix.IPV6_PKTINFO, sizeofInet6Pktinfo, marshalPacketInfo, parsePacketInfo},
- ctlNextHop: {unix.IPV6_NEXTHOP, sizeofSockaddrInet6, marshalNextHop, parseNextHop},
- ctlPathMTU: {unix.IPV6_PATHMTU, sizeofIPv6Mtuinfo, marshalPathMTU, parsePathMTU},
- }
-
- sockOpts = map[int]*sockOpt{
- ssoTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_TCLASS, Len: 4}},
- ssoHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_UNICAST_HOPS, Len: 4}},
- ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_IF, Len: 4}},
- ssoMulticastHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_HOPS, Len: 4}},
- ssoMulticastLoopback: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_LOOP, Len: 4}},
- ssoReceiveTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVTCLASS, Len: 4}},
- ssoReceiveHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVHOPLIMIT, Len: 4}},
- ssoReceivePacketInfo: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVPKTINFO, Len: 4}},
- ssoReceivePathMTU: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVPATHMTU, Len: 4}},
- ssoPathMTU: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_PATHMTU, Len: sizeofIPv6Mtuinfo}},
- ssoChecksum: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_CHECKSUM, Len: 4}},
- ssoICMPFilter: {Option: socket.Option{Level: iana.ProtocolIPv6ICMP, Name: unix.ICMP6_FILTER, Len: sizeofICMPv6Filter}},
- ssoJoinGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_JOIN_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoLeaveGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_LEAVE_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoJoinSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_JOIN_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoLeaveSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_LEAVE_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoBlockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_BLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoUnblockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_UNBLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- }
-)
-
-func (sa *sockaddrInet6) setSockaddr(ip net.IP, i int) {
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], ip)
- sa.Scope_id = uint32(i)
-}
-
-func (pi *inet6Pktinfo) setIfindex(i int) {
- pi.Ifindex = uint32(i)
-}
-
-func (mreq *ipv6Mreq) setIfindex(i int) {
- mreq.Interface = uint32(i)
-}
-
-func (gr *groupReq) setGroup(grp net.IP) {
- sa := (*sockaddrInet6)(unsafe.Pointer(uintptr(unsafe.Pointer(gr)) + 4))
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], grp)
-}
-
-func (gsr *groupSourceReq) setSourceGroup(grp, src net.IP) {
- sa := (*sockaddrInet6)(unsafe.Pointer(uintptr(unsafe.Pointer(gsr)) + 4))
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], grp)
- sa = (*sockaddrInet6)(unsafe.Pointer(uintptr(unsafe.Pointer(gsr)) + 260))
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], src)
-}
diff --git a/vendor/golang.org/x/net/ipv6/sys_ssmreq.go b/vendor/golang.org/x/net/ipv6/sys_ssmreq.go
deleted file mode 100644
index 023488a..0000000
--- a/vendor/golang.org/x/net/ipv6/sys_ssmreq.go
+++ /dev/null
@@ -1,55 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || freebsd || linux || solaris || zos
-// +build aix darwin freebsd linux solaris zos
-
-package ipv6
-
-import (
- "net"
- "unsafe"
-
- "golang.org/x/net/internal/socket"
-)
-
-var compatFreeBSD32 bool // 386 emulation on amd64
-
-func (so *sockOpt) setGroupReq(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
- var gr groupReq
- if ifi != nil {
- gr.Interface = uint32(ifi.Index)
- }
- gr.setGroup(grp)
- var b []byte
- if compatFreeBSD32 {
- var d [sizeofGroupReq + 4]byte
- s := (*[sizeofGroupReq]byte)(unsafe.Pointer(&gr))
- copy(d[:4], s[:4])
- copy(d[8:], s[4:])
- b = d[:]
- } else {
- b = (*[sizeofGroupReq]byte)(unsafe.Pointer(&gr))[:sizeofGroupReq]
- }
- return so.Set(c, b)
-}
-
-func (so *sockOpt) setGroupSourceReq(c *socket.Conn, ifi *net.Interface, grp, src net.IP) error {
- var gsr groupSourceReq
- if ifi != nil {
- gsr.Interface = uint32(ifi.Index)
- }
- gsr.setSourceGroup(grp, src)
- var b []byte
- if compatFreeBSD32 {
- var d [sizeofGroupSourceReq + 4]byte
- s := (*[sizeofGroupSourceReq]byte)(unsafe.Pointer(&gsr))
- copy(d[:4], s[:4])
- copy(d[8:], s[4:])
- b = d[:]
- } else {
- b = (*[sizeofGroupSourceReq]byte)(unsafe.Pointer(&gsr))[:sizeofGroupSourceReq]
- }
- return so.Set(c, b)
-}
diff --git a/vendor/golang.org/x/net/ipv6/sys_ssmreq_stub.go b/vendor/golang.org/x/net/ipv6/sys_ssmreq_stub.go
deleted file mode 100644
index acdf2e5..0000000
--- a/vendor/golang.org/x/net/ipv6/sys_ssmreq_stub.go
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !darwin && !freebsd && !linux && !solaris && !zos
-// +build !aix,!darwin,!freebsd,!linux,!solaris,!zos
-
-package ipv6
-
-import (
- "net"
-
- "golang.org/x/net/internal/socket"
-)
-
-func (so *sockOpt) setGroupReq(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
- return errNotImplemented
-}
-
-func (so *sockOpt) setGroupSourceReq(c *socket.Conn, ifi *net.Interface, grp, src net.IP) error {
- return errNotImplemented
-}
diff --git a/vendor/golang.org/x/net/ipv6/sys_stub.go b/vendor/golang.org/x/net/ipv6/sys_stub.go
deleted file mode 100644
index 5807bba..0000000
--- a/vendor/golang.org/x/net/ipv6/sys_stub.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !windows && !zos
-// +build !aix,!darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows,!zos
-
-package ipv6
-
-var (
- ctlOpts = [ctlMax]ctlOpt{}
-
- sockOpts = map[int]*sockOpt{}
-)
diff --git a/vendor/golang.org/x/net/ipv6/sys_windows.go b/vendor/golang.org/x/net/ipv6/sys_windows.go
deleted file mode 100644
index fda8a29..0000000
--- a/vendor/golang.org/x/net/ipv6/sys_windows.go
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-import (
- "net"
- "syscall"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/windows"
-)
-
-const (
- sizeofSockaddrInet6 = 0x1c
-
- sizeofIPv6Mreq = 0x14
- sizeofIPv6Mtuinfo = 0x20
- sizeofICMPv6Filter = 0
-)
-
-type sockaddrInet6 struct {
- Family uint16
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Interface uint32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type icmpv6Filter struct {
- // TODO(mikio): implement this
-}
-
-var (
- ctlOpts = [ctlMax]ctlOpt{}
-
- sockOpts = map[int]*sockOpt{
- ssoHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: windows.IPV6_UNICAST_HOPS, Len: 4}},
- ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: windows.IPV6_MULTICAST_IF, Len: 4}},
- ssoMulticastHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: windows.IPV6_MULTICAST_HOPS, Len: 4}},
- ssoMulticastLoopback: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: windows.IPV6_MULTICAST_LOOP, Len: 4}},
- ssoJoinGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: windows.IPV6_JOIN_GROUP, Len: sizeofIPv6Mreq}, typ: ssoTypeIPMreq},
- ssoLeaveGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: windows.IPV6_LEAVE_GROUP, Len: sizeofIPv6Mreq}, typ: ssoTypeIPMreq},
- }
-)
-
-func (sa *sockaddrInet6) setSockaddr(ip net.IP, i int) {
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], ip)
- sa.Scope_id = uint32(i)
-}
-
-func (mreq *ipv6Mreq) setIfindex(i int) {
- mreq.Interface = uint32(i)
-}
diff --git a/vendor/golang.org/x/net/ipv6/sys_zos.go b/vendor/golang.org/x/net/ipv6/sys_zos.go
deleted file mode 100644
index 31adc86..0000000
--- a/vendor/golang.org/x/net/ipv6/sys_zos.go
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ipv6
-
-import (
- "net"
- "syscall"
- "unsafe"
-
- "golang.org/x/net/internal/iana"
- "golang.org/x/net/internal/socket"
-
- "golang.org/x/sys/unix"
-)
-
-var (
- ctlOpts = [ctlMax]ctlOpt{
- ctlHopLimit: {unix.IPV6_HOPLIMIT, 4, marshalHopLimit, parseHopLimit},
- ctlPacketInfo: {unix.IPV6_PKTINFO, sizeofInet6Pktinfo, marshalPacketInfo, parsePacketInfo},
- ctlPathMTU: {unix.IPV6_PATHMTU, sizeofIPv6Mtuinfo, marshalPathMTU, parsePathMTU},
- }
-
- sockOpts = map[int]*sockOpt{
- ssoTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_TCLASS, Len: 4}},
- ssoHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_UNICAST_HOPS, Len: 4}},
- ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_IF, Len: 4}},
- ssoMulticastHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_HOPS, Len: 4}},
- ssoMulticastLoopback: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_MULTICAST_LOOP, Len: 4}},
- ssoReceiveTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVTCLASS, Len: 4}},
- ssoReceiveHopLimit: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVHOPLIMIT, Len: 4}},
- ssoReceivePacketInfo: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVPKTINFO, Len: 4}},
- ssoReceivePathMTU: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_RECVPATHMTU, Len: 4}},
- ssoChecksum: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.IPV6_CHECKSUM, Len: 4}},
- ssoICMPFilter: {Option: socket.Option{Level: iana.ProtocolIPv6ICMP, Name: unix.ICMP6_FILTER, Len: sizeofICMPv6Filter}},
- ssoJoinGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_JOIN_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoLeaveGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_LEAVE_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
- ssoJoinSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_JOIN_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoLeaveSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_LEAVE_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoBlockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_BLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- ssoUnblockSourceGroup: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: unix.MCAST_UNBLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
- }
-)
-
-func (sa *sockaddrInet6) setSockaddr(ip net.IP, i int) {
- sa.Family = syscall.AF_INET6
- copy(sa.Addr[:], ip)
- sa.Scope_id = uint32(i)
-}
-
-func (pi *inet6Pktinfo) setIfindex(i int) {
- pi.Ifindex = uint32(i)
-}
-
-func (gr *groupReq) setGroup(grp net.IP) {
- sa := (*sockaddrInet6)(unsafe.Pointer(&gr.Group))
- sa.Family = syscall.AF_INET6
- sa.Len = sizeofSockaddrInet6
- copy(sa.Addr[:], grp)
-}
-
-func (gsr *groupSourceReq) setSourceGroup(grp, src net.IP) {
- sa := (*sockaddrInet6)(unsafe.Pointer(&gsr.Group))
- sa.Family = syscall.AF_INET6
- sa.Len = sizeofSockaddrInet6
- copy(sa.Addr[:], grp)
- sa = (*sockaddrInet6)(unsafe.Pointer(&gsr.Source))
- sa.Family = syscall.AF_INET6
- sa.Len = sizeofSockaddrInet6
- copy(sa.Addr[:], src)
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_aix_ppc64.go b/vendor/golang.org/x/net/ipv6/zsys_aix_ppc64.go
deleted file mode 100644
index f604b0f..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_aix_ppc64.go
+++ /dev/null
@@ -1,69 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_aix.go
-
-// Added for go1.11 compatibility
-//go:build aix
-// +build aix
-
-package ipv6
-
-const (
- sizeofSockaddrStorage = 0x508
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x510
- sizeofGroupSourceReq = 0xa18
-
- sizeofICMPv6Filter = 0x20
-)
-
-type sockaddrStorage struct {
- X__ss_len uint8
- Family uint8
- X__ss_pad1 [6]uint8
- X__ss_align int64
- X__ss_pad2 [1265]uint8
- Pad_cgo_0 [7]byte
-}
-
-type sockaddrInet6 struct {
- Len uint8
- Family uint8
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Interface uint32
-}
-
-type icmpv6Filter struct {
- Filt [8]uint32
-}
-
-type groupReq struct {
- Interface uint32
- Group sockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Group sockaddrStorage
- Source sockaddrStorage
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_darwin.go b/vendor/golang.org/x/net/ipv6/zsys_darwin.go
deleted file mode 100644
index dd6f7b2..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_darwin.go
+++ /dev/null
@@ -1,64 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_darwin.go
-
-package ipv6
-
-const (
- sizeofSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x84
- sizeofGroupSourceReq = 0x104
-
- sizeofICMPv6Filter = 0x20
-)
-
-type sockaddrStorage struct {
- Len uint8
- Family uint8
- X__ss_pad1 [6]int8
- X__ss_align int64
- X__ss_pad2 [112]int8
-}
-
-type sockaddrInet6 struct {
- Len uint8
- Family uint8
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex uint32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Interface uint32
-}
-
-type icmpv6Filter struct {
- Filt [8]uint32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [128]byte
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [128]byte
- Pad_cgo_1 [128]byte
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_dragonfly.go b/vendor/golang.org/x/net/ipv6/zsys_dragonfly.go
deleted file mode 100644
index 6b45a94..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_dragonfly.go
+++ /dev/null
@@ -1,42 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_dragonfly.go
-
-package ipv6
-
-const (
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
-
- sizeofIPv6Mreq = 0x14
-
- sizeofICMPv6Filter = 0x20
-)
-
-type sockaddrInet6 struct {
- Len uint8
- Family uint8
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex uint32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Interface uint32
-}
-
-type icmpv6Filter struct {
- Filt [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_freebsd_386.go b/vendor/golang.org/x/net/ipv6/zsys_freebsd_386.go
deleted file mode 100644
index 8da5592..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_freebsd_386.go
+++ /dev/null
@@ -1,64 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_freebsd.go
-
-package ipv6
-
-const (
- sizeofSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x84
- sizeofGroupSourceReq = 0x104
-
- sizeofICMPv6Filter = 0x20
-)
-
-type sockaddrStorage struct {
- Len uint8
- Family uint8
- X__ss_pad1 [6]int8
- X__ss_align int64
- X__ss_pad2 [112]int8
-}
-
-type sockaddrInet6 struct {
- Len uint8
- Family uint8
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex uint32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Interface uint32
-}
-
-type groupReq struct {
- Interface uint32
- Group sockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Group sockaddrStorage
- Source sockaddrStorage
-}
-
-type icmpv6Filter struct {
- Filt [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_freebsd_amd64.go b/vendor/golang.org/x/net/ipv6/zsys_freebsd_amd64.go
deleted file mode 100644
index 72a1a65..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_freebsd_amd64.go
+++ /dev/null
@@ -1,66 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_freebsd.go
-
-package ipv6
-
-const (
- sizeofSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPv6Filter = 0x20
-)
-
-type sockaddrStorage struct {
- Len uint8
- Family uint8
- X__ss_pad1 [6]int8
- X__ss_align int64
- X__ss_pad2 [112]int8
-}
-
-type sockaddrInet6 struct {
- Len uint8
- Family uint8
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex uint32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Interface uint32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group sockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group sockaddrStorage
- Source sockaddrStorage
-}
-
-type icmpv6Filter struct {
- Filt [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_freebsd_arm.go b/vendor/golang.org/x/net/ipv6/zsys_freebsd_arm.go
deleted file mode 100644
index 72a1a65..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_freebsd_arm.go
+++ /dev/null
@@ -1,66 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_freebsd.go
-
-package ipv6
-
-const (
- sizeofSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPv6Filter = 0x20
-)
-
-type sockaddrStorage struct {
- Len uint8
- Family uint8
- X__ss_pad1 [6]int8
- X__ss_align int64
- X__ss_pad2 [112]int8
-}
-
-type sockaddrInet6 struct {
- Len uint8
- Family uint8
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex uint32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Interface uint32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group sockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group sockaddrStorage
- Source sockaddrStorage
-}
-
-type icmpv6Filter struct {
- Filt [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_freebsd_arm64.go b/vendor/golang.org/x/net/ipv6/zsys_freebsd_arm64.go
deleted file mode 100644
index 5b39eb8..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_freebsd_arm64.go
+++ /dev/null
@@ -1,64 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_freebsd.go
-
-package ipv6
-
-const (
- sizeofSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPv6Filter = 0x20
-)
-
-type sockaddrStorage struct {
- Len uint8
- Family uint8
- X__ss_pad1 [6]uint8
- X__ss_align int64
- X__ss_pad2 [112]uint8
-}
-
-type sockaddrInet6 struct {
- Len uint8
- Family uint8
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex uint32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Interface uint32
-}
-
-type groupReq struct {
- Interface uint32
- Group sockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Group sockaddrStorage
- Source sockaddrStorage
-}
-
-type icmpv6Filter struct {
- Filt [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_linux_386.go b/vendor/golang.org/x/net/ipv6/zsys_linux_386.go
deleted file mode 100644
index ad71871..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_386.go
+++ /dev/null
@@ -1,72 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv6
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
- sizeofIPv6FlowlabelReq = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x84
- sizeofGroupSourceReq = 0x104
-
- sizeofICMPv6Filter = 0x20
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet6 struct {
- Family uint16
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6FlowlabelReq struct {
- Dst [16]byte /* in6_addr */
- Label uint32
- Action uint8
- Share uint8
- Flags uint16
- Expires uint16
- Linger uint16
- X__flr_pad uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type groupReq struct {
- Interface uint32
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpv6Filter struct {
- Data [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_linux_amd64.go b/vendor/golang.org/x/net/ipv6/zsys_linux_amd64.go
deleted file mode 100644
index 2514ab9..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_amd64.go
+++ /dev/null
@@ -1,74 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv6
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
- sizeofIPv6FlowlabelReq = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPv6Filter = 0x20
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet6 struct {
- Family uint16
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6FlowlabelReq struct {
- Dst [16]byte /* in6_addr */
- Label uint32
- Action uint8
- Share uint8
- Flags uint16
- Expires uint16
- Linger uint16
- X__flr_pad uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpv6Filter struct {
- Data [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_linux_arm.go b/vendor/golang.org/x/net/ipv6/zsys_linux_arm.go
deleted file mode 100644
index ad71871..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_arm.go
+++ /dev/null
@@ -1,72 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv6
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
- sizeofIPv6FlowlabelReq = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x84
- sizeofGroupSourceReq = 0x104
-
- sizeofICMPv6Filter = 0x20
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet6 struct {
- Family uint16
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6FlowlabelReq struct {
- Dst [16]byte /* in6_addr */
- Label uint32
- Action uint8
- Share uint8
- Flags uint16
- Expires uint16
- Linger uint16
- X__flr_pad uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type groupReq struct {
- Interface uint32
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpv6Filter struct {
- Data [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_linux_arm64.go b/vendor/golang.org/x/net/ipv6/zsys_linux_arm64.go
deleted file mode 100644
index 2514ab9..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_arm64.go
+++ /dev/null
@@ -1,74 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv6
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
- sizeofIPv6FlowlabelReq = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPv6Filter = 0x20
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet6 struct {
- Family uint16
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6FlowlabelReq struct {
- Dst [16]byte /* in6_addr */
- Label uint32
- Action uint8
- Share uint8
- Flags uint16
- Expires uint16
- Linger uint16
- X__flr_pad uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpv6Filter struct {
- Data [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_linux_mips.go b/vendor/golang.org/x/net/ipv6/zsys_linux_mips.go
deleted file mode 100644
index ad71871..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_mips.go
+++ /dev/null
@@ -1,72 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv6
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
- sizeofIPv6FlowlabelReq = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x84
- sizeofGroupSourceReq = 0x104
-
- sizeofICMPv6Filter = 0x20
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet6 struct {
- Family uint16
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6FlowlabelReq struct {
- Dst [16]byte /* in6_addr */
- Label uint32
- Action uint8
- Share uint8
- Flags uint16
- Expires uint16
- Linger uint16
- X__flr_pad uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type groupReq struct {
- Interface uint32
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpv6Filter struct {
- Data [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_linux_mips64.go b/vendor/golang.org/x/net/ipv6/zsys_linux_mips64.go
deleted file mode 100644
index 2514ab9..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_mips64.go
+++ /dev/null
@@ -1,74 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv6
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
- sizeofIPv6FlowlabelReq = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPv6Filter = 0x20
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet6 struct {
- Family uint16
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6FlowlabelReq struct {
- Dst [16]byte /* in6_addr */
- Label uint32
- Action uint8
- Share uint8
- Flags uint16
- Expires uint16
- Linger uint16
- X__flr_pad uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpv6Filter struct {
- Data [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_linux_mips64le.go b/vendor/golang.org/x/net/ipv6/zsys_linux_mips64le.go
deleted file mode 100644
index 2514ab9..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_mips64le.go
+++ /dev/null
@@ -1,74 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv6
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
- sizeofIPv6FlowlabelReq = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPv6Filter = 0x20
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet6 struct {
- Family uint16
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6FlowlabelReq struct {
- Dst [16]byte /* in6_addr */
- Label uint32
- Action uint8
- Share uint8
- Flags uint16
- Expires uint16
- Linger uint16
- X__flr_pad uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpv6Filter struct {
- Data [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_linux_mipsle.go b/vendor/golang.org/x/net/ipv6/zsys_linux_mipsle.go
deleted file mode 100644
index ad71871..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_mipsle.go
+++ /dev/null
@@ -1,72 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv6
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
- sizeofIPv6FlowlabelReq = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x84
- sizeofGroupSourceReq = 0x104
-
- sizeofICMPv6Filter = 0x20
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet6 struct {
- Family uint16
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6FlowlabelReq struct {
- Dst [16]byte /* in6_addr */
- Label uint32
- Action uint8
- Share uint8
- Flags uint16
- Expires uint16
- Linger uint16
- X__flr_pad uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type groupReq struct {
- Interface uint32
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpv6Filter struct {
- Data [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_linux_ppc.go b/vendor/golang.org/x/net/ipv6/zsys_linux_ppc.go
deleted file mode 100644
index d06c2ad..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_ppc.go
+++ /dev/null
@@ -1,72 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv6
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
- sizeofIPv6FlowlabelReq = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x84
- sizeofGroupSourceReq = 0x104
-
- sizeofICMPv6Filter = 0x20
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]uint8
-}
-
-type sockaddrInet6 struct {
- Family uint16
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6FlowlabelReq struct {
- Dst [16]byte /* in6_addr */
- Label uint32
- Action uint8
- Share uint8
- Flags uint16
- Expires uint16
- Linger uint16
- X__flr_pad uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type groupReq struct {
- Interface uint32
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpv6Filter struct {
- Data [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_linux_ppc64.go b/vendor/golang.org/x/net/ipv6/zsys_linux_ppc64.go
deleted file mode 100644
index 2514ab9..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_ppc64.go
+++ /dev/null
@@ -1,74 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv6
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
- sizeofIPv6FlowlabelReq = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPv6Filter = 0x20
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet6 struct {
- Family uint16
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6FlowlabelReq struct {
- Dst [16]byte /* in6_addr */
- Label uint32
- Action uint8
- Share uint8
- Flags uint16
- Expires uint16
- Linger uint16
- X__flr_pad uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpv6Filter struct {
- Data [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_linux_ppc64le.go b/vendor/golang.org/x/net/ipv6/zsys_linux_ppc64le.go
deleted file mode 100644
index 2514ab9..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_ppc64le.go
+++ /dev/null
@@ -1,74 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv6
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
- sizeofIPv6FlowlabelReq = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPv6Filter = 0x20
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet6 struct {
- Family uint16
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6FlowlabelReq struct {
- Dst [16]byte /* in6_addr */
- Label uint32
- Action uint8
- Share uint8
- Flags uint16
- Expires uint16
- Linger uint16
- X__flr_pad uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpv6Filter struct {
- Data [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_linux_riscv64.go b/vendor/golang.org/x/net/ipv6/zsys_linux_riscv64.go
deleted file mode 100644
index d4f78e4..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_riscv64.go
+++ /dev/null
@@ -1,77 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-//go:build riscv64
-// +build riscv64
-
-package ipv6
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
- sizeofIPv6FlowlabelReq = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPv6Filter = 0x20
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet6 struct {
- Family uint16
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6FlowlabelReq struct {
- Dst [16]byte /* in6_addr */
- Label uint32
- Action uint8
- Share uint8
- Flags uint16
- Expires uint16
- Linger uint16
- X__flr_pad uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpv6Filter struct {
- Data [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_linux_s390x.go b/vendor/golang.org/x/net/ipv6/zsys_linux_s390x.go
deleted file mode 100644
index 2514ab9..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_s390x.go
+++ /dev/null
@@ -1,74 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_linux.go
-
-package ipv6
-
-const (
- sizeofKernelSockaddrStorage = 0x80
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
- sizeofIPv6FlowlabelReq = 0x20
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x88
- sizeofGroupSourceReq = 0x108
-
- sizeofICMPv6Filter = 0x20
-)
-
-type kernelSockaddrStorage struct {
- Family uint16
- X__data [126]int8
-}
-
-type sockaddrInet6 struct {
- Family uint16
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6FlowlabelReq struct {
- Dst [16]byte /* in6_addr */
- Label uint32
- Action uint8
- Share uint8
- Flags uint16
- Expires uint16
- Linger uint16
- X__flr_pad uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Ifindex int32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [4]byte
- Group kernelSockaddrStorage
- Source kernelSockaddrStorage
-}
-
-type icmpv6Filter struct {
- Data [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_netbsd.go b/vendor/golang.org/x/net/ipv6/zsys_netbsd.go
deleted file mode 100644
index f7335d5..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_netbsd.go
+++ /dev/null
@@ -1,42 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_netbsd.go
-
-package ipv6
-
-const (
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
-
- sizeofIPv6Mreq = 0x14
-
- sizeofICMPv6Filter = 0x20
-)
-
-type sockaddrInet6 struct {
- Len uint8
- Family uint8
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex uint32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Interface uint32
-}
-
-type icmpv6Filter struct {
- Filt [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_openbsd.go b/vendor/golang.org/x/net/ipv6/zsys_openbsd.go
deleted file mode 100644
index 6d15928..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_openbsd.go
+++ /dev/null
@@ -1,42 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_openbsd.go
-
-package ipv6
-
-const (
- sizeofSockaddrInet6 = 0x1c
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x20
-
- sizeofIPv6Mreq = 0x14
-
- sizeofICMPv6Filter = 0x20
-)
-
-type sockaddrInet6 struct {
- Len uint8
- Family uint8
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex uint32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Interface uint32
-}
-
-type icmpv6Filter struct {
- Filt [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_solaris.go b/vendor/golang.org/x/net/ipv6/zsys_solaris.go
deleted file mode 100644
index 1716197..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_solaris.go
+++ /dev/null
@@ -1,63 +0,0 @@
-// Code generated by cmd/cgo -godefs; DO NOT EDIT.
-// cgo -godefs defs_solaris.go
-
-package ipv6
-
-const (
- sizeofSockaddrStorage = 0x100
- sizeofSockaddrInet6 = 0x20
- sizeofInet6Pktinfo = 0x14
- sizeofIPv6Mtuinfo = 0x24
-
- sizeofIPv6Mreq = 0x14
- sizeofGroupReq = 0x104
- sizeofGroupSourceReq = 0x204
-
- sizeofICMPv6Filter = 0x20
-)
-
-type sockaddrStorage struct {
- Family uint16
- X_ss_pad1 [6]int8
- X_ss_align float64
- X_ss_pad2 [240]int8
-}
-
-type sockaddrInet6 struct {
- Family uint16
- Port uint16
- Flowinfo uint32
- Addr [16]byte /* in6_addr */
- Scope_id uint32
- X__sin6_src_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte /* in6_addr */
- Ifindex uint32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type ipv6Mreq struct {
- Multiaddr [16]byte /* in6_addr */
- Interface uint32
-}
-
-type groupReq struct {
- Interface uint32
- Pad_cgo_0 [256]byte
-}
-
-type groupSourceReq struct {
- Interface uint32
- Pad_cgo_0 [256]byte
- Pad_cgo_1 [256]byte
-}
-
-type icmpv6Filter struct {
- X__icmp6_filt [8]uint32
-}
diff --git a/vendor/golang.org/x/net/ipv6/zsys_zos_s390x.go b/vendor/golang.org/x/net/ipv6/zsys_zos_s390x.go
deleted file mode 100644
index 7c75645..0000000
--- a/vendor/golang.org/x/net/ipv6/zsys_zos_s390x.go
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Hand edited based on zerrors_zos_s390x.go
-// TODO(Bill O'Farrell): auto-generate.
-
-package ipv6
-
-const (
- sizeofSockaddrStorage = 128
- sizeofICMPv6Filter = 32
- sizeofInet6Pktinfo = 20
- sizeofIPv6Mtuinfo = 32
- sizeofSockaddrInet6 = 28
- sizeofGroupReq = 136
- sizeofGroupSourceReq = 264
-)
-
-type sockaddrStorage struct {
- Len uint8
- Family byte
- ss_pad1 [6]byte
- ss_align int64
- ss_pad2 [112]byte
-}
-
-type sockaddrInet6 struct {
- Len uint8
- Family uint8
- Port uint16
- Flowinfo uint32
- Addr [16]byte
- Scope_id uint32
-}
-
-type inet6Pktinfo struct {
- Addr [16]byte
- Ifindex uint32
-}
-
-type ipv6Mtuinfo struct {
- Addr sockaddrInet6
- Mtu uint32
-}
-
-type groupReq struct {
- Interface uint32
- reserved uint32
- Group sockaddrStorage
-}
-
-type groupSourceReq struct {
- Interface uint32
- reserved uint32
- Group sockaddrStorage
- Source sockaddrStorage
-}
-
-type icmpv6Filter struct {
- Filt [8]uint32
-}
diff --git a/vendor/golang.org/x/sys/LICENSE b/vendor/golang.org/x/sys/LICENSE
deleted file mode 100644
index 6a66aea..0000000
--- a/vendor/golang.org/x/sys/LICENSE
+++ /dev/null
@@ -1,27 +0,0 @@
-Copyright (c) 2009 The Go Authors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
- * Neither the name of Google Inc. nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/golang.org/x/sys/PATENTS b/vendor/golang.org/x/sys/PATENTS
deleted file mode 100644
index 7330990..0000000
--- a/vendor/golang.org/x/sys/PATENTS
+++ /dev/null
@@ -1,22 +0,0 @@
-Additional IP Rights Grant (Patents)
-
-"This implementation" means the copyrightable works distributed by
-Google as part of the Go project.
-
-Google hereby grants to You a perpetual, worldwide, non-exclusive,
-no-charge, royalty-free, irrevocable (except as stated in this section)
-patent license to make, have made, use, offer to sell, sell, import,
-transfer and otherwise run, modify and propagate the contents of this
-implementation of Go, where such license applies only to those patent
-claims, both currently owned or controlled by Google and acquired in
-the future, licensable by Google that are necessarily infringed by this
-implementation of Go. This grant does not include claims that would be
-infringed only as a consequence of further modification of this
-implementation. If you or your agent or exclusive licensee institute or
-order or agree to the institution of patent litigation against any
-entity (including a cross-claim or counterclaim in a lawsuit) alleging
-that this implementation of Go or any code incorporated within this
-implementation of Go constitutes direct or contributory patent
-infringement, or inducement of patent infringement, then any patent
-rights granted to you under this License for this implementation of Go
-shall terminate as of the date such litigation is filed.
diff --git a/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s b/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s
deleted file mode 100644
index db9171c..0000000
--- a/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gc
-// +build gc
-
-#include "textflag.h"
-
-//
-// System calls for ppc64, AIX are implemented in runtime/syscall_aix.go
-//
-
-TEXT ·syscall6(SB),NOSPLIT,$0-88
- JMP syscall·syscall6(SB)
-
-TEXT ·rawSyscall6(SB),NOSPLIT,$0-88
- JMP syscall·rawSyscall6(SB)
diff --git a/vendor/golang.org/x/sys/cpu/byteorder.go b/vendor/golang.org/x/sys/cpu/byteorder.go
deleted file mode 100644
index 271055b..0000000
--- a/vendor/golang.org/x/sys/cpu/byteorder.go
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpu
-
-import (
- "runtime"
-)
-
-// byteOrder is a subset of encoding/binary.ByteOrder.
-type byteOrder interface {
- Uint32([]byte) uint32
- Uint64([]byte) uint64
-}
-
-type littleEndian struct{}
-type bigEndian struct{}
-
-func (littleEndian) Uint32(b []byte) uint32 {
- _ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
- return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
-}
-
-func (littleEndian) Uint64(b []byte) uint64 {
- _ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
- return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
- uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
-}
-
-func (bigEndian) Uint32(b []byte) uint32 {
- _ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
- return uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
-}
-
-func (bigEndian) Uint64(b []byte) uint64 {
- _ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
- return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 |
- uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56
-}
-
-// hostByteOrder returns littleEndian on little-endian machines and
-// bigEndian on big-endian machines.
-func hostByteOrder() byteOrder {
- switch runtime.GOARCH {
- case "386", "amd64", "amd64p32",
- "alpha",
- "arm", "arm64",
- "loong64",
- "mipsle", "mips64le", "mips64p32le",
- "nios2",
- "ppc64le",
- "riscv", "riscv64",
- "sh":
- return littleEndian{}
- case "armbe", "arm64be",
- "m68k",
- "mips", "mips64", "mips64p32",
- "ppc", "ppc64",
- "s390", "s390x",
- "shbe",
- "sparc", "sparc64":
- return bigEndian{}
- }
- panic("unknown architecture")
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu.go b/vendor/golang.org/x/sys/cpu/cpu.go
deleted file mode 100644
index 83f112c..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu.go
+++ /dev/null
@@ -1,287 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package cpu implements processor feature detection for
-// various CPU architectures.
-package cpu
-
-import (
- "os"
- "strings"
-)
-
-// Initialized reports whether the CPU features were initialized.
-//
-// For some GOOS/GOARCH combinations initialization of the CPU features depends
-// on reading an operating specific file, e.g. /proc/self/auxv on linux/arm
-// Initialized will report false if reading the file fails.
-var Initialized bool
-
-// CacheLinePad is used to pad structs to avoid false sharing.
-type CacheLinePad struct{ _ [cacheLineSize]byte }
-
-// X86 contains the supported CPU features of the
-// current X86/AMD64 platform. If the current platform
-// is not X86/AMD64 then all feature flags are false.
-//
-// X86 is padded to avoid false sharing. Further the HasAVX
-// and HasAVX2 are only set if the OS supports XMM and YMM
-// registers in addition to the CPUID feature bit being set.
-var X86 struct {
- _ CacheLinePad
- HasAES bool // AES hardware implementation (AES NI)
- HasADX bool // Multi-precision add-carry instruction extensions
- HasAVX bool // Advanced vector extension
- HasAVX2 bool // Advanced vector extension 2
- HasAVX512 bool // Advanced vector extension 512
- HasAVX512F bool // Advanced vector extension 512 Foundation Instructions
- HasAVX512CD bool // Advanced vector extension 512 Conflict Detection Instructions
- HasAVX512ER bool // Advanced vector extension 512 Exponential and Reciprocal Instructions
- HasAVX512PF bool // Advanced vector extension 512 Prefetch Instructions Instructions
- HasAVX512VL bool // Advanced vector extension 512 Vector Length Extensions
- HasAVX512BW bool // Advanced vector extension 512 Byte and Word Instructions
- HasAVX512DQ bool // Advanced vector extension 512 Doubleword and Quadword Instructions
- HasAVX512IFMA bool // Advanced vector extension 512 Integer Fused Multiply Add
- HasAVX512VBMI bool // Advanced vector extension 512 Vector Byte Manipulation Instructions
- HasAVX5124VNNIW bool // Advanced vector extension 512 Vector Neural Network Instructions Word variable precision
- HasAVX5124FMAPS bool // Advanced vector extension 512 Fused Multiply Accumulation Packed Single precision
- HasAVX512VPOPCNTDQ bool // Advanced vector extension 512 Double and quad word population count instructions
- HasAVX512VPCLMULQDQ bool // Advanced vector extension 512 Vector carry-less multiply operations
- HasAVX512VNNI bool // Advanced vector extension 512 Vector Neural Network Instructions
- HasAVX512GFNI bool // Advanced vector extension 512 Galois field New Instructions
- HasAVX512VAES bool // Advanced vector extension 512 Vector AES instructions
- HasAVX512VBMI2 bool // Advanced vector extension 512 Vector Byte Manipulation Instructions 2
- HasAVX512BITALG bool // Advanced vector extension 512 Bit Algorithms
- HasAVX512BF16 bool // Advanced vector extension 512 BFloat16 Instructions
- HasBMI1 bool // Bit manipulation instruction set 1
- HasBMI2 bool // Bit manipulation instruction set 2
- HasCX16 bool // Compare and exchange 16 Bytes
- HasERMS bool // Enhanced REP for MOVSB and STOSB
- HasFMA bool // Fused-multiply-add instructions
- HasOSXSAVE bool // OS supports XSAVE/XRESTOR for saving/restoring XMM registers.
- HasPCLMULQDQ bool // PCLMULQDQ instruction - most often used for AES-GCM
- HasPOPCNT bool // Hamming weight instruction POPCNT.
- HasRDRAND bool // RDRAND instruction (on-chip random number generator)
- HasRDSEED bool // RDSEED instruction (on-chip random number generator)
- HasSSE2 bool // Streaming SIMD extension 2 (always available on amd64)
- HasSSE3 bool // Streaming SIMD extension 3
- HasSSSE3 bool // Supplemental streaming SIMD extension 3
- HasSSE41 bool // Streaming SIMD extension 4 and 4.1
- HasSSE42 bool // Streaming SIMD extension 4 and 4.2
- _ CacheLinePad
-}
-
-// ARM64 contains the supported CPU features of the
-// current ARMv8(aarch64) platform. If the current platform
-// is not arm64 then all feature flags are false.
-var ARM64 struct {
- _ CacheLinePad
- HasFP bool // Floating-point instruction set (always available)
- HasASIMD bool // Advanced SIMD (always available)
- HasEVTSTRM bool // Event stream support
- HasAES bool // AES hardware implementation
- HasPMULL bool // Polynomial multiplication instruction set
- HasSHA1 bool // SHA1 hardware implementation
- HasSHA2 bool // SHA2 hardware implementation
- HasCRC32 bool // CRC32 hardware implementation
- HasATOMICS bool // Atomic memory operation instruction set
- HasFPHP bool // Half precision floating-point instruction set
- HasASIMDHP bool // Advanced SIMD half precision instruction set
- HasCPUID bool // CPUID identification scheme registers
- HasASIMDRDM bool // Rounding double multiply add/subtract instruction set
- HasJSCVT bool // Javascript conversion from floating-point to integer
- HasFCMA bool // Floating-point multiplication and addition of complex numbers
- HasLRCPC bool // Release Consistent processor consistent support
- HasDCPOP bool // Persistent memory support
- HasSHA3 bool // SHA3 hardware implementation
- HasSM3 bool // SM3 hardware implementation
- HasSM4 bool // SM4 hardware implementation
- HasASIMDDP bool // Advanced SIMD double precision instruction set
- HasSHA512 bool // SHA512 hardware implementation
- HasSVE bool // Scalable Vector Extensions
- HasASIMDFHM bool // Advanced SIMD multiplication FP16 to FP32
- _ CacheLinePad
-}
-
-// ARM contains the supported CPU features of the current ARM (32-bit) platform.
-// All feature flags are false if:
-// 1. the current platform is not arm, or
-// 2. the current operating system is not Linux.
-var ARM struct {
- _ CacheLinePad
- HasSWP bool // SWP instruction support
- HasHALF bool // Half-word load and store support
- HasTHUMB bool // ARM Thumb instruction set
- Has26BIT bool // Address space limited to 26-bits
- HasFASTMUL bool // 32-bit operand, 64-bit result multiplication support
- HasFPA bool // Floating point arithmetic support
- HasVFP bool // Vector floating point support
- HasEDSP bool // DSP Extensions support
- HasJAVA bool // Java instruction set
- HasIWMMXT bool // Intel Wireless MMX technology support
- HasCRUNCH bool // MaverickCrunch context switching and handling
- HasTHUMBEE bool // Thumb EE instruction set
- HasNEON bool // NEON instruction set
- HasVFPv3 bool // Vector floating point version 3 support
- HasVFPv3D16 bool // Vector floating point version 3 D8-D15
- HasTLS bool // Thread local storage support
- HasVFPv4 bool // Vector floating point version 4 support
- HasIDIVA bool // Integer divide instruction support in ARM mode
- HasIDIVT bool // Integer divide instruction support in Thumb mode
- HasVFPD32 bool // Vector floating point version 3 D15-D31
- HasLPAE bool // Large Physical Address Extensions
- HasEVTSTRM bool // Event stream support
- HasAES bool // AES hardware implementation
- HasPMULL bool // Polynomial multiplication instruction set
- HasSHA1 bool // SHA1 hardware implementation
- HasSHA2 bool // SHA2 hardware implementation
- HasCRC32 bool // CRC32 hardware implementation
- _ CacheLinePad
-}
-
-// MIPS64X contains the supported CPU features of the current mips64/mips64le
-// platforms. If the current platform is not mips64/mips64le or the current
-// operating system is not Linux then all feature flags are false.
-var MIPS64X struct {
- _ CacheLinePad
- HasMSA bool // MIPS SIMD architecture
- _ CacheLinePad
-}
-
-// PPC64 contains the supported CPU features of the current ppc64/ppc64le platforms.
-// If the current platform is not ppc64/ppc64le then all feature flags are false.
-//
-// For ppc64/ppc64le, it is safe to check only for ISA level starting on ISA v3.00,
-// since there are no optional categories. There are some exceptions that also
-// require kernel support to work (DARN, SCV), so there are feature bits for
-// those as well. The struct is padded to avoid false sharing.
-var PPC64 struct {
- _ CacheLinePad
- HasDARN bool // Hardware random number generator (requires kernel enablement)
- HasSCV bool // Syscall vectored (requires kernel enablement)
- IsPOWER8 bool // ISA v2.07 (POWER8)
- IsPOWER9 bool // ISA v3.00 (POWER9), implies IsPOWER8
- _ CacheLinePad
-}
-
-// S390X contains the supported CPU features of the current IBM Z
-// (s390x) platform. If the current platform is not IBM Z then all
-// feature flags are false.
-//
-// S390X is padded to avoid false sharing. Further HasVX is only set
-// if the OS supports vector registers in addition to the STFLE
-// feature bit being set.
-var S390X struct {
- _ CacheLinePad
- HasZARCH bool // z/Architecture mode is active [mandatory]
- HasSTFLE bool // store facility list extended
- HasLDISP bool // long (20-bit) displacements
- HasEIMM bool // 32-bit immediates
- HasDFP bool // decimal floating point
- HasETF3EH bool // ETF-3 enhanced
- HasMSA bool // message security assist (CPACF)
- HasAES bool // KM-AES{128,192,256} functions
- HasAESCBC bool // KMC-AES{128,192,256} functions
- HasAESCTR bool // KMCTR-AES{128,192,256} functions
- HasAESGCM bool // KMA-GCM-AES{128,192,256} functions
- HasGHASH bool // KIMD-GHASH function
- HasSHA1 bool // K{I,L}MD-SHA-1 functions
- HasSHA256 bool // K{I,L}MD-SHA-256 functions
- HasSHA512 bool // K{I,L}MD-SHA-512 functions
- HasSHA3 bool // K{I,L}MD-SHA3-{224,256,384,512} and K{I,L}MD-SHAKE-{128,256} functions
- HasVX bool // vector facility
- HasVXE bool // vector-enhancements facility 1
- _ CacheLinePad
-}
-
-func init() {
- archInit()
- initOptions()
- processOptions()
-}
-
-// options contains the cpu debug options that can be used in GODEBUG.
-// Options are arch dependent and are added by the arch specific initOptions functions.
-// Features that are mandatory for the specific GOARCH should have the Required field set
-// (e.g. SSE2 on amd64).
-var options []option
-
-// Option names should be lower case. e.g. avx instead of AVX.
-type option struct {
- Name string
- Feature *bool
- Specified bool // whether feature value was specified in GODEBUG
- Enable bool // whether feature should be enabled
- Required bool // whether feature is mandatory and can not be disabled
-}
-
-func processOptions() {
- env := os.Getenv("GODEBUG")
-field:
- for env != "" {
- field := ""
- i := strings.IndexByte(env, ',')
- if i < 0 {
- field, env = env, ""
- } else {
- field, env = env[:i], env[i+1:]
- }
- if len(field) < 4 || field[:4] != "cpu." {
- continue
- }
- i = strings.IndexByte(field, '=')
- if i < 0 {
- print("GODEBUG sys/cpu: no value specified for \"", field, "\"\n")
- continue
- }
- key, value := field[4:i], field[i+1:] // e.g. "SSE2", "on"
-
- var enable bool
- switch value {
- case "on":
- enable = true
- case "off":
- enable = false
- default:
- print("GODEBUG sys/cpu: value \"", value, "\" not supported for cpu option \"", key, "\"\n")
- continue field
- }
-
- if key == "all" {
- for i := range options {
- options[i].Specified = true
- options[i].Enable = enable || options[i].Required
- }
- continue field
- }
-
- for i := range options {
- if options[i].Name == key {
- options[i].Specified = true
- options[i].Enable = enable
- continue field
- }
- }
-
- print("GODEBUG sys/cpu: unknown cpu feature \"", key, "\"\n")
- }
-
- for _, o := range options {
- if !o.Specified {
- continue
- }
-
- if o.Enable && !*o.Feature {
- print("GODEBUG sys/cpu: can not enable \"", o.Name, "\", missing CPU support\n")
- continue
- }
-
- if !o.Enable && o.Required {
- print("GODEBUG sys/cpu: can not disable \"", o.Name, "\", required CPU feature\n")
- continue
- }
-
- *o.Feature = o.Enable
- }
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_aix.go b/vendor/golang.org/x/sys/cpu/cpu_aix.go
deleted file mode 100644
index 8aaeef5..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_aix.go
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix
-// +build aix
-
-package cpu
-
-const (
- // getsystemcfg constants
- _SC_IMPL = 2
- _IMPL_POWER8 = 0x10000
- _IMPL_POWER9 = 0x20000
-)
-
-func archInit() {
- impl := getsystemcfg(_SC_IMPL)
- if impl&_IMPL_POWER8 != 0 {
- PPC64.IsPOWER8 = true
- }
- if impl&_IMPL_POWER9 != 0 {
- PPC64.IsPOWER8 = true
- PPC64.IsPOWER9 = true
- }
-
- Initialized = true
-}
-
-func getsystemcfg(label int) (n uint64) {
- r0, _ := callgetsystemcfg(label)
- n = uint64(r0)
- return
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm.go b/vendor/golang.org/x/sys/cpu/cpu_arm.go
deleted file mode 100644
index 301b752..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_arm.go
+++ /dev/null
@@ -1,73 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpu
-
-const cacheLineSize = 32
-
-// HWCAP/HWCAP2 bits.
-// These are specific to Linux.
-const (
- hwcap_SWP = 1 << 0
- hwcap_HALF = 1 << 1
- hwcap_THUMB = 1 << 2
- hwcap_26BIT = 1 << 3
- hwcap_FAST_MULT = 1 << 4
- hwcap_FPA = 1 << 5
- hwcap_VFP = 1 << 6
- hwcap_EDSP = 1 << 7
- hwcap_JAVA = 1 << 8
- hwcap_IWMMXT = 1 << 9
- hwcap_CRUNCH = 1 << 10
- hwcap_THUMBEE = 1 << 11
- hwcap_NEON = 1 << 12
- hwcap_VFPv3 = 1 << 13
- hwcap_VFPv3D16 = 1 << 14
- hwcap_TLS = 1 << 15
- hwcap_VFPv4 = 1 << 16
- hwcap_IDIVA = 1 << 17
- hwcap_IDIVT = 1 << 18
- hwcap_VFPD32 = 1 << 19
- hwcap_LPAE = 1 << 20
- hwcap_EVTSTRM = 1 << 21
-
- hwcap2_AES = 1 << 0
- hwcap2_PMULL = 1 << 1
- hwcap2_SHA1 = 1 << 2
- hwcap2_SHA2 = 1 << 3
- hwcap2_CRC32 = 1 << 4
-)
-
-func initOptions() {
- options = []option{
- {Name: "pmull", Feature: &ARM.HasPMULL},
- {Name: "sha1", Feature: &ARM.HasSHA1},
- {Name: "sha2", Feature: &ARM.HasSHA2},
- {Name: "swp", Feature: &ARM.HasSWP},
- {Name: "thumb", Feature: &ARM.HasTHUMB},
- {Name: "thumbee", Feature: &ARM.HasTHUMBEE},
- {Name: "tls", Feature: &ARM.HasTLS},
- {Name: "vfp", Feature: &ARM.HasVFP},
- {Name: "vfpd32", Feature: &ARM.HasVFPD32},
- {Name: "vfpv3", Feature: &ARM.HasVFPv3},
- {Name: "vfpv3d16", Feature: &ARM.HasVFPv3D16},
- {Name: "vfpv4", Feature: &ARM.HasVFPv4},
- {Name: "half", Feature: &ARM.HasHALF},
- {Name: "26bit", Feature: &ARM.Has26BIT},
- {Name: "fastmul", Feature: &ARM.HasFASTMUL},
- {Name: "fpa", Feature: &ARM.HasFPA},
- {Name: "edsp", Feature: &ARM.HasEDSP},
- {Name: "java", Feature: &ARM.HasJAVA},
- {Name: "iwmmxt", Feature: &ARM.HasIWMMXT},
- {Name: "crunch", Feature: &ARM.HasCRUNCH},
- {Name: "neon", Feature: &ARM.HasNEON},
- {Name: "idivt", Feature: &ARM.HasIDIVT},
- {Name: "idiva", Feature: &ARM.HasIDIVA},
- {Name: "lpae", Feature: &ARM.HasLPAE},
- {Name: "evtstrm", Feature: &ARM.HasEVTSTRM},
- {Name: "aes", Feature: &ARM.HasAES},
- {Name: "crc32", Feature: &ARM.HasCRC32},
- }
-
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_arm64.go
deleted file mode 100644
index f3eb993..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_arm64.go
+++ /dev/null
@@ -1,172 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpu
-
-import "runtime"
-
-// cacheLineSize is used to prevent false sharing of cache lines.
-// We choose 128 because Apple Silicon, a.k.a. M1, has 128-byte cache line size.
-// It doesn't cost much and is much more future-proof.
-const cacheLineSize = 128
-
-func initOptions() {
- options = []option{
- {Name: "fp", Feature: &ARM64.HasFP},
- {Name: "asimd", Feature: &ARM64.HasASIMD},
- {Name: "evstrm", Feature: &ARM64.HasEVTSTRM},
- {Name: "aes", Feature: &ARM64.HasAES},
- {Name: "fphp", Feature: &ARM64.HasFPHP},
- {Name: "jscvt", Feature: &ARM64.HasJSCVT},
- {Name: "lrcpc", Feature: &ARM64.HasLRCPC},
- {Name: "pmull", Feature: &ARM64.HasPMULL},
- {Name: "sha1", Feature: &ARM64.HasSHA1},
- {Name: "sha2", Feature: &ARM64.HasSHA2},
- {Name: "sha3", Feature: &ARM64.HasSHA3},
- {Name: "sha512", Feature: &ARM64.HasSHA512},
- {Name: "sm3", Feature: &ARM64.HasSM3},
- {Name: "sm4", Feature: &ARM64.HasSM4},
- {Name: "sve", Feature: &ARM64.HasSVE},
- {Name: "crc32", Feature: &ARM64.HasCRC32},
- {Name: "atomics", Feature: &ARM64.HasATOMICS},
- {Name: "asimdhp", Feature: &ARM64.HasASIMDHP},
- {Name: "cpuid", Feature: &ARM64.HasCPUID},
- {Name: "asimrdm", Feature: &ARM64.HasASIMDRDM},
- {Name: "fcma", Feature: &ARM64.HasFCMA},
- {Name: "dcpop", Feature: &ARM64.HasDCPOP},
- {Name: "asimddp", Feature: &ARM64.HasASIMDDP},
- {Name: "asimdfhm", Feature: &ARM64.HasASIMDFHM},
- }
-}
-
-func archInit() {
- switch runtime.GOOS {
- case "freebsd":
- readARM64Registers()
- case "linux", "netbsd", "openbsd":
- doinit()
- default:
- // Many platforms don't seem to allow reading these registers.
- setMinimalFeatures()
- }
-}
-
-// setMinimalFeatures fakes the minimal ARM64 features expected by
-// TestARM64minimalFeatures.
-func setMinimalFeatures() {
- ARM64.HasASIMD = true
- ARM64.HasFP = true
-}
-
-func readARM64Registers() {
- Initialized = true
-
- parseARM64SystemRegisters(getisar0(), getisar1(), getpfr0())
-}
-
-func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) {
- // ID_AA64ISAR0_EL1
- switch extractBits(isar0, 4, 7) {
- case 1:
- ARM64.HasAES = true
- case 2:
- ARM64.HasAES = true
- ARM64.HasPMULL = true
- }
-
- switch extractBits(isar0, 8, 11) {
- case 1:
- ARM64.HasSHA1 = true
- }
-
- switch extractBits(isar0, 12, 15) {
- case 1:
- ARM64.HasSHA2 = true
- case 2:
- ARM64.HasSHA2 = true
- ARM64.HasSHA512 = true
- }
-
- switch extractBits(isar0, 16, 19) {
- case 1:
- ARM64.HasCRC32 = true
- }
-
- switch extractBits(isar0, 20, 23) {
- case 2:
- ARM64.HasATOMICS = true
- }
-
- switch extractBits(isar0, 28, 31) {
- case 1:
- ARM64.HasASIMDRDM = true
- }
-
- switch extractBits(isar0, 32, 35) {
- case 1:
- ARM64.HasSHA3 = true
- }
-
- switch extractBits(isar0, 36, 39) {
- case 1:
- ARM64.HasSM3 = true
- }
-
- switch extractBits(isar0, 40, 43) {
- case 1:
- ARM64.HasSM4 = true
- }
-
- switch extractBits(isar0, 44, 47) {
- case 1:
- ARM64.HasASIMDDP = true
- }
-
- // ID_AA64ISAR1_EL1
- switch extractBits(isar1, 0, 3) {
- case 1:
- ARM64.HasDCPOP = true
- }
-
- switch extractBits(isar1, 12, 15) {
- case 1:
- ARM64.HasJSCVT = true
- }
-
- switch extractBits(isar1, 16, 19) {
- case 1:
- ARM64.HasFCMA = true
- }
-
- switch extractBits(isar1, 20, 23) {
- case 1:
- ARM64.HasLRCPC = true
- }
-
- // ID_AA64PFR0_EL1
- switch extractBits(pfr0, 16, 19) {
- case 0:
- ARM64.HasFP = true
- case 1:
- ARM64.HasFP = true
- ARM64.HasFPHP = true
- }
-
- switch extractBits(pfr0, 20, 23) {
- case 0:
- ARM64.HasASIMD = true
- case 1:
- ARM64.HasASIMD = true
- ARM64.HasASIMDHP = true
- }
-
- switch extractBits(pfr0, 32, 35) {
- case 1:
- ARM64.HasSVE = true
- }
-}
-
-func extractBits(data uint64, start, end uint) uint {
- return (uint)(data>>start) & ((1 << (end - start + 1)) - 1)
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm64.s b/vendor/golang.org/x/sys/cpu/cpu_arm64.s
deleted file mode 100644
index c61f95a..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_arm64.s
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gc
-// +build gc
-
-#include "textflag.h"
-
-// func getisar0() uint64
-TEXT ·getisar0(SB),NOSPLIT,$0-8
- // get Instruction Set Attributes 0 into x0
- // mrs x0, ID_AA64ISAR0_EL1 = d5380600
- WORD $0xd5380600
- MOVD R0, ret+0(FP)
- RET
-
-// func getisar1() uint64
-TEXT ·getisar1(SB),NOSPLIT,$0-8
- // get Instruction Set Attributes 1 into x0
- // mrs x0, ID_AA64ISAR1_EL1 = d5380620
- WORD $0xd5380620
- MOVD R0, ret+0(FP)
- RET
-
-// func getpfr0() uint64
-TEXT ·getpfr0(SB),NOSPLIT,$0-8
- // get Processor Feature Register 0 into x0
- // mrs x0, ID_AA64PFR0_EL1 = d5380400
- WORD $0xd5380400
- MOVD R0, ret+0(FP)
- RET
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go
deleted file mode 100644
index ccf542a..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gc
-// +build gc
-
-package cpu
-
-func getisar0() uint64
-func getisar1() uint64
-func getpfr0() uint64
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go
deleted file mode 100644
index 0af2f24..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gc
-// +build gc
-
-package cpu
-
-// haveAsmFunctions reports whether the other functions in this file can
-// be safely called.
-func haveAsmFunctions() bool { return true }
-
-// The following feature detection functions are defined in cpu_s390x.s.
-// They are likely to be expensive to call so the results should be cached.
-func stfle() facilityList
-func kmQuery() queryResult
-func kmcQuery() queryResult
-func kmctrQuery() queryResult
-func kmaQuery() queryResult
-func kimdQuery() queryResult
-func klmdQuery() queryResult
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go
deleted file mode 100644
index fa7cdb9..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (386 || amd64 || amd64p32) && gc
-// +build 386 amd64 amd64p32
-// +build gc
-
-package cpu
-
-// cpuid is implemented in cpu_x86.s for gc compiler
-// and in cpu_gccgo.c for gccgo.
-func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
-
-// xgetbv with ecx = 0 is implemented in cpu_x86.s for gc compiler
-// and in cpu_gccgo.c for gccgo.
-func xgetbv() (eax, edx uint32)
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo_arm64.go
deleted file mode 100644
index 2aff318..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_gccgo_arm64.go
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gccgo
-// +build gccgo
-
-package cpu
-
-func getisar0() uint64 { return 0 }
-func getisar1() uint64 { return 0 }
-func getpfr0() uint64 { return 0 }
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go
deleted file mode 100644
index 4bfbda6..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gccgo
-// +build gccgo
-
-package cpu
-
-// haveAsmFunctions reports whether the other functions in this file can
-// be safely called.
-func haveAsmFunctions() bool { return false }
-
-// TODO(mundaym): the following feature detection functions are currently
-// stubs. See https://golang.org/cl/162887 for how to fix this.
-// They are likely to be expensive to call so the results should be cached.
-func stfle() facilityList { panic("not implemented for gccgo") }
-func kmQuery() queryResult { panic("not implemented for gccgo") }
-func kmcQuery() queryResult { panic("not implemented for gccgo") }
-func kmctrQuery() queryResult { panic("not implemented for gccgo") }
-func kmaQuery() queryResult { panic("not implemented for gccgo") }
-func kimdQuery() queryResult { panic("not implemented for gccgo") }
-func klmdQuery() queryResult { panic("not implemented for gccgo") }
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.c b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.c
deleted file mode 100644
index a4605e6..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.c
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build 386 amd64 amd64p32
-// +build gccgo
-
-#include <cpuid.h>
-#include <stdint.h>
-#include <x86intrin.h>
-
-// Need to wrap __get_cpuid_count because it's declared as static.
-int
-gccgoGetCpuidCount(uint32_t leaf, uint32_t subleaf,
- uint32_t *eax, uint32_t *ebx,
- uint32_t *ecx, uint32_t *edx)
-{
- return __get_cpuid_count(leaf, subleaf, eax, ebx, ecx, edx);
-}
-
-#pragma GCC diagnostic ignored "-Wunknown-pragmas"
-#pragma GCC push_options
-#pragma GCC target("xsave")
-#pragma clang attribute push (__attribute__((target("xsave"))), apply_to=function)
-
-// xgetbv reads the contents of an XCR (Extended Control Register)
-// specified in the ECX register into registers EDX:EAX.
-// Currently, the only supported value for XCR is 0.
-void
-gccgoXgetbv(uint32_t *eax, uint32_t *edx)
-{
- uint64_t v = _xgetbv(0);
- *eax = v & 0xffffffff;
- *edx = v >> 32;
-}
-
-#pragma clang attribute pop
-#pragma GCC pop_options
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go
deleted file mode 100644
index 863d415..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (386 || amd64 || amd64p32) && gccgo
-// +build 386 amd64 amd64p32
-// +build gccgo
-
-package cpu
-
-//extern gccgoGetCpuidCount
-func gccgoGetCpuidCount(eaxArg, ecxArg uint32, eax, ebx, ecx, edx *uint32)
-
-func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) {
- var a, b, c, d uint32
- gccgoGetCpuidCount(eaxArg, ecxArg, &a, &b, &c, &d)
- return a, b, c, d
-}
-
-//extern gccgoXgetbv
-func gccgoXgetbv(eax, edx *uint32)
-
-func xgetbv() (eax, edx uint32) {
- var a, d uint32
- gccgoXgetbv(&a, &d)
- return a, d
-}
-
-// gccgo doesn't build on Darwin, per:
-// https://github.com/Homebrew/homebrew-core/blob/HEAD/Formula/gcc.rb#L76
-func darwinSupportsAVX512() bool {
- return false
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux.go b/vendor/golang.org/x/sys/cpu/cpu_linux.go
deleted file mode 100644
index 159a686..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_linux.go
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !386 && !amd64 && !amd64p32 && !arm64
-// +build !386,!amd64,!amd64p32,!arm64
-
-package cpu
-
-func archInit() {
- if err := readHWCAP(); err != nil {
- return
- }
- doinit()
- Initialized = true
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_arm.go b/vendor/golang.org/x/sys/cpu/cpu_linux_arm.go
deleted file mode 100644
index 2057006..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_linux_arm.go
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpu
-
-func doinit() {
- ARM.HasSWP = isSet(hwCap, hwcap_SWP)
- ARM.HasHALF = isSet(hwCap, hwcap_HALF)
- ARM.HasTHUMB = isSet(hwCap, hwcap_THUMB)
- ARM.Has26BIT = isSet(hwCap, hwcap_26BIT)
- ARM.HasFASTMUL = isSet(hwCap, hwcap_FAST_MULT)
- ARM.HasFPA = isSet(hwCap, hwcap_FPA)
- ARM.HasVFP = isSet(hwCap, hwcap_VFP)
- ARM.HasEDSP = isSet(hwCap, hwcap_EDSP)
- ARM.HasJAVA = isSet(hwCap, hwcap_JAVA)
- ARM.HasIWMMXT = isSet(hwCap, hwcap_IWMMXT)
- ARM.HasCRUNCH = isSet(hwCap, hwcap_CRUNCH)
- ARM.HasTHUMBEE = isSet(hwCap, hwcap_THUMBEE)
- ARM.HasNEON = isSet(hwCap, hwcap_NEON)
- ARM.HasVFPv3 = isSet(hwCap, hwcap_VFPv3)
- ARM.HasVFPv3D16 = isSet(hwCap, hwcap_VFPv3D16)
- ARM.HasTLS = isSet(hwCap, hwcap_TLS)
- ARM.HasVFPv4 = isSet(hwCap, hwcap_VFPv4)
- ARM.HasIDIVA = isSet(hwCap, hwcap_IDIVA)
- ARM.HasIDIVT = isSet(hwCap, hwcap_IDIVT)
- ARM.HasVFPD32 = isSet(hwCap, hwcap_VFPD32)
- ARM.HasLPAE = isSet(hwCap, hwcap_LPAE)
- ARM.HasEVTSTRM = isSet(hwCap, hwcap_EVTSTRM)
- ARM.HasAES = isSet(hwCap2, hwcap2_AES)
- ARM.HasPMULL = isSet(hwCap2, hwcap2_PMULL)
- ARM.HasSHA1 = isSet(hwCap2, hwcap2_SHA1)
- ARM.HasSHA2 = isSet(hwCap2, hwcap2_SHA2)
- ARM.HasCRC32 = isSet(hwCap2, hwcap2_CRC32)
-}
-
-func isSet(hwc uint, value uint) bool {
- return hwc&value != 0
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go
deleted file mode 100644
index 79a38a0..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpu
-
-// HWCAP/HWCAP2 bits. These are exposed by Linux.
-const (
- hwcap_FP = 1 << 0
- hwcap_ASIMD = 1 << 1
- hwcap_EVTSTRM = 1 << 2
- hwcap_AES = 1 << 3
- hwcap_PMULL = 1 << 4
- hwcap_SHA1 = 1 << 5
- hwcap_SHA2 = 1 << 6
- hwcap_CRC32 = 1 << 7
- hwcap_ATOMICS = 1 << 8
- hwcap_FPHP = 1 << 9
- hwcap_ASIMDHP = 1 << 10
- hwcap_CPUID = 1 << 11
- hwcap_ASIMDRDM = 1 << 12
- hwcap_JSCVT = 1 << 13
- hwcap_FCMA = 1 << 14
- hwcap_LRCPC = 1 << 15
- hwcap_DCPOP = 1 << 16
- hwcap_SHA3 = 1 << 17
- hwcap_SM3 = 1 << 18
- hwcap_SM4 = 1 << 19
- hwcap_ASIMDDP = 1 << 20
- hwcap_SHA512 = 1 << 21
- hwcap_SVE = 1 << 22
- hwcap_ASIMDFHM = 1 << 23
-)
-
-func doinit() {
- if err := readHWCAP(); err != nil {
- // failed to read /proc/self/auxv, try reading registers directly
- readARM64Registers()
- return
- }
-
- // HWCAP feature bits
- ARM64.HasFP = isSet(hwCap, hwcap_FP)
- ARM64.HasASIMD = isSet(hwCap, hwcap_ASIMD)
- ARM64.HasEVTSTRM = isSet(hwCap, hwcap_EVTSTRM)
- ARM64.HasAES = isSet(hwCap, hwcap_AES)
- ARM64.HasPMULL = isSet(hwCap, hwcap_PMULL)
- ARM64.HasSHA1 = isSet(hwCap, hwcap_SHA1)
- ARM64.HasSHA2 = isSet(hwCap, hwcap_SHA2)
- ARM64.HasCRC32 = isSet(hwCap, hwcap_CRC32)
- ARM64.HasATOMICS = isSet(hwCap, hwcap_ATOMICS)
- ARM64.HasFPHP = isSet(hwCap, hwcap_FPHP)
- ARM64.HasASIMDHP = isSet(hwCap, hwcap_ASIMDHP)
- ARM64.HasCPUID = isSet(hwCap, hwcap_CPUID)
- ARM64.HasASIMDRDM = isSet(hwCap, hwcap_ASIMDRDM)
- ARM64.HasJSCVT = isSet(hwCap, hwcap_JSCVT)
- ARM64.HasFCMA = isSet(hwCap, hwcap_FCMA)
- ARM64.HasLRCPC = isSet(hwCap, hwcap_LRCPC)
- ARM64.HasDCPOP = isSet(hwCap, hwcap_DCPOP)
- ARM64.HasSHA3 = isSet(hwCap, hwcap_SHA3)
- ARM64.HasSM3 = isSet(hwCap, hwcap_SM3)
- ARM64.HasSM4 = isSet(hwCap, hwcap_SM4)
- ARM64.HasASIMDDP = isSet(hwCap, hwcap_ASIMDDP)
- ARM64.HasSHA512 = isSet(hwCap, hwcap_SHA512)
- ARM64.HasSVE = isSet(hwCap, hwcap_SVE)
- ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM)
-}
-
-func isSet(hwc uint, value uint) bool {
- return hwc&value != 0
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_mips64x.go b/vendor/golang.org/x/sys/cpu/cpu_linux_mips64x.go
deleted file mode 100644
index 6000db4..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_linux_mips64x.go
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build linux && (mips64 || mips64le)
-// +build linux
-// +build mips64 mips64le
-
-package cpu
-
-// HWCAP bits. These are exposed by the Linux kernel 5.4.
-const (
- // CPU features
- hwcap_MIPS_MSA = 1 << 1
-)
-
-func doinit() {
- // HWCAP feature bits
- MIPS64X.HasMSA = isSet(hwCap, hwcap_MIPS_MSA)
-}
-
-func isSet(hwc uint, value uint) bool {
- return hwc&value != 0
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
deleted file mode 100644
index f4992b1..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x
-// +build linux,!arm,!arm64,!mips64,!mips64le,!ppc64,!ppc64le,!s390x
-
-package cpu
-
-func doinit() {}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go b/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go
deleted file mode 100644
index 021356d..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build linux && (ppc64 || ppc64le)
-// +build linux
-// +build ppc64 ppc64le
-
-package cpu
-
-// HWCAP/HWCAP2 bits. These are exposed by the kernel.
-const (
- // ISA Level
- _PPC_FEATURE2_ARCH_2_07 = 0x80000000
- _PPC_FEATURE2_ARCH_3_00 = 0x00800000
-
- // CPU features
- _PPC_FEATURE2_DARN = 0x00200000
- _PPC_FEATURE2_SCV = 0x00100000
-)
-
-func doinit() {
- // HWCAP2 feature bits
- PPC64.IsPOWER8 = isSet(hwCap2, _PPC_FEATURE2_ARCH_2_07)
- PPC64.IsPOWER9 = isSet(hwCap2, _PPC_FEATURE2_ARCH_3_00)
- PPC64.HasDARN = isSet(hwCap2, _PPC_FEATURE2_DARN)
- PPC64.HasSCV = isSet(hwCap2, _PPC_FEATURE2_SCV)
-}
-
-func isSet(hwc uint, value uint) bool {
- return hwc&value != 0
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_linux_s390x.go
deleted file mode 100644
index 1517ac6..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_linux_s390x.go
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpu
-
-const (
- // bit mask values from /usr/include/bits/hwcap.h
- hwcap_ZARCH = 2
- hwcap_STFLE = 4
- hwcap_MSA = 8
- hwcap_LDISP = 16
- hwcap_EIMM = 32
- hwcap_DFP = 64
- hwcap_ETF3EH = 256
- hwcap_VX = 2048
- hwcap_VXE = 8192
-)
-
-func initS390Xbase() {
- // test HWCAP bit vector
- has := func(featureMask uint) bool {
- return hwCap&featureMask == featureMask
- }
-
- // mandatory
- S390X.HasZARCH = has(hwcap_ZARCH)
-
- // optional
- S390X.HasSTFLE = has(hwcap_STFLE)
- S390X.HasLDISP = has(hwcap_LDISP)
- S390X.HasEIMM = has(hwcap_EIMM)
- S390X.HasETF3EH = has(hwcap_ETF3EH)
- S390X.HasDFP = has(hwcap_DFP)
- S390X.HasMSA = has(hwcap_MSA)
- S390X.HasVX = has(hwcap_VX)
- if S390X.HasVX {
- S390X.HasVXE = has(hwcap_VXE)
- }
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_loong64.go b/vendor/golang.org/x/sys/cpu/cpu_loong64.go
deleted file mode 100644
index 0f57b05..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_loong64.go
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright 2022 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build loong64
-// +build loong64
-
-package cpu
-
-const cacheLineSize = 64
-
-func initOptions() {
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_mips64x.go b/vendor/golang.org/x/sys/cpu/cpu_mips64x.go
deleted file mode 100644
index f4063c6..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_mips64x.go
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build mips64 || mips64le
-// +build mips64 mips64le
-
-package cpu
-
-const cacheLineSize = 32
-
-func initOptions() {
- options = []option{
- {Name: "msa", Feature: &MIPS64X.HasMSA},
- }
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_mipsx.go b/vendor/golang.org/x/sys/cpu/cpu_mipsx.go
deleted file mode 100644
index 07c4e36..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_mipsx.go
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build mips || mipsle
-// +build mips mipsle
-
-package cpu
-
-const cacheLineSize = 32
-
-func initOptions() {}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_netbsd_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_netbsd_arm64.go
deleted file mode 100644
index ebfb3fc..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_netbsd_arm64.go
+++ /dev/null
@@ -1,173 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpu
-
-import (
- "syscall"
- "unsafe"
-)
-
-// Minimal copy of functionality from x/sys/unix so the cpu package can call
-// sysctl without depending on x/sys/unix.
-
-const (
- _CTL_QUERY = -2
-
- _SYSCTL_VERS_1 = 0x1000000
-)
-
-var _zero uintptr
-
-func sysctl(mib []int32, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) {
- var _p0 unsafe.Pointer
- if len(mib) > 0 {
- _p0 = unsafe.Pointer(&mib[0])
- } else {
- _p0 = unsafe.Pointer(&_zero)
- }
- _, _, errno := syscall.Syscall6(
- syscall.SYS___SYSCTL,
- uintptr(_p0),
- uintptr(len(mib)),
- uintptr(unsafe.Pointer(old)),
- uintptr(unsafe.Pointer(oldlen)),
- uintptr(unsafe.Pointer(new)),
- uintptr(newlen))
- if errno != 0 {
- return errno
- }
- return nil
-}
-
-type sysctlNode struct {
- Flags uint32
- Num int32
- Name [32]int8
- Ver uint32
- __rsvd uint32
- Un [16]byte
- _sysctl_size [8]byte
- _sysctl_func [8]byte
- _sysctl_parent [8]byte
- _sysctl_desc [8]byte
-}
-
-func sysctlNodes(mib []int32) ([]sysctlNode, error) {
- var olen uintptr
-
- // Get a list of all sysctl nodes below the given MIB by performing
- // a sysctl for the given MIB with CTL_QUERY appended.
- mib = append(mib, _CTL_QUERY)
- qnode := sysctlNode{Flags: _SYSCTL_VERS_1}
- qp := (*byte)(unsafe.Pointer(&qnode))
- sz := unsafe.Sizeof(qnode)
- if err := sysctl(mib, nil, &olen, qp, sz); err != nil {
- return nil, err
- }
-
- // Now that we know the size, get the actual nodes.
- nodes := make([]sysctlNode, olen/sz)
- np := (*byte)(unsafe.Pointer(&nodes[0]))
- if err := sysctl(mib, np, &olen, qp, sz); err != nil {
- return nil, err
- }
-
- return nodes, nil
-}
-
-func nametomib(name string) ([]int32, error) {
- // Split name into components.
- var parts []string
- last := 0
- for i := 0; i < len(name); i++ {
- if name[i] == '.' {
- parts = append(parts, name[last:i])
- last = i + 1
- }
- }
- parts = append(parts, name[last:])
-
- mib := []int32{}
- // Discover the nodes and construct the MIB OID.
- for partno, part := range parts {
- nodes, err := sysctlNodes(mib)
- if err != nil {
- return nil, err
- }
- for _, node := range nodes {
- n := make([]byte, 0)
- for i := range node.Name {
- if node.Name[i] != 0 {
- n = append(n, byte(node.Name[i]))
- }
- }
- if string(n) == part {
- mib = append(mib, int32(node.Num))
- break
- }
- }
- if len(mib) != partno+1 {
- return nil, err
- }
- }
-
- return mib, nil
-}
-
-// aarch64SysctlCPUID is struct aarch64_sysctl_cpu_id from NetBSD's <aarch64/armreg.h>
-type aarch64SysctlCPUID struct {
- midr uint64 /* Main ID Register */
- revidr uint64 /* Revision ID Register */
- mpidr uint64 /* Multiprocessor Affinity Register */
- aa64dfr0 uint64 /* A64 Debug Feature Register 0 */
- aa64dfr1 uint64 /* A64 Debug Feature Register 1 */
- aa64isar0 uint64 /* A64 Instruction Set Attribute Register 0 */
- aa64isar1 uint64 /* A64 Instruction Set Attribute Register 1 */
- aa64mmfr0 uint64 /* A64 Memory Model Feature Register 0 */
- aa64mmfr1 uint64 /* A64 Memory Model Feature Register 1 */
- aa64mmfr2 uint64 /* A64 Memory Model Feature Register 2 */
- aa64pfr0 uint64 /* A64 Processor Feature Register 0 */
- aa64pfr1 uint64 /* A64 Processor Feature Register 1 */
- aa64zfr0 uint64 /* A64 SVE Feature ID Register 0 */
- mvfr0 uint32 /* Media and VFP Feature Register 0 */
- mvfr1 uint32 /* Media and VFP Feature Register 1 */
- mvfr2 uint32 /* Media and VFP Feature Register 2 */
- pad uint32
- clidr uint64 /* Cache Level ID Register */
- ctr uint64 /* Cache Type Register */
-}
-
-func sysctlCPUID(name string) (*aarch64SysctlCPUID, error) {
- mib, err := nametomib(name)
- if err != nil {
- return nil, err
- }
-
- out := aarch64SysctlCPUID{}
- n := unsafe.Sizeof(out)
- _, _, errno := syscall.Syscall6(
- syscall.SYS___SYSCTL,
- uintptr(unsafe.Pointer(&mib[0])),
- uintptr(len(mib)),
- uintptr(unsafe.Pointer(&out)),
- uintptr(unsafe.Pointer(&n)),
- uintptr(0),
- uintptr(0))
- if errno != 0 {
- return nil, errno
- }
- return &out, nil
-}
-
-func doinit() {
- cpuid, err := sysctlCPUID("machdep.cpu0.cpu_id")
- if err != nil {
- setMinimalFeatures()
- return
- }
- parseARM64SystemRegisters(cpuid.aa64isar0, cpuid.aa64isar1, cpuid.aa64pfr0)
-
- Initialized = true
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_openbsd_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_openbsd_arm64.go
deleted file mode 100644
index 85b64d5..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_openbsd_arm64.go
+++ /dev/null
@@ -1,65 +0,0 @@
-// Copyright 2022 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpu
-
-import (
- "syscall"
- "unsafe"
-)
-
-// Minimal copy of functionality from x/sys/unix so the cpu package can call
-// sysctl without depending on x/sys/unix.
-
-const (
- // From OpenBSD's sys/sysctl.h.
- _CTL_MACHDEP = 7
-
- // From OpenBSD's machine/cpu.h.
- _CPU_ID_AA64ISAR0 = 2
- _CPU_ID_AA64ISAR1 = 3
-)
-
-// Implemented in the runtime package (runtime/sys_openbsd3.go)
-func syscall_syscall6(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno)
-
-//go:linkname syscall_syscall6 syscall.syscall6
-
-func sysctl(mib []uint32, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) {
- _, _, errno := syscall_syscall6(libc_sysctl_trampoline_addr, uintptr(unsafe.Pointer(&mib[0])), uintptr(len(mib)), uintptr(unsafe.Pointer(old)), uintptr(unsafe.Pointer(oldlen)), uintptr(unsafe.Pointer(new)), uintptr(newlen))
- if errno != 0 {
- return errno
- }
- return nil
-}
-
-var libc_sysctl_trampoline_addr uintptr
-
-//go:cgo_import_dynamic libc_sysctl sysctl "libc.so"
-
-func sysctlUint64(mib []uint32) (uint64, bool) {
- var out uint64
- nout := unsafe.Sizeof(out)
- if err := sysctl(mib, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0); err != nil {
- return 0, false
- }
- return out, true
-}
-
-func doinit() {
- setMinimalFeatures()
-
- // Get ID_AA64ISAR0 and ID_AA64ISAR1 from sysctl.
- isar0, ok := sysctlUint64([]uint32{_CTL_MACHDEP, _CPU_ID_AA64ISAR0})
- if !ok {
- return
- }
- isar1, ok := sysctlUint64([]uint32{_CTL_MACHDEP, _CPU_ID_AA64ISAR1})
- if !ok {
- return
- }
- parseARM64SystemRegisters(isar0, isar1, 0)
-
- Initialized = true
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_openbsd_arm64.s b/vendor/golang.org/x/sys/cpu/cpu_openbsd_arm64.s
deleted file mode 100644
index 054ba05..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_openbsd_arm64.s
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright 2022 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0
- JMP libc_sysctl(SB)
-
-GLOBL ·libc_sysctl_trampoline_addr(SB), RODATA, $8
-DATA ·libc_sysctl_trampoline_addr(SB)/8, $libc_sysctl_trampoline<>(SB)
diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_arm.go b/vendor/golang.org/x/sys/cpu/cpu_other_arm.go
deleted file mode 100644
index d7b4fb4..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_other_arm.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !linux && arm
-// +build !linux,arm
-
-package cpu
-
-func archInit() {}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go
deleted file mode 100644
index f3cde12..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !linux && !netbsd && !openbsd && arm64
-// +build !linux,!netbsd,!openbsd,arm64
-
-package cpu
-
-func doinit() {}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_mips64x.go b/vendor/golang.org/x/sys/cpu/cpu_other_mips64x.go
deleted file mode 100644
index 0dafe96..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_other_mips64x.go
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !linux && (mips64 || mips64le)
-// +build !linux
-// +build mips64 mips64le
-
-package cpu
-
-func archInit() {
- Initialized = true
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_other_riscv64.go
deleted file mode 100644
index dd10eb7..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_other_riscv64.go
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright 2022 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !linux && riscv64
-// +build !linux,riscv64
-
-package cpu
-
-func archInit() {
- Initialized = true
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_ppc64x.go b/vendor/golang.org/x/sys/cpu/cpu_ppc64x.go
deleted file mode 100644
index 4e8acd1..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_ppc64x.go
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build ppc64 || ppc64le
-// +build ppc64 ppc64le
-
-package cpu
-
-const cacheLineSize = 128
-
-func initOptions() {
- options = []option{
- {Name: "darn", Feature: &PPC64.HasDARN},
- {Name: "scv", Feature: &PPC64.HasSCV},
- }
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
deleted file mode 100644
index bd6c128..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build riscv64
-// +build riscv64
-
-package cpu
-
-const cacheLineSize = 32
-
-func initOptions() {}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_s390x.go
deleted file mode 100644
index 5881b88..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_s390x.go
+++ /dev/null
@@ -1,172 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cpu
-
-const cacheLineSize = 256
-
-func initOptions() {
- options = []option{
- {Name: "zarch", Feature: &S390X.HasZARCH, Required: true},
- {Name: "stfle", Feature: &S390X.HasSTFLE, Required: true},
- {Name: "ldisp", Feature: &S390X.HasLDISP, Required: true},
- {Name: "eimm", Feature: &S390X.HasEIMM, Required: true},
- {Name: "dfp", Feature: &S390X.HasDFP},
- {Name: "etf3eh", Feature: &S390X.HasETF3EH},
- {Name: "msa", Feature: &S390X.HasMSA},
- {Name: "aes", Feature: &S390X.HasAES},
- {Name: "aescbc", Feature: &S390X.HasAESCBC},
- {Name: "aesctr", Feature: &S390X.HasAESCTR},
- {Name: "aesgcm", Feature: &S390X.HasAESGCM},
- {Name: "ghash", Feature: &S390X.HasGHASH},
- {Name: "sha1", Feature: &S390X.HasSHA1},
- {Name: "sha256", Feature: &S390X.HasSHA256},
- {Name: "sha3", Feature: &S390X.HasSHA3},
- {Name: "sha512", Feature: &S390X.HasSHA512},
- {Name: "vx", Feature: &S390X.HasVX},
- {Name: "vxe", Feature: &S390X.HasVXE},
- }
-}
-
-// bitIsSet reports whether the bit at index is set. The bit index
-// is in big endian order, so bit index 0 is the leftmost bit.
-func bitIsSet(bits []uint64, index uint) bool {
- return bits[index/64]&((1<<63)>>(index%64)) != 0
-}
-
-// facility is a bit index for the named facility.
-type facility uint8
-
-const (
- // mandatory facilities
- zarch facility = 1 // z architecture mode is active
- stflef facility = 7 // store-facility-list-extended
- ldisp facility = 18 // long-displacement
- eimm facility = 21 // extended-immediate
-
- // miscellaneous facilities
- dfp facility = 42 // decimal-floating-point
- etf3eh facility = 30 // extended-translation 3 enhancement
-
- // cryptography facilities
- msa facility = 17 // message-security-assist
- msa3 facility = 76 // message-security-assist extension 3
- msa4 facility = 77 // message-security-assist extension 4
- msa5 facility = 57 // message-security-assist extension 5
- msa8 facility = 146 // message-security-assist extension 8
- msa9 facility = 155 // message-security-assist extension 9
-
- // vector facilities
- vx facility = 129 // vector facility
- vxe facility = 135 // vector-enhancements 1
- vxe2 facility = 148 // vector-enhancements 2
-)
-
-// facilityList contains the result of an STFLE call.
-// Bits are numbered in big endian order so the
-// leftmost bit (the MSB) is at index 0.
-type facilityList struct {
- bits [4]uint64
-}
-
-// Has reports whether the given facilities are present.
-func (s *facilityList) Has(fs ...facility) bool {
- if len(fs) == 0 {
- panic("no facility bits provided")
- }
- for _, f := range fs {
- if !bitIsSet(s.bits[:], uint(f)) {
- return false
- }
- }
- return true
-}
-
-// function is the code for the named cryptographic function.
-type function uint8
-
-const (
- // KM{,A,C,CTR} function codes
- aes128 function = 18 // AES-128
- aes192 function = 19 // AES-192
- aes256 function = 20 // AES-256
-
- // K{I,L}MD function codes
- sha1 function = 1 // SHA-1
- sha256 function = 2 // SHA-256
- sha512 function = 3 // SHA-512
- sha3_224 function = 32 // SHA3-224
- sha3_256 function = 33 // SHA3-256
- sha3_384 function = 34 // SHA3-384
- sha3_512 function = 35 // SHA3-512
- shake128 function = 36 // SHAKE-128
- shake256 function = 37 // SHAKE-256
-
- // KLMD function codes
- ghash function = 65 // GHASH
-)
-
-// queryResult contains the result of a Query function
-// call. Bits are numbered in big endian order so the
-// leftmost bit (the MSB) is at index 0.
-type queryResult struct {
- bits [2]uint64
-}
-
-// Has reports whether the given functions are present.
-func (q *queryResult) Has(fns ...function) bool {
- if len(fns) == 0 {
- panic("no function codes provided")
- }
- for _, f := range fns {
- if !bitIsSet(q.bits[:], uint(f)) {
- return false
- }
- }
- return true
-}
-
-func doinit() {
- initS390Xbase()
-
- // We need implementations of stfle, km and so on
- // to detect cryptographic features.
- if !haveAsmFunctions() {
- return
- }
-
- // optional cryptographic functions
- if S390X.HasMSA {
- aes := []function{aes128, aes192, aes256}
-
- // cipher message
- km, kmc := kmQuery(), kmcQuery()
- S390X.HasAES = km.Has(aes...)
- S390X.HasAESCBC = kmc.Has(aes...)
- if S390X.HasSTFLE {
- facilities := stfle()
- if facilities.Has(msa4) {
- kmctr := kmctrQuery()
- S390X.HasAESCTR = kmctr.Has(aes...)
- }
- if facilities.Has(msa8) {
- kma := kmaQuery()
- S390X.HasAESGCM = kma.Has(aes...)
- }
- }
-
- // compute message digest
- kimd := kimdQuery() // intermediate (no padding)
- klmd := klmdQuery() // last (padding)
- S390X.HasSHA1 = kimd.Has(sha1) && klmd.Has(sha1)
- S390X.HasSHA256 = kimd.Has(sha256) && klmd.Has(sha256)
- S390X.HasSHA512 = kimd.Has(sha512) && klmd.Has(sha512)
- S390X.HasGHASH = kimd.Has(ghash) // KLMD-GHASH does not exist
- sha3 := []function{
- sha3_224, sha3_256, sha3_384, sha3_512,
- shake128, shake256,
- }
- S390X.HasSHA3 = kimd.Has(sha3...) && klmd.Has(sha3...)
- }
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_s390x.s b/vendor/golang.org/x/sys/cpu/cpu_s390x.s
deleted file mode 100644
index 96f81e2..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_s390x.s
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gc
-// +build gc
-
-#include "textflag.h"
-
-// func stfle() facilityList
-TEXT ·stfle(SB), NOSPLIT|NOFRAME, $0-32
- MOVD $ret+0(FP), R1
- MOVD $3, R0 // last doubleword index to store
- XC $32, (R1), (R1) // clear 4 doublewords (32 bytes)
- WORD $0xb2b01000 // store facility list extended (STFLE)
- RET
-
-// func kmQuery() queryResult
-TEXT ·kmQuery(SB), NOSPLIT|NOFRAME, $0-16
- MOVD $0, R0 // set function code to 0 (KM-Query)
- MOVD $ret+0(FP), R1 // address of 16-byte return value
- WORD $0xB92E0024 // cipher message (KM)
- RET
-
-// func kmcQuery() queryResult
-TEXT ·kmcQuery(SB), NOSPLIT|NOFRAME, $0-16
- MOVD $0, R0 // set function code to 0 (KMC-Query)
- MOVD $ret+0(FP), R1 // address of 16-byte return value
- WORD $0xB92F0024 // cipher message with chaining (KMC)
- RET
-
-// func kmctrQuery() queryResult
-TEXT ·kmctrQuery(SB), NOSPLIT|NOFRAME, $0-16
- MOVD $0, R0 // set function code to 0 (KMCTR-Query)
- MOVD $ret+0(FP), R1 // address of 16-byte return value
- WORD $0xB92D4024 // cipher message with counter (KMCTR)
- RET
-
-// func kmaQuery() queryResult
-TEXT ·kmaQuery(SB), NOSPLIT|NOFRAME, $0-16
- MOVD $0, R0 // set function code to 0 (KMA-Query)
- MOVD $ret+0(FP), R1 // address of 16-byte return value
- WORD $0xb9296024 // cipher message with authentication (KMA)
- RET
-
-// func kimdQuery() queryResult
-TEXT ·kimdQuery(SB), NOSPLIT|NOFRAME, $0-16
- MOVD $0, R0 // set function code to 0 (KIMD-Query)
- MOVD $ret+0(FP), R1 // address of 16-byte return value
- WORD $0xB93E0024 // compute intermediate message digest (KIMD)
- RET
-
-// func klmdQuery() queryResult
-TEXT ·klmdQuery(SB), NOSPLIT|NOFRAME, $0-16
- MOVD $0, R0 // set function code to 0 (KLMD-Query)
- MOVD $ret+0(FP), R1 // address of 16-byte return value
- WORD $0xB93F0024 // compute last message digest (KLMD)
- RET
diff --git a/vendor/golang.org/x/sys/cpu/cpu_wasm.go b/vendor/golang.org/x/sys/cpu/cpu_wasm.go
deleted file mode 100644
index 7747d88..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_wasm.go
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build wasm
-// +build wasm
-
-package cpu
-
-// We're compiling the cpu package for an unknown (software-abstracted) CPU.
-// Make CacheLinePad an empty struct and hope that the usual struct alignment
-// rules are good enough.
-
-const cacheLineSize = 0
-
-func initOptions() {}
-
-func archInit() {}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_x86.go b/vendor/golang.org/x/sys/cpu/cpu_x86.go
deleted file mode 100644
index f5aacfc..0000000
--- a/vendor/golang.org/x/sys/cpu/cpu_x86.go
+++ /dev/null
@@ -1,145 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build 386 || amd64 || amd64p32
-// +build 386 amd64 amd64p32
-
-package cpu
-
-import "runtime"
-
-const cacheLineSize = 64
-
-func initOptions() {
- options = []option{
- {Name: "adx", Feature: &X86.HasADX},
- {Name: "aes", Feature: &X86.HasAES},
- {Name: "avx", Feature: &X86.HasAVX},
- {Name: "avx2", Feature: &X86.HasAVX2},
- {Name: "avx512", Feature: &X86.HasAVX512},
- {Name: "avx512f", Feature: &X86.HasAVX512F},
- {Name: "avx512cd", Feature: &X86.HasAVX512CD},
- {Name: "avx512er", Feature: &X86.HasAVX512ER},
- {Name: "avx512pf", Feature: &X86.HasAVX512PF},
- {Name: "avx512vl", Feature: &X86.HasAVX512VL},
- {Name: "avx512bw", Feature: &X86.HasAVX512BW},
- {Name: "avx512dq", Feature: &X86.HasAVX512DQ},
- {Name: "avx512ifma", Feature: &X86.HasAVX512IFMA},
- {Name: "avx512vbmi", Feature: &X86.HasAVX512VBMI},
- {Name: "avx512vnniw", Feature: &X86.HasAVX5124VNNIW},
- {Name: "avx5124fmaps", Feature: &X86.HasAVX5124FMAPS},
- {Name: "avx512vpopcntdq", Feature: &X86.HasAVX512VPOPCNTDQ},
- {Name: "avx512vpclmulqdq", Feature: &X86.HasAVX512VPCLMULQDQ},
- {Name: "avx512vnni", Feature: &X86.HasAVX512VNNI},
- {Name: "avx512gfni", Feature: &X86.HasAVX512GFNI},
- {Name: "avx512vaes", Feature: &X86.HasAVX512VAES},
- {Name: "avx512vbmi2", Feature: &X86.HasAVX512VBMI2},
- {Name: "avx512bitalg", Feature: &X86.HasAVX512BITALG},
- {Name: "avx512bf16", Feature: &X86.HasAVX512BF16},
- {Name: "bmi1", Feature: &X86.HasBMI1},
- {Name: "bmi2", Feature: &X86.HasBMI2},
- {Name: "cx16", Feature: &X86.HasCX16},
- {Name: "erms", Feature: &X86.HasERMS},
- {Name: "fma", Feature: &X86.HasFMA},
- {Name: "osxsave", Feature: &X86.HasOSXSAVE},
- {Name: "pclmulqdq", Feature: &X86.HasPCLMULQDQ},
- {Name: "popcnt", Feature: &X86.HasPOPCNT},
- {Name: "rdrand", Feature: &X86.HasRDRAND},
- {Name: "rdseed", Feature: &X86.HasRDSEED},
- {Name: "sse3", Feature: &X86.HasSSE3},
- {Name: "sse41", Feature: &X86.HasSSE41},
- {Name: "sse42", Feature: &X86.HasSSE42},
- {Name: "ssse3", Feature: &X86.HasSSSE3},
-
- // These capabilities should always be enabled on amd64:
- {Name: "sse2", Feature: &X86.HasSSE2, Required: runtime.GOARCH == "amd64"},
- }
-}
-
-func archInit() {
-
- Initialized = true
-
- maxID, _, _, _ := cpuid(0, 0)
-
- if maxID < 1 {
- return
- }
-
- _, _, ecx1, edx1 := cpuid(1, 0)
- X86.HasSSE2 = isSet(26, edx1)
-
- X86.HasSSE3 = isSet(0, ecx1)
- X86.HasPCLMULQDQ = isSet(1, ecx1)
- X86.HasSSSE3 = isSet(9, ecx1)
- X86.HasFMA = isSet(12, ecx1)
- X86.HasCX16 = isSet(13, ecx1)
- X86.HasSSE41 = isSet(19, ecx1)
- X86.HasSSE42 = isSet(20, ecx1)
- X86.HasPOPCNT = isSet(23, ecx1)
- X86.HasAES = isSet(25, ecx1)
- X86.HasOSXSAVE = isSet(27, ecx1)
- X86.HasRDRAND = isSet(30, ecx1)
-
- var osSupportsAVX, osSupportsAVX512 bool
- // For XGETBV, OSXSAVE bit is required and sufficient.
- if X86.HasOSXSAVE {
- eax, _ := xgetbv()
- // Check if XMM and YMM registers have OS support.
- osSupportsAVX = isSet(1, eax) && isSet(2, eax)
-
- if runtime.GOOS == "darwin" {
- // Darwin doesn't save/restore AVX-512 mask registers correctly across signal handlers.
- // Since users can't rely on mask register contents, let's not advertise AVX-512 support.
- // See issue 49233.
- osSupportsAVX512 = false
- } else {
- // Check if OPMASK and ZMM registers have OS support.
- osSupportsAVX512 = osSupportsAVX && isSet(5, eax) && isSet(6, eax) && isSet(7, eax)
- }
- }
-
- X86.HasAVX = isSet(28, ecx1) && osSupportsAVX
-
- if maxID < 7 {
- return
- }
-
- _, ebx7, ecx7, edx7 := cpuid(7, 0)
- X86.HasBMI1 = isSet(3, ebx7)
- X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX
- X86.HasBMI2 = isSet(8, ebx7)
- X86.HasERMS = isSet(9, ebx7)
- X86.HasRDSEED = isSet(18, ebx7)
- X86.HasADX = isSet(19, ebx7)
-
- X86.HasAVX512 = isSet(16, ebx7) && osSupportsAVX512 // Because avx-512 foundation is the core required extension
- if X86.HasAVX512 {
- X86.HasAVX512F = true
- X86.HasAVX512CD = isSet(28, ebx7)
- X86.HasAVX512ER = isSet(27, ebx7)
- X86.HasAVX512PF = isSet(26, ebx7)
- X86.HasAVX512VL = isSet(31, ebx7)
- X86.HasAVX512BW = isSet(30, ebx7)
- X86.HasAVX512DQ = isSet(17, ebx7)
- X86.HasAVX512IFMA = isSet(21, ebx7)
- X86.HasAVX512VBMI = isSet(1, ecx7)
- X86.HasAVX5124VNNIW = isSet(2, edx7)
- X86.HasAVX5124FMAPS = isSet(3, edx7)
- X86.HasAVX512VPOPCNTDQ = isSet(14, ecx7)
- X86.HasAVX512VPCLMULQDQ = isSet(10, ecx7)
- X86.HasAVX512VNNI = isSet(11, ecx7)
- X86.HasAVX512GFNI = isSet(8, ecx7)
- X86.HasAVX512VAES = isSet(9, ecx7)
- X86.HasAVX512VBMI2 = isSet(6, ecx7)
- X86.HasAVX512BITALG = isSet(12, ecx7)
-
- eax71, _, _, _ := cpuid(7, 1)
- X86.HasAVX512BF16 = isSet(5, eax71)
- }
-}
-
-func isSet(bitpos uint, value uint32) bool {
- return value&(1<> 63))
-)
-
-// For those platforms don't have a 'cpuid' equivalent we use HWCAP/HWCAP2
-// These are initialized in cpu_$GOARCH.go
-// and should not be changed after they are initialized.
-var hwCap uint
-var hwCap2 uint
-
-func readHWCAP() error {
- buf, err := ioutil.ReadFile(procAuxv)
- if err != nil {
- // e.g. on android /proc/self/auxv is not accessible, so silently
- // ignore the error and leave Initialized = false. On some
- // architectures (e.g. arm64) doinit() implements a fallback
- // readout and will set Initialized = true again.
- return err
- }
- bo := hostByteOrder()
- for len(buf) >= 2*(uintSize/8) {
- var tag, val uint
- switch uintSize {
- case 32:
- tag = uint(bo.Uint32(buf[0:]))
- val = uint(bo.Uint32(buf[4:]))
- buf = buf[8:]
- case 64:
- tag = uint(bo.Uint64(buf[0:]))
- val = uint(bo.Uint64(buf[8:]))
- buf = buf[16:]
- }
- switch tag {
- case _AT_HWCAP:
- hwCap = val
- case _AT_HWCAP2:
- hwCap2 = val
- }
- }
- return nil
-}
diff --git a/vendor/golang.org/x/sys/cpu/syscall_aix_gccgo.go b/vendor/golang.org/x/sys/cpu/syscall_aix_gccgo.go
deleted file mode 100644
index 9613415..0000000
--- a/vendor/golang.org/x/sys/cpu/syscall_aix_gccgo.go
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Recreate a getsystemcfg syscall handler instead of
-// using the one provided by x/sys/unix to avoid having
-// the dependency between them. (See golang.org/issue/32102)
-// Moreover, this file will be used during the building of
-// gccgo's libgo and thus must not used a CGo method.
-
-//go:build aix && gccgo
-// +build aix,gccgo
-
-package cpu
-
-import (
- "syscall"
-)
-
-//extern getsystemcfg
-func gccgoGetsystemcfg(label uint32) (r uint64)
-
-func callgetsystemcfg(label int) (r1 uintptr, e1 syscall.Errno) {
- r1 = uintptr(gccgoGetsystemcfg(uint32(label)))
- e1 = syscall.GetErrno()
- return
-}
diff --git a/vendor/golang.org/x/sys/cpu/syscall_aix_ppc64_gc.go b/vendor/golang.org/x/sys/cpu/syscall_aix_ppc64_gc.go
deleted file mode 100644
index 904be42..0000000
--- a/vendor/golang.org/x/sys/cpu/syscall_aix_ppc64_gc.go
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Minimal copy of x/sys/unix so the cpu package can make a
-// system call on AIX without depending on x/sys/unix.
-// (See golang.org/issue/32102)
-
-//go:build aix && ppc64 && gc
-// +build aix,ppc64,gc
-
-package cpu
-
-import (
- "syscall"
- "unsafe"
-)
-
-//go:cgo_import_dynamic libc_getsystemcfg getsystemcfg "libc.a/shr_64.o"
-
-//go:linkname libc_getsystemcfg libc_getsystemcfg
-
-type syscallFunc uintptr
-
-var libc_getsystemcfg syscallFunc
-
-type errno = syscall.Errno
-
-// Implemented in runtime/syscall_aix.go.
-func rawSyscall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err errno)
-func syscall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err errno)
-
-func callgetsystemcfg(label int) (r1 uintptr, e1 errno) {
- r1, _, e1 = syscall6(uintptr(unsafe.Pointer(&libc_getsystemcfg)), 1, uintptr(label), 0, 0, 0, 0, 0)
- return
-}
diff --git a/vendor/golang.org/x/sys/internal/unsafeheader/unsafeheader.go b/vendor/golang.org/x/sys/internal/unsafeheader/unsafeheader.go
deleted file mode 100644
index e07899b..0000000
--- a/vendor/golang.org/x/sys/internal/unsafeheader/unsafeheader.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package unsafeheader contains header declarations for the Go runtime's
-// slice and string implementations.
-//
-// This package allows x/sys to use types equivalent to
-// reflect.SliceHeader and reflect.StringHeader without introducing
-// a dependency on the (relatively heavy) "reflect" package.
-package unsafeheader
-
-import (
- "unsafe"
-)
-
-// Slice is the runtime representation of a slice.
-// It cannot be used safely or portably and its representation may change in a later release.
-type Slice struct {
- Data unsafe.Pointer
- Len int
- Cap int
-}
-
-// String is the runtime representation of a string.
-// It cannot be used safely or portably and its representation may change in a later release.
-type String struct {
- Data unsafe.Pointer
- Len int
-}
diff --git a/vendor/golang.org/x/sys/unix/.gitignore b/vendor/golang.org/x/sys/unix/.gitignore
deleted file mode 100644
index e3e0fc6..0000000
--- a/vendor/golang.org/x/sys/unix/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-_obj/
-unix.test
diff --git a/vendor/golang.org/x/sys/unix/README.md b/vendor/golang.org/x/sys/unix/README.md
deleted file mode 100644
index 7d3c060..0000000
--- a/vendor/golang.org/x/sys/unix/README.md
+++ /dev/null
@@ -1,184 +0,0 @@
-# Building `sys/unix`
-
-The sys/unix package provides access to the raw system call interface of the
-underlying operating system. See: https://godoc.org/golang.org/x/sys/unix
-
-Porting Go to a new architecture/OS combination or adding syscalls, types, or
-constants to an existing architecture/OS pair requires some manual effort;
-however, there are tools that automate much of the process.
-
-## Build Systems
-
-There are currently two ways we generate the necessary files. We are currently
-migrating the build system to use containers so the builds are reproducible.
-This is being done on an OS-by-OS basis. Please update this documentation as
-components of the build system change.
-
-### Old Build System (currently for `GOOS != "linux"`)
-
-The old build system generates the Go files based on the C header files
-present on your system. This means that files
-for a given GOOS/GOARCH pair must be generated on a system with that OS and
-architecture. This also means that the generated code can differ from system
-to system, based on differences in the header files.
-
-To avoid this, if you are using the old build system, only generate the Go
-files on an installation with unmodified header files. It is also important to
-keep track of which version of the OS the files were generated from (ex.
-Darwin 14 vs Darwin 15). This makes it easier to track the progress of changes
-and have each OS upgrade correspond to a single change.
-
-To build the files for your current OS and architecture, make sure GOOS and
-GOARCH are set correctly and run `mkall.sh`. This will generate the files for
-your specific system. Running `mkall.sh -n` shows the commands that will be run.
-
-Requirements: bash, go
-
-### New Build System (currently for `GOOS == "linux"`)
-
-The new build system uses a Docker container to generate the go files directly
-from source checkouts of the kernel and various system libraries. This means
-that on any platform that supports Docker, all the files using the new build
-system can be generated at once, and generated files will not change based on
-what the person running the scripts has installed on their computer.
-
-The OS specific files for the new build system are located in the `${GOOS}`
-directory, and the build is coordinated by the `${GOOS}/mkall.go` program. When
-the kernel or system library updates, modify the Dockerfile at
-`${GOOS}/Dockerfile` to checkout the new release of the source.
-
-To build all the files under the new build system, you must be on an amd64/Linux
-system and have your GOOS and GOARCH set accordingly. Running `mkall.sh` will
-then generate all of the files for all of the GOOS/GOARCH pairs in the new build
-system. Running `mkall.sh -n` shows the commands that will be run.
-
-Requirements: bash, go, docker
-
-## Component files
-
-This section describes the various files used in the code generation process.
-It also contains instructions on how to modify these files to add a new
-architecture/OS or to add additional syscalls, types, or constants. Note that
-if you are using the new build system, the scripts/programs cannot be called normally.
-They must be called from within the docker container.
-
-### asm files
-
-The hand-written assembly file at `asm_${GOOS}_${GOARCH}.s` implements system
-call dispatch. There are three entry points:
-```
- func Syscall(trap, a1, a2, a3 uintptr) (r1, r2, err uintptr)
- func Syscall6(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr)
- func RawSyscall(trap, a1, a2, a3 uintptr) (r1, r2, err uintptr)
-```
-The first and second are the standard ones; they differ only in how many
-arguments can be passed to the kernel. The third is for low-level use by the
-ForkExec wrapper. Unlike the first two, it does not call into the scheduler to
-let it know that a system call is running.
-
-When porting Go to a new architecture/OS, this file must be implemented for
-each GOOS/GOARCH pair.
-
-### mksysnum
-
-Mksysnum is a Go program located at `${GOOS}/mksysnum.go` (or `mksysnum_${GOOS}.go`
-for the old system). This program takes in a list of header files containing the
-syscall number declarations and parses them to produce the corresponding list of
-Go numeric constants. See `zsysnum_${GOOS}_${GOARCH}.go` for the generated
-constants.
-
-Adding new syscall numbers is mostly done by running the build on a sufficiently
-new installation of the target OS (or updating the source checkouts for the
-new build system). However, depending on the OS, you may need to update the
-parsing in mksysnum.
-
-### mksyscall.go
-
-The `syscall.go`, `syscall_${GOOS}.go`, `syscall_${GOOS}_${GOARCH}.go` are
-hand-written Go files which implement system calls (for unix, the specific OS,
-or the specific OS/Architecture pair respectively) that need special handling
-and list `//sys` comments giving prototypes for ones that can be generated.
-
-The mksyscall.go program takes the `//sys` and `//sysnb` comments and converts
-them into syscalls. This requires the name of the prototype in the comment to
-match a syscall number in the `zsysnum_${GOOS}_${GOARCH}.go` file. The function
-prototype can be exported (capitalized) or not.
-
-Adding a new syscall often just requires adding a new `//sys` function prototype
-with the desired arguments and a capitalized name so it is exported. However, if
-you want the interface to the syscall to be different, often one will make an
-unexported `//sys` prototype, and then write a custom wrapper in
-`syscall_${GOOS}.go`.
-
-### types files
-
-For each OS, there is a hand-written Go file at `${GOOS}/types.go` (or
-`types_${GOOS}.go` on the old system). This file includes standard C headers and
-creates Go type aliases to the corresponding C types. The file is then fed
-through godef to get the Go compatible definitions. Finally, the generated code
-is fed though mkpost.go to format the code correctly and remove any hidden or
-private identifiers. This cleaned-up code is written to
-`ztypes_${GOOS}_${GOARCH}.go`.
-
-The hardest part about preparing this file is figuring out which headers to
-include and which symbols need to be `#define`d to get the actual data
-structures that pass through to the kernel system calls. Some C libraries
-preset alternate versions for binary compatibility and translate them on the
-way in and out of system calls, but there is almost always a `#define` that can
-get the real ones.
-See `types_darwin.go` and `linux/types.go` for examples.
-
-To add a new type, add in the necessary include statement at the top of the
-file (if it is not already there) and add in a type alias line. Note that if
-your type is significantly different on different architectures, you may need
-some `#if/#elif` macros in your include statements.
-
-### mkerrors.sh
-
-This script is used to generate the system's various constants. This doesn't
-just include the error numbers and error strings, but also the signal numbers
-and a wide variety of miscellaneous constants. The constants come from the list
-of include files in the `includes_${uname}` variable. A regex then picks out
-the desired `#define` statements, and generates the corresponding Go constants.
-The error numbers and strings are generated from `#include `, and the
-signal numbers and strings are generated from `#include `. All of
-these constants are written to `zerrors_${GOOS}_${GOARCH}.go` via a C program,
-`_errors.c`, which prints out all the constants.
-
-To add a constant, add the header that includes it to the appropriate variable.
-Then, edit the regex (if necessary) to match the desired constant. Avoid making
-the regex too broad to avoid matching unintended constants.
-
-### internal/mkmerge
-
-This program is used to extract duplicate const, func, and type declarations
-from the generated architecture-specific files listed below, and merge these
-into a common file for each OS.
-
-The merge is performed in the following steps:
-1. Construct the set of common code that is idential in all architecture-specific files.
-2. Write this common code to the merged file.
-3. Remove the common code from all architecture-specific files.
-
-
-## Generated files
-
-### `zerrors_${GOOS}_${GOARCH}.go`
-
-A file containing all of the system's generated error numbers, error strings,
-signal numbers, and constants. Generated by `mkerrors.sh` (see above).
-
-### `zsyscall_${GOOS}_${GOARCH}.go`
-
-A file containing all the generated syscalls for a specific GOOS and GOARCH.
-Generated by `mksyscall.go` (see above).
-
-### `zsysnum_${GOOS}_${GOARCH}.go`
-
-A list of numeric constants for all the syscall number of the specific GOOS
-and GOARCH. Generated by mksysnum (see above).
-
-### `ztypes_${GOOS}_${GOARCH}.go`
-
-A file containing Go types for passing into (or returning from) syscalls.
-Generated by godefs and the types file (see above).
diff --git a/vendor/golang.org/x/sys/unix/affinity_linux.go b/vendor/golang.org/x/sys/unix/affinity_linux.go
deleted file mode 100644
index 6e5c81a..0000000
--- a/vendor/golang.org/x/sys/unix/affinity_linux.go
+++ /dev/null
@@ -1,86 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// CPU affinity functions
-
-package unix
-
-import (
- "math/bits"
- "unsafe"
-)
-
-const cpuSetSize = _CPU_SETSIZE / _NCPUBITS
-
-// CPUSet represents a CPU affinity mask.
-type CPUSet [cpuSetSize]cpuMask
-
-func schedAffinity(trap uintptr, pid int, set *CPUSet) error {
- _, _, e := RawSyscall(trap, uintptr(pid), uintptr(unsafe.Sizeof(*set)), uintptr(unsafe.Pointer(set)))
- if e != 0 {
- return errnoErr(e)
- }
- return nil
-}
-
-// SchedGetaffinity gets the CPU affinity mask of the thread specified by pid.
-// If pid is 0 the calling thread is used.
-func SchedGetaffinity(pid int, set *CPUSet) error {
- return schedAffinity(SYS_SCHED_GETAFFINITY, pid, set)
-}
-
-// SchedSetaffinity sets the CPU affinity mask of the thread specified by pid.
-// If pid is 0 the calling thread is used.
-func SchedSetaffinity(pid int, set *CPUSet) error {
- return schedAffinity(SYS_SCHED_SETAFFINITY, pid, set)
-}
-
-// Zero clears the set s, so that it contains no CPUs.
-func (s *CPUSet) Zero() {
- for i := range s {
- s[i] = 0
- }
-}
-
-func cpuBitsIndex(cpu int) int {
- return cpu / _NCPUBITS
-}
-
-func cpuBitsMask(cpu int) cpuMask {
- return cpuMask(1 << (uint(cpu) % _NCPUBITS))
-}
-
-// Set adds cpu to the set s.
-func (s *CPUSet) Set(cpu int) {
- i := cpuBitsIndex(cpu)
- if i < len(s) {
- s[i] |= cpuBitsMask(cpu)
- }
-}
-
-// Clear removes cpu from the set s.
-func (s *CPUSet) Clear(cpu int) {
- i := cpuBitsIndex(cpu)
- if i < len(s) {
- s[i] &^= cpuBitsMask(cpu)
- }
-}
-
-// IsSet reports whether cpu is in the set s.
-func (s *CPUSet) IsSet(cpu int) bool {
- i := cpuBitsIndex(cpu)
- if i < len(s) {
- return s[i]&cpuBitsMask(cpu) != 0
- }
- return false
-}
-
-// Count returns the number of CPUs in the set s.
-func (s *CPUSet) Count() int {
- c := 0
- for _, b := range s {
- c += bits.OnesCount64(uint64(b))
- }
- return c
-}
diff --git a/vendor/golang.org/x/sys/unix/aliases.go b/vendor/golang.org/x/sys/unix/aliases.go
deleted file mode 100644
index abc89c1..0000000
--- a/vendor/golang.org/x/sys/unix/aliases.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos) && go1.9
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris zos
-// +build go1.9
-
-package unix
-
-import "syscall"
-
-type Signal = syscall.Signal
-type Errno = syscall.Errno
-type SysProcAttr = syscall.SysProcAttr
diff --git a/vendor/golang.org/x/sys/unix/asm_aix_ppc64.s b/vendor/golang.org/x/sys/unix/asm_aix_ppc64.s
deleted file mode 100644
index db9171c..0000000
--- a/vendor/golang.org/x/sys/unix/asm_aix_ppc64.s
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gc
-// +build gc
-
-#include "textflag.h"
-
-//
-// System calls for ppc64, AIX are implemented in runtime/syscall_aix.go
-//
-
-TEXT ·syscall6(SB),NOSPLIT,$0-88
- JMP syscall·syscall6(SB)
-
-TEXT ·rawSyscall6(SB),NOSPLIT,$0-88
- JMP syscall·rawSyscall6(SB)
diff --git a/vendor/golang.org/x/sys/unix/asm_bsd_386.s b/vendor/golang.org/x/sys/unix/asm_bsd_386.s
deleted file mode 100644
index e0fcd9b..0000000
--- a/vendor/golang.org/x/sys/unix/asm_bsd_386.s
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2021 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (freebsd || netbsd || openbsd) && gc
-// +build freebsd netbsd openbsd
-// +build gc
-
-#include "textflag.h"
-
-// System call support for 386 BSD
-
-// Just jump to package syscall's implementation for all these functions.
-// The runtime may know about them.
-
-TEXT ·Syscall(SB),NOSPLIT,$0-28
- JMP syscall·Syscall(SB)
-
-TEXT ·Syscall6(SB),NOSPLIT,$0-40
- JMP syscall·Syscall6(SB)
-
-TEXT ·Syscall9(SB),NOSPLIT,$0-52
- JMP syscall·Syscall9(SB)
-
-TEXT ·RawSyscall(SB),NOSPLIT,$0-28
- JMP syscall·RawSyscall(SB)
-
-TEXT ·RawSyscall6(SB),NOSPLIT,$0-40
- JMP syscall·RawSyscall6(SB)
diff --git a/vendor/golang.org/x/sys/unix/asm_bsd_amd64.s b/vendor/golang.org/x/sys/unix/asm_bsd_amd64.s
deleted file mode 100644
index 2b99c34..0000000
--- a/vendor/golang.org/x/sys/unix/asm_bsd_amd64.s
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2021 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (darwin || dragonfly || freebsd || netbsd || openbsd) && gc
-// +build darwin dragonfly freebsd netbsd openbsd
-// +build gc
-
-#include "textflag.h"
-
-// System call support for AMD64 BSD
-
-// Just jump to package syscall's implementation for all these functions.
-// The runtime may know about them.
-
-TEXT ·Syscall(SB),NOSPLIT,$0-56
- JMP syscall·Syscall(SB)
-
-TEXT ·Syscall6(SB),NOSPLIT,$0-80
- JMP syscall·Syscall6(SB)
-
-TEXT ·Syscall9(SB),NOSPLIT,$0-104
- JMP syscall·Syscall9(SB)
-
-TEXT ·RawSyscall(SB),NOSPLIT,$0-56
- JMP syscall·RawSyscall(SB)
-
-TEXT ·RawSyscall6(SB),NOSPLIT,$0-80
- JMP syscall·RawSyscall6(SB)
diff --git a/vendor/golang.org/x/sys/unix/asm_bsd_arm.s b/vendor/golang.org/x/sys/unix/asm_bsd_arm.s
deleted file mode 100644
index d702d4a..0000000
--- a/vendor/golang.org/x/sys/unix/asm_bsd_arm.s
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2021 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (freebsd || netbsd || openbsd) && gc
-// +build freebsd netbsd openbsd
-// +build gc
-
-#include "textflag.h"
-
-// System call support for ARM BSD
-
-// Just jump to package syscall's implementation for all these functions.
-// The runtime may know about them.
-
-TEXT ·Syscall(SB),NOSPLIT,$0-28
- B syscall·Syscall(SB)
-
-TEXT ·Syscall6(SB),NOSPLIT,$0-40
- B syscall·Syscall6(SB)
-
-TEXT ·Syscall9(SB),NOSPLIT,$0-52
- B syscall·Syscall9(SB)
-
-TEXT ·RawSyscall(SB),NOSPLIT,$0-28
- B syscall·RawSyscall(SB)
-
-TEXT ·RawSyscall6(SB),NOSPLIT,$0-40
- B syscall·RawSyscall6(SB)
diff --git a/vendor/golang.org/x/sys/unix/asm_bsd_arm64.s b/vendor/golang.org/x/sys/unix/asm_bsd_arm64.s
deleted file mode 100644
index fe36a73..0000000
--- a/vendor/golang.org/x/sys/unix/asm_bsd_arm64.s
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2021 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (darwin || freebsd || netbsd || openbsd) && gc
-// +build darwin freebsd netbsd openbsd
-// +build gc
-
-#include "textflag.h"
-
-// System call support for ARM64 BSD
-
-// Just jump to package syscall's implementation for all these functions.
-// The runtime may know about them.
-
-TEXT ·Syscall(SB),NOSPLIT,$0-56
- JMP syscall·Syscall(SB)
-
-TEXT ·Syscall6(SB),NOSPLIT,$0-80
- JMP syscall·Syscall6(SB)
-
-TEXT ·Syscall9(SB),NOSPLIT,$0-104
- JMP syscall·Syscall9(SB)
-
-TEXT ·RawSyscall(SB),NOSPLIT,$0-56
- JMP syscall·RawSyscall(SB)
-
-TEXT ·RawSyscall6(SB),NOSPLIT,$0-80
- JMP syscall·RawSyscall6(SB)
diff --git a/vendor/golang.org/x/sys/unix/asm_bsd_riscv64.s b/vendor/golang.org/x/sys/unix/asm_bsd_riscv64.s
deleted file mode 100644
index d560019..0000000
--- a/vendor/golang.org/x/sys/unix/asm_bsd_riscv64.s
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2021 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (darwin || freebsd || netbsd || openbsd) && gc
-// +build darwin freebsd netbsd openbsd
-// +build gc
-
-#include "textflag.h"
-
-// System call support for RISCV64 BSD
-
-// Just jump to package syscall's implementation for all these functions.
-// The runtime may know about them.
-
-TEXT ·Syscall(SB),NOSPLIT,$0-56
- JMP syscall·Syscall(SB)
-
-TEXT ·Syscall6(SB),NOSPLIT,$0-80
- JMP syscall·Syscall6(SB)
-
-TEXT ·Syscall9(SB),NOSPLIT,$0-104
- JMP syscall·Syscall9(SB)
-
-TEXT ·RawSyscall(SB),NOSPLIT,$0-56
- JMP syscall·RawSyscall(SB)
-
-TEXT ·RawSyscall6(SB),NOSPLIT,$0-80
- JMP syscall·RawSyscall6(SB)
diff --git a/vendor/golang.org/x/sys/unix/asm_linux_386.s b/vendor/golang.org/x/sys/unix/asm_linux_386.s
deleted file mode 100644
index 8fd101d..0000000
--- a/vendor/golang.org/x/sys/unix/asm_linux_386.s
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gc
-// +build gc
-
-#include "textflag.h"
-
-//
-// System calls for 386, Linux
-//
-
-// See ../runtime/sys_linux_386.s for the reason why we always use int 0x80
-// instead of the glibc-specific "CALL 0x10(GS)".
-#define INVOKE_SYSCALL INT $0x80
-
-// Just jump to package syscall's implementation for all these functions.
-// The runtime may know about them.
-
-TEXT ·Syscall(SB),NOSPLIT,$0-28
- JMP syscall·Syscall(SB)
-
-TEXT ·Syscall6(SB),NOSPLIT,$0-40
- JMP syscall·Syscall6(SB)
-
-TEXT ·SyscallNoError(SB),NOSPLIT,$0-24
- CALL runtime·entersyscall(SB)
- MOVL trap+0(FP), AX // syscall entry
- MOVL a1+4(FP), BX
- MOVL a2+8(FP), CX
- MOVL a3+12(FP), DX
- MOVL $0, SI
- MOVL $0, DI
- INVOKE_SYSCALL
- MOVL AX, r1+16(FP)
- MOVL DX, r2+20(FP)
- CALL runtime·exitsyscall(SB)
- RET
-
-TEXT ·RawSyscall(SB),NOSPLIT,$0-28
- JMP syscall·RawSyscall(SB)
-
-TEXT ·RawSyscall6(SB),NOSPLIT,$0-40
- JMP syscall·RawSyscall6(SB)
-
-TEXT ·RawSyscallNoError(SB),NOSPLIT,$0-24
- MOVL trap+0(FP), AX // syscall entry
- MOVL a1+4(FP), BX
- MOVL a2+8(FP), CX
- MOVL a3+12(FP), DX
- MOVL $0, SI
- MOVL $0, DI
- INVOKE_SYSCALL
- MOVL AX, r1+16(FP)
- MOVL DX, r2+20(FP)
- RET
-
-TEXT ·socketcall(SB),NOSPLIT,$0-36
- JMP syscall·socketcall(SB)
-
-TEXT ·rawsocketcall(SB),NOSPLIT,$0-36
- JMP syscall·rawsocketcall(SB)
-
-TEXT ·seek(SB),NOSPLIT,$0-28
- JMP syscall·seek(SB)
diff --git a/vendor/golang.org/x/sys/unix/asm_linux_amd64.s b/vendor/golang.org/x/sys/unix/asm_linux_amd64.s
deleted file mode 100644
index 7ed38e4..0000000
--- a/vendor/golang.org/x/sys/unix/asm_linux_amd64.s
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gc
-// +build gc
-
-#include "textflag.h"
-
-//
-// System calls for AMD64, Linux
-//
-
-// Just jump to package syscall's implementation for all these functions.
-// The runtime may know about them.
-
-TEXT ·Syscall(SB),NOSPLIT,$0-56
- JMP syscall·Syscall(SB)
-
-TEXT ·Syscall6(SB),NOSPLIT,$0-80
- JMP syscall·Syscall6(SB)
-
-TEXT ·SyscallNoError(SB),NOSPLIT,$0-48
- CALL runtime·entersyscall(SB)
- MOVQ a1+8(FP), DI
- MOVQ a2+16(FP), SI
- MOVQ a3+24(FP), DX
- MOVQ $0, R10
- MOVQ $0, R8
- MOVQ $0, R9
- MOVQ trap+0(FP), AX // syscall entry
- SYSCALL
- MOVQ AX, r1+32(FP)
- MOVQ DX, r2+40(FP)
- CALL runtime·exitsyscall(SB)
- RET
-
-TEXT ·RawSyscall(SB),NOSPLIT,$0-56
- JMP syscall·RawSyscall(SB)
-
-TEXT ·RawSyscall6(SB),NOSPLIT,$0-80
- JMP syscall·RawSyscall6(SB)
-
-TEXT ·RawSyscallNoError(SB),NOSPLIT,$0-48
- MOVQ a1+8(FP), DI
- MOVQ a2+16(FP), SI
- MOVQ a3+24(FP), DX
- MOVQ $0, R10
- MOVQ $0, R8
- MOVQ $0, R9
- MOVQ trap+0(FP), AX // syscall entry
- SYSCALL
- MOVQ AX, r1+32(FP)
- MOVQ DX, r2+40(FP)
- RET
-
-TEXT ·gettimeofday(SB),NOSPLIT,$0-16
- JMP syscall·gettimeofday(SB)
diff --git a/vendor/golang.org/x/sys/unix/asm_linux_arm.s b/vendor/golang.org/x/sys/unix/asm_linux_arm.s
deleted file mode 100644
index 8ef1d51..0000000
--- a/vendor/golang.org/x/sys/unix/asm_linux_arm.s
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gc
-// +build gc
-
-#include "textflag.h"
-
-//
-// System calls for arm, Linux
-//
-
-// Just jump to package syscall's implementation for all these functions.
-// The runtime may know about them.
-
-TEXT ·Syscall(SB),NOSPLIT,$0-28
- B syscall·Syscall(SB)
-
-TEXT ·Syscall6(SB),NOSPLIT,$0-40
- B syscall·Syscall6(SB)
-
-TEXT ·SyscallNoError(SB),NOSPLIT,$0-24
- BL runtime·entersyscall(SB)
- MOVW trap+0(FP), R7
- MOVW a1+4(FP), R0
- MOVW a2+8(FP), R1
- MOVW a3+12(FP), R2
- MOVW $0, R3
- MOVW $0, R4
- MOVW $0, R5
- SWI $0
- MOVW R0, r1+16(FP)
- MOVW $0, R0
- MOVW R0, r2+20(FP)
- BL runtime·exitsyscall(SB)
- RET
-
-TEXT ·RawSyscall(SB),NOSPLIT,$0-28
- B syscall·RawSyscall(SB)
-
-TEXT ·RawSyscall6(SB),NOSPLIT,$0-40
- B syscall·RawSyscall6(SB)
-
-TEXT ·RawSyscallNoError(SB),NOSPLIT,$0-24
- MOVW trap+0(FP), R7 // syscall entry
- MOVW a1+4(FP), R0
- MOVW a2+8(FP), R1
- MOVW a3+12(FP), R2
- SWI $0
- MOVW R0, r1+16(FP)
- MOVW $0, R0
- MOVW R0, r2+20(FP)
- RET
-
-TEXT ·seek(SB),NOSPLIT,$0-28
- B syscall·seek(SB)
diff --git a/vendor/golang.org/x/sys/unix/asm_linux_arm64.s b/vendor/golang.org/x/sys/unix/asm_linux_arm64.s
deleted file mode 100644
index 98ae027..0000000
--- a/vendor/golang.org/x/sys/unix/asm_linux_arm64.s
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build linux && arm64 && gc
-// +build linux
-// +build arm64
-// +build gc
-
-#include "textflag.h"
-
-// Just jump to package syscall's implementation for all these functions.
-// The runtime may know about them.
-
-TEXT ·Syscall(SB),NOSPLIT,$0-56
- B syscall·Syscall(SB)
-
-TEXT ·Syscall6(SB),NOSPLIT,$0-80
- B syscall·Syscall6(SB)
-
-TEXT ·SyscallNoError(SB),NOSPLIT,$0-48
- BL runtime·entersyscall(SB)
- MOVD a1+8(FP), R0
- MOVD a2+16(FP), R1
- MOVD a3+24(FP), R2
- MOVD $0, R3
- MOVD $0, R4
- MOVD $0, R5
- MOVD trap+0(FP), R8 // syscall entry
- SVC
- MOVD R0, r1+32(FP) // r1
- MOVD R1, r2+40(FP) // r2
- BL runtime·exitsyscall(SB)
- RET
-
-TEXT ·RawSyscall(SB),NOSPLIT,$0-56
- B syscall·RawSyscall(SB)
-
-TEXT ·RawSyscall6(SB),NOSPLIT,$0-80
- B syscall·RawSyscall6(SB)
-
-TEXT ·RawSyscallNoError(SB),NOSPLIT,$0-48
- MOVD a1+8(FP), R0
- MOVD a2+16(FP), R1
- MOVD a3+24(FP), R2
- MOVD $0, R3
- MOVD $0, R4
- MOVD $0, R5
- MOVD trap+0(FP), R8 // syscall entry
- SVC
- MOVD R0, r1+32(FP)
- MOVD R1, r2+40(FP)
- RET
diff --git a/vendor/golang.org/x/sys/unix/asm_linux_loong64.s b/vendor/golang.org/x/sys/unix/asm_linux_loong64.s
deleted file mode 100644
index 5653572..0000000
--- a/vendor/golang.org/x/sys/unix/asm_linux_loong64.s
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright 2022 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build linux && loong64 && gc
-// +build linux
-// +build loong64
-// +build gc
-
-#include "textflag.h"
-
-
-// Just jump to package syscall's implementation for all these functions.
-// The runtime may know about them.
-
-TEXT ·Syscall(SB),NOSPLIT,$0-56
- JMP syscall·Syscall(SB)
-
-TEXT ·Syscall6(SB),NOSPLIT,$0-80
- JMP syscall·Syscall6(SB)
-
-TEXT ·SyscallNoError(SB),NOSPLIT,$0-48
- JAL runtime·entersyscall(SB)
- MOVV a1+8(FP), R4
- MOVV a2+16(FP), R5
- MOVV a3+24(FP), R6
- MOVV R0, R7
- MOVV R0, R8
- MOVV R0, R9
- MOVV trap+0(FP), R11 // syscall entry
- SYSCALL
- MOVV R4, r1+32(FP)
- MOVV R0, r2+40(FP) // r2 is not used. Always set to 0
- JAL runtime·exitsyscall(SB)
- RET
-
-TEXT ·RawSyscall(SB),NOSPLIT,$0-56
- JMP syscall·RawSyscall(SB)
-
-TEXT ·RawSyscall6(SB),NOSPLIT,$0-80
- JMP syscall·RawSyscall6(SB)
-
-TEXT ·RawSyscallNoError(SB),NOSPLIT,$0-48
- MOVV a1+8(FP), R4
- MOVV a2+16(FP), R5
- MOVV a3+24(FP), R6
- MOVV R0, R7
- MOVV R0, R8
- MOVV R0, R9
- MOVV trap+0(FP), R11 // syscall entry
- SYSCALL
- MOVV R4, r1+32(FP)
- MOVV R0, r2+40(FP) // r2 is not used. Always set to 0
- RET
diff --git a/vendor/golang.org/x/sys/unix/asm_linux_mips64x.s b/vendor/golang.org/x/sys/unix/asm_linux_mips64x.s
deleted file mode 100644
index 21231d2..0000000
--- a/vendor/golang.org/x/sys/unix/asm_linux_mips64x.s
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build linux && (mips64 || mips64le) && gc
-// +build linux
-// +build mips64 mips64le
-// +build gc
-
-#include "textflag.h"
-
-//
-// System calls for mips64, Linux
-//
-
-// Just jump to package syscall's implementation for all these functions.
-// The runtime may know about them.
-
-TEXT ·Syscall(SB),NOSPLIT,$0-56
- JMP syscall·Syscall(SB)
-
-TEXT ·Syscall6(SB),NOSPLIT,$0-80
- JMP syscall·Syscall6(SB)
-
-TEXT ·SyscallNoError(SB),NOSPLIT,$0-48
- JAL runtime·entersyscall(SB)
- MOVV a1+8(FP), R4
- MOVV a2+16(FP), R5
- MOVV a3+24(FP), R6
- MOVV R0, R7
- MOVV R0, R8
- MOVV R0, R9
- MOVV trap+0(FP), R2 // syscall entry
- SYSCALL
- MOVV R2, r1+32(FP)
- MOVV R3, r2+40(FP)
- JAL runtime·exitsyscall(SB)
- RET
-
-TEXT ·RawSyscall(SB),NOSPLIT,$0-56
- JMP syscall·RawSyscall(SB)
-
-TEXT ·RawSyscall6(SB),NOSPLIT,$0-80
- JMP syscall·RawSyscall6(SB)
-
-TEXT ·RawSyscallNoError(SB),NOSPLIT,$0-48
- MOVV a1+8(FP), R4
- MOVV a2+16(FP), R5
- MOVV a3+24(FP), R6
- MOVV R0, R7
- MOVV R0, R8
- MOVV R0, R9
- MOVV trap+0(FP), R2 // syscall entry
- SYSCALL
- MOVV R2, r1+32(FP)
- MOVV R3, r2+40(FP)
- RET
diff --git a/vendor/golang.org/x/sys/unix/asm_linux_mipsx.s b/vendor/golang.org/x/sys/unix/asm_linux_mipsx.s
deleted file mode 100644
index 6783b26..0000000
--- a/vendor/golang.org/x/sys/unix/asm_linux_mipsx.s
+++ /dev/null
@@ -1,55 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build linux && (mips || mipsle) && gc
-// +build linux
-// +build mips mipsle
-// +build gc
-
-#include "textflag.h"
-
-//
-// System calls for mips, Linux
-//
-
-// Just jump to package syscall's implementation for all these functions.
-// The runtime may know about them.
-
-TEXT ·Syscall(SB),NOSPLIT,$0-28
- JMP syscall·Syscall(SB)
-
-TEXT ·Syscall6(SB),NOSPLIT,$0-40
- JMP syscall·Syscall6(SB)
-
-TEXT ·Syscall9(SB),NOSPLIT,$0-52
- JMP syscall·Syscall9(SB)
-
-TEXT ·SyscallNoError(SB),NOSPLIT,$0-24
- JAL runtime·entersyscall(SB)
- MOVW a1+4(FP), R4
- MOVW a2+8(FP), R5
- MOVW a3+12(FP), R6
- MOVW R0, R7
- MOVW trap+0(FP), R2 // syscall entry
- SYSCALL
- MOVW R2, r1+16(FP) // r1
- MOVW R3, r2+20(FP) // r2
- JAL runtime·exitsyscall(SB)
- RET
-
-TEXT ·RawSyscall(SB),NOSPLIT,$0-28
- JMP syscall·RawSyscall(SB)
-
-TEXT ·RawSyscall6(SB),NOSPLIT,$0-40
- JMP syscall·RawSyscall6(SB)
-
-TEXT ·RawSyscallNoError(SB),NOSPLIT,$0-24
- MOVW a1+4(FP), R4
- MOVW a2+8(FP), R5
- MOVW a3+12(FP), R6
- MOVW trap+0(FP), R2 // syscall entry
- SYSCALL
- MOVW R2, r1+16(FP)
- MOVW R3, r2+20(FP)
- RET
diff --git a/vendor/golang.org/x/sys/unix/asm_linux_ppc64x.s b/vendor/golang.org/x/sys/unix/asm_linux_ppc64x.s
deleted file mode 100644
index 19d4989..0000000
--- a/vendor/golang.org/x/sys/unix/asm_linux_ppc64x.s
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build linux && (ppc64 || ppc64le) && gc
-// +build linux
-// +build ppc64 ppc64le
-// +build gc
-
-#include "textflag.h"
-
-//
-// System calls for ppc64, Linux
-//
-
-// Just jump to package syscall's implementation for all these functions.
-// The runtime may know about them.
-
-TEXT ·SyscallNoError(SB),NOSPLIT,$0-48
- BL runtime·entersyscall(SB)
- MOVD a1+8(FP), R3
- MOVD a2+16(FP), R4
- MOVD a3+24(FP), R5
- MOVD R0, R6
- MOVD R0, R7
- MOVD R0, R8
- MOVD trap+0(FP), R9 // syscall entry
- SYSCALL R9
- MOVD R3, r1+32(FP)
- MOVD R4, r2+40(FP)
- BL runtime·exitsyscall(SB)
- RET
-
-TEXT ·RawSyscallNoError(SB),NOSPLIT,$0-48
- MOVD a1+8(FP), R3
- MOVD a2+16(FP), R4
- MOVD a3+24(FP), R5
- MOVD R0, R6
- MOVD R0, R7
- MOVD R0, R8
- MOVD trap+0(FP), R9 // syscall entry
- SYSCALL R9
- MOVD R3, r1+32(FP)
- MOVD R4, r2+40(FP)
- RET
diff --git a/vendor/golang.org/x/sys/unix/asm_linux_riscv64.s b/vendor/golang.org/x/sys/unix/asm_linux_riscv64.s
deleted file mode 100644
index e42eb81..0000000
--- a/vendor/golang.org/x/sys/unix/asm_linux_riscv64.s
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build riscv64 && gc
-// +build riscv64
-// +build gc
-
-#include "textflag.h"
-
-//
-// System calls for linux/riscv64.
-//
-// Where available, just jump to package syscall's implementation of
-// these functions.
-
-TEXT ·Syscall(SB),NOSPLIT,$0-56
- JMP syscall·Syscall(SB)
-
-TEXT ·Syscall6(SB),NOSPLIT,$0-80
- JMP syscall·Syscall6(SB)
-
-TEXT ·SyscallNoError(SB),NOSPLIT,$0-48
- CALL runtime·entersyscall(SB)
- MOV a1+8(FP), A0
- MOV a2+16(FP), A1
- MOV a3+24(FP), A2
- MOV trap+0(FP), A7 // syscall entry
- ECALL
- MOV A0, r1+32(FP) // r1
- MOV A1, r2+40(FP) // r2
- CALL runtime·exitsyscall(SB)
- RET
-
-TEXT ·RawSyscall(SB),NOSPLIT,$0-56
- JMP syscall·RawSyscall(SB)
-
-TEXT ·RawSyscall6(SB),NOSPLIT,$0-80
- JMP syscall·RawSyscall6(SB)
-
-TEXT ·RawSyscallNoError(SB),NOSPLIT,$0-48
- MOV a1+8(FP), A0
- MOV a2+16(FP), A1
- MOV a3+24(FP), A2
- MOV trap+0(FP), A7 // syscall entry
- ECALL
- MOV A0, r1+32(FP)
- MOV A1, r2+40(FP)
- RET
diff --git a/vendor/golang.org/x/sys/unix/asm_linux_s390x.s b/vendor/golang.org/x/sys/unix/asm_linux_s390x.s
deleted file mode 100644
index c46aab3..0000000
--- a/vendor/golang.org/x/sys/unix/asm_linux_s390x.s
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build linux && s390x && gc
-// +build linux
-// +build s390x
-// +build gc
-
-#include "textflag.h"
-
-//
-// System calls for s390x, Linux
-//
-
-// Just jump to package syscall's implementation for all these functions.
-// The runtime may know about them.
-
-TEXT ·Syscall(SB),NOSPLIT,$0-56
- BR syscall·Syscall(SB)
-
-TEXT ·Syscall6(SB),NOSPLIT,$0-80
- BR syscall·Syscall6(SB)
-
-TEXT ·SyscallNoError(SB),NOSPLIT,$0-48
- BL runtime·entersyscall(SB)
- MOVD a1+8(FP), R2
- MOVD a2+16(FP), R3
- MOVD a3+24(FP), R4
- MOVD $0, R5
- MOVD $0, R6
- MOVD $0, R7
- MOVD trap+0(FP), R1 // syscall entry
- SYSCALL
- MOVD R2, r1+32(FP)
- MOVD R3, r2+40(FP)
- BL runtime·exitsyscall(SB)
- RET
-
-TEXT ·RawSyscall(SB),NOSPLIT,$0-56
- BR syscall·RawSyscall(SB)
-
-TEXT ·RawSyscall6(SB),NOSPLIT,$0-80
- BR syscall·RawSyscall6(SB)
-
-TEXT ·RawSyscallNoError(SB),NOSPLIT,$0-48
- MOVD a1+8(FP), R2
- MOVD a2+16(FP), R3
- MOVD a3+24(FP), R4
- MOVD $0, R5
- MOVD $0, R6
- MOVD $0, R7
- MOVD trap+0(FP), R1 // syscall entry
- SYSCALL
- MOVD R2, r1+32(FP)
- MOVD R3, r2+40(FP)
- RET
diff --git a/vendor/golang.org/x/sys/unix/asm_openbsd_mips64.s b/vendor/golang.org/x/sys/unix/asm_openbsd_mips64.s
deleted file mode 100644
index 5e7a116..0000000
--- a/vendor/golang.org/x/sys/unix/asm_openbsd_mips64.s
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gc
-// +build gc
-
-#include "textflag.h"
-
-//
-// System call support for mips64, OpenBSD
-//
-
-// Just jump to package syscall's implementation for all these functions.
-// The runtime may know about them.
-
-TEXT ·Syscall(SB),NOSPLIT,$0-56
- JMP syscall·Syscall(SB)
-
-TEXT ·Syscall6(SB),NOSPLIT,$0-80
- JMP syscall·Syscall6(SB)
-
-TEXT ·Syscall9(SB),NOSPLIT,$0-104
- JMP syscall·Syscall9(SB)
-
-TEXT ·RawSyscall(SB),NOSPLIT,$0-56
- JMP syscall·RawSyscall(SB)
-
-TEXT ·RawSyscall6(SB),NOSPLIT,$0-80
- JMP syscall·RawSyscall6(SB)
diff --git a/vendor/golang.org/x/sys/unix/asm_solaris_amd64.s b/vendor/golang.org/x/sys/unix/asm_solaris_amd64.s
deleted file mode 100644
index f8c5394..0000000
--- a/vendor/golang.org/x/sys/unix/asm_solaris_amd64.s
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gc
-// +build gc
-
-#include "textflag.h"
-
-//
-// System calls for amd64, Solaris are implemented in runtime/syscall_solaris.go
-//
-
-TEXT ·sysvicall6(SB),NOSPLIT,$0-88
- JMP syscall·sysvicall6(SB)
-
-TEXT ·rawSysvicall6(SB),NOSPLIT,$0-88
- JMP syscall·rawSysvicall6(SB)
diff --git a/vendor/golang.org/x/sys/unix/asm_zos_s390x.s b/vendor/golang.org/x/sys/unix/asm_zos_s390x.s
deleted file mode 100644
index 3b54e18..0000000
--- a/vendor/golang.org/x/sys/unix/asm_zos_s390x.s
+++ /dev/null
@@ -1,426 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build zos && s390x && gc
-// +build zos
-// +build s390x
-// +build gc
-
-#include "textflag.h"
-
-#define PSALAA 1208(R0)
-#define GTAB64(x) 80(x)
-#define LCA64(x) 88(x)
-#define CAA(x) 8(x)
-#define EDCHPXV(x) 1016(x) // in the CAA
-#define SAVSTACK_ASYNC(x) 336(x) // in the LCA
-
-// SS_*, where x=SAVSTACK_ASYNC
-#define SS_LE(x) 0(x)
-#define SS_GO(x) 8(x)
-#define SS_ERRNO(x) 16(x)
-#define SS_ERRNOJR(x) 20(x)
-
-#define LE_CALL BYTE $0x0D; BYTE $0x76; // BL R7, R6
-
-TEXT ·clearErrno(SB),NOSPLIT,$0-0
- BL addrerrno<>(SB)
- MOVD $0, 0(R3)
- RET
-
-// Returns the address of errno in R3.
-TEXT addrerrno<>(SB),NOSPLIT|NOFRAME,$0-0
- // Get library control area (LCA).
- MOVW PSALAA, R8
- MOVD LCA64(R8), R8
-
- // Get __errno FuncDesc.
- MOVD CAA(R8), R9
- MOVD EDCHPXV(R9), R9
- ADD $(0x156*16), R9
- LMG 0(R9), R5, R6
-
- // Switch to saved LE stack.
- MOVD SAVSTACK_ASYNC(R8), R9
- MOVD 0(R9), R4
- MOVD $0, 0(R9)
-
- // Call __errno function.
- LE_CALL
- NOPH
-
- // Switch back to Go stack.
- XOR R0, R0 // Restore R0 to $0.
- MOVD R4, 0(R9) // Save stack pointer.
- RET
-
-TEXT ·syscall_syscall(SB),NOSPLIT,$0-56
- BL runtime·entersyscall(SB)
- MOVD a1+8(FP), R1
- MOVD a2+16(FP), R2
- MOVD a3+24(FP), R3
-
- // Get library control area (LCA).
- MOVW PSALAA, R8
- MOVD LCA64(R8), R8
-
- // Get function.
- MOVD CAA(R8), R9
- MOVD EDCHPXV(R9), R9
- MOVD trap+0(FP), R5
- SLD $4, R5
- ADD R5, R9
- LMG 0(R9), R5, R6
-
- // Restore LE stack.
- MOVD SAVSTACK_ASYNC(R8), R9
- MOVD 0(R9), R4
- MOVD $0, 0(R9)
-
- // Call function.
- LE_CALL
- NOPH
- XOR R0, R0 // Restore R0 to $0.
- MOVD R4, 0(R9) // Save stack pointer.
-
- MOVD R3, r1+32(FP)
- MOVD R0, r2+40(FP)
- MOVD R0, err+48(FP)
- MOVW R3, R4
- CMP R4, $-1
- BNE done
- BL addrerrno<>(SB)
- MOVWZ 0(R3), R3
- MOVD R3, err+48(FP)
-done:
- BL runtime·exitsyscall(SB)
- RET
-
-TEXT ·syscall_rawsyscall(SB),NOSPLIT,$0-56
- MOVD a1+8(FP), R1
- MOVD a2+16(FP), R2
- MOVD a3+24(FP), R3
-
- // Get library control area (LCA).
- MOVW PSALAA, R8
- MOVD LCA64(R8), R8
-
- // Get function.
- MOVD CAA(R8), R9
- MOVD EDCHPXV(R9), R9
- MOVD trap+0(FP), R5
- SLD $4, R5
- ADD R5, R9
- LMG 0(R9), R5, R6
-
- // Restore LE stack.
- MOVD SAVSTACK_ASYNC(R8), R9
- MOVD 0(R9), R4
- MOVD $0, 0(R9)
-
- // Call function.
- LE_CALL
- NOPH
- XOR R0, R0 // Restore R0 to $0.
- MOVD R4, 0(R9) // Save stack pointer.
-
- MOVD R3, r1+32(FP)
- MOVD R0, r2+40(FP)
- MOVD R0, err+48(FP)
- MOVW R3, R4
- CMP R4, $-1
- BNE done
- BL addrerrno<>(SB)
- MOVWZ 0(R3), R3
- MOVD R3, err+48(FP)
-done:
- RET
-
-TEXT ·syscall_syscall6(SB),NOSPLIT,$0-80
- BL runtime·entersyscall(SB)
- MOVD a1+8(FP), R1
- MOVD a2+16(FP), R2
- MOVD a3+24(FP), R3
-
- // Get library control area (LCA).
- MOVW PSALAA, R8
- MOVD LCA64(R8), R8
-
- // Get function.
- MOVD CAA(R8), R9
- MOVD EDCHPXV(R9), R9
- MOVD trap+0(FP), R5
- SLD $4, R5
- ADD R5, R9
- LMG 0(R9), R5, R6
-
- // Restore LE stack.
- MOVD SAVSTACK_ASYNC(R8), R9
- MOVD 0(R9), R4
- MOVD $0, 0(R9)
-
- // Fill in parameter list.
- MOVD a4+32(FP), R12
- MOVD R12, (2176+24)(R4)
- MOVD a5+40(FP), R12
- MOVD R12, (2176+32)(R4)
- MOVD a6+48(FP), R12
- MOVD R12, (2176+40)(R4)
-
- // Call function.
- LE_CALL
- NOPH
- XOR R0, R0 // Restore R0 to $0.
- MOVD R4, 0(R9) // Save stack pointer.
-
- MOVD R3, r1+56(FP)
- MOVD R0, r2+64(FP)
- MOVD R0, err+72(FP)
- MOVW R3, R4
- CMP R4, $-1
- BNE done
- BL addrerrno<>(SB)
- MOVWZ 0(R3), R3
- MOVD R3, err+72(FP)
-done:
- BL runtime·exitsyscall(SB)
- RET
-
-TEXT ·syscall_rawsyscall6(SB),NOSPLIT,$0-80
- MOVD a1+8(FP), R1
- MOVD a2+16(FP), R2
- MOVD a3+24(FP), R3
-
- // Get library control area (LCA).
- MOVW PSALAA, R8
- MOVD LCA64(R8), R8
-
- // Get function.
- MOVD CAA(R8), R9
- MOVD EDCHPXV(R9), R9
- MOVD trap+0(FP), R5
- SLD $4, R5
- ADD R5, R9
- LMG 0(R9), R5, R6
-
- // Restore LE stack.
- MOVD SAVSTACK_ASYNC(R8), R9
- MOVD 0(R9), R4
- MOVD $0, 0(R9)
-
- // Fill in parameter list.
- MOVD a4+32(FP), R12
- MOVD R12, (2176+24)(R4)
- MOVD a5+40(FP), R12
- MOVD R12, (2176+32)(R4)
- MOVD a6+48(FP), R12
- MOVD R12, (2176+40)(R4)
-
- // Call function.
- LE_CALL
- NOPH
- XOR R0, R0 // Restore R0 to $0.
- MOVD R4, 0(R9) // Save stack pointer.
-
- MOVD R3, r1+56(FP)
- MOVD R0, r2+64(FP)
- MOVD R0, err+72(FP)
- MOVW R3, R4
- CMP R4, $-1
- BNE done
- BL ·rrno<>(SB)
- MOVWZ 0(R3), R3
- MOVD R3, err+72(FP)
-done:
- RET
-
-TEXT ·syscall_syscall9(SB),NOSPLIT,$0
- BL runtime·entersyscall(SB)
- MOVD a1+8(FP), R1
- MOVD a2+16(FP), R2
- MOVD a3+24(FP), R3
-
- // Get library control area (LCA).
- MOVW PSALAA, R8
- MOVD LCA64(R8), R8
-
- // Get function.
- MOVD CAA(R8), R9
- MOVD EDCHPXV(R9), R9
- MOVD trap+0(FP), R5
- SLD $4, R5
- ADD R5, R9
- LMG 0(R9), R5, R6
-
- // Restore LE stack.
- MOVD SAVSTACK_ASYNC(R8), R9
- MOVD 0(R9), R4
- MOVD $0, 0(R9)
-
- // Fill in parameter list.
- MOVD a4+32(FP), R12
- MOVD R12, (2176+24)(R4)
- MOVD a5+40(FP), R12
- MOVD R12, (2176+32)(R4)
- MOVD a6+48(FP), R12
- MOVD R12, (2176+40)(R4)
- MOVD a7+56(FP), R12
- MOVD R12, (2176+48)(R4)
- MOVD a8+64(FP), R12
- MOVD R12, (2176+56)(R4)
- MOVD a9+72(FP), R12
- MOVD R12, (2176+64)(R4)
-
- // Call function.
- LE_CALL
- NOPH
- XOR R0, R0 // Restore R0 to $0.
- MOVD R4, 0(R9) // Save stack pointer.
-
- MOVD R3, r1+80(FP)
- MOVD R0, r2+88(FP)
- MOVD R0, err+96(FP)
- MOVW R3, R4
- CMP R4, $-1
- BNE done
- BL addrerrno<>(SB)
- MOVWZ 0(R3), R3
- MOVD R3, err+96(FP)
-done:
- BL runtime·exitsyscall(SB)
- RET
-
-TEXT ·syscall_rawsyscall9(SB),NOSPLIT,$0
- MOVD a1+8(FP), R1
- MOVD a2+16(FP), R2
- MOVD a3+24(FP), R3
-
- // Get library control area (LCA).
- MOVW PSALAA, R8
- MOVD LCA64(R8), R8
-
- // Get function.
- MOVD CAA(R8), R9
- MOVD EDCHPXV(R9), R9
- MOVD trap+0(FP), R5
- SLD $4, R5
- ADD R5, R9
- LMG 0(R9), R5, R6
-
- // Restore LE stack.
- MOVD SAVSTACK_ASYNC(R8), R9
- MOVD 0(R9), R4
- MOVD $0, 0(R9)
-
- // Fill in parameter list.
- MOVD a4+32(FP), R12
- MOVD R12, (2176+24)(R4)
- MOVD a5+40(FP), R12
- MOVD R12, (2176+32)(R4)
- MOVD a6+48(FP), R12
- MOVD R12, (2176+40)(R4)
- MOVD a7+56(FP), R12
- MOVD R12, (2176+48)(R4)
- MOVD a8+64(FP), R12
- MOVD R12, (2176+56)(R4)
- MOVD a9+72(FP), R12
- MOVD R12, (2176+64)(R4)
-
- // Call function.
- LE_CALL
- NOPH
- XOR R0, R0 // Restore R0 to $0.
- MOVD R4, 0(R9) // Save stack pointer.
-
- MOVD R3, r1+80(FP)
- MOVD R0, r2+88(FP)
- MOVD R0, err+96(FP)
- MOVW R3, R4
- CMP R4, $-1
- BNE done
- BL addrerrno<>(SB)
- MOVWZ 0(R3), R3
- MOVD R3, err+96(FP)
-done:
- RET
-
-// func svcCall(fnptr unsafe.Pointer, argv *unsafe.Pointer, dsa *uint64)
-TEXT ·svcCall(SB),NOSPLIT,$0
- BL runtime·save_g(SB) // Save g and stack pointer
- MOVW PSALAA, R8
- MOVD LCA64(R8), R8
- MOVD SAVSTACK_ASYNC(R8), R9
- MOVD R15, 0(R9)
-
- MOVD argv+8(FP), R1 // Move function arguments into registers
- MOVD dsa+16(FP), g
- MOVD fnptr+0(FP), R15
-
- BYTE $0x0D // Branch to function
- BYTE $0xEF
-
- BL runtime·load_g(SB) // Restore g and stack pointer
- MOVW PSALAA, R8
- MOVD LCA64(R8), R8
- MOVD SAVSTACK_ASYNC(R8), R9
- MOVD 0(R9), R15
-
- RET
-
-// func svcLoad(name *byte) unsafe.Pointer
-TEXT ·svcLoad(SB),NOSPLIT,$0
- MOVD R15, R2 // Save go stack pointer
- MOVD name+0(FP), R0 // Move SVC args into registers
- MOVD $0x80000000, R1
- MOVD $0, R15
- BYTE $0x0A // SVC 08 LOAD
- BYTE $0x08
- MOVW R15, R3 // Save return code from SVC
- MOVD R2, R15 // Restore go stack pointer
- CMP R3, $0 // Check SVC return code
- BNE error
-
- MOVD $-2, R3 // Reset last bit of entry point to zero
- AND R0, R3
- MOVD R3, addr+8(FP) // Return entry point returned by SVC
- CMP R0, R3 // Check if last bit of entry point was set
- BNE done
-
- MOVD R15, R2 // Save go stack pointer
- MOVD $0, R15 // Move SVC args into registers (entry point still in r0 from SVC 08)
- BYTE $0x0A // SVC 09 DELETE
- BYTE $0x09
- MOVD R2, R15 // Restore go stack pointer
-
-error:
- MOVD $0, addr+8(FP) // Return 0 on failure
-done:
- XOR R0, R0 // Reset r0 to 0
- RET
-
-// func svcUnload(name *byte, fnptr unsafe.Pointer) int64
-TEXT ·svcUnload(SB),NOSPLIT,$0
- MOVD R15, R2 // Save go stack pointer
- MOVD name+0(FP), R0 // Move SVC args into registers
- MOVD addr+8(FP), R15
- BYTE $0x0A // SVC 09
- BYTE $0x09
- XOR R0, R0 // Reset r0 to 0
- MOVD R15, R1 // Save SVC return code
- MOVD R2, R15 // Restore go stack pointer
- MOVD R1, rc+0(FP) // Return SVC return code
- RET
-
-// func gettid() uint64
-TEXT ·gettid(SB), NOSPLIT, $0
- // Get library control area (LCA).
- MOVW PSALAA, R8
- MOVD LCA64(R8), R8
-
- // Get CEECAATHDID
- MOVD CAA(R8), R9
- MOVD 0x3D0(R9), R9
- MOVD R9, ret+0(FP)
-
- RET
diff --git a/vendor/golang.org/x/sys/unix/bluetooth_linux.go b/vendor/golang.org/x/sys/unix/bluetooth_linux.go
deleted file mode 100644
index a178a61..0000000
--- a/vendor/golang.org/x/sys/unix/bluetooth_linux.go
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Bluetooth sockets and messages
-
-package unix
-
-// Bluetooth Protocols
-const (
- BTPROTO_L2CAP = 0
- BTPROTO_HCI = 1
- BTPROTO_SCO = 2
- BTPROTO_RFCOMM = 3
- BTPROTO_BNEP = 4
- BTPROTO_CMTP = 5
- BTPROTO_HIDP = 6
- BTPROTO_AVDTP = 7
-)
-
-const (
- HCI_CHANNEL_RAW = 0
- HCI_CHANNEL_USER = 1
- HCI_CHANNEL_MONITOR = 2
- HCI_CHANNEL_CONTROL = 3
- HCI_CHANNEL_LOGGING = 4
-)
-
-// Socketoption Level
-const (
- SOL_BLUETOOTH = 0x112
- SOL_HCI = 0x0
- SOL_L2CAP = 0x6
- SOL_RFCOMM = 0x12
- SOL_SCO = 0x11
-)
diff --git a/vendor/golang.org/x/sys/unix/cap_freebsd.go b/vendor/golang.org/x/sys/unix/cap_freebsd.go
deleted file mode 100644
index 0b7c6ad..0000000
--- a/vendor/golang.org/x/sys/unix/cap_freebsd.go
+++ /dev/null
@@ -1,196 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build freebsd
-// +build freebsd
-
-package unix
-
-import (
- "errors"
- "fmt"
-)
-
-// Go implementation of C mostly found in /usr/src/sys/kern/subr_capability.c
-
-const (
- // This is the version of CapRights this package understands. See C implementation for parallels.
- capRightsGoVersion = CAP_RIGHTS_VERSION_00
- capArSizeMin = CAP_RIGHTS_VERSION_00 + 2
- capArSizeMax = capRightsGoVersion + 2
-)
-
-var (
- bit2idx = []int{
- -1, 0, 1, -1, 2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1,
- 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- }
-)
-
-func capidxbit(right uint64) int {
- return int((right >> 57) & 0x1f)
-}
-
-func rightToIndex(right uint64) (int, error) {
- idx := capidxbit(right)
- if idx < 0 || idx >= len(bit2idx) {
- return -2, fmt.Errorf("index for right 0x%x out of range", right)
- }
- return bit2idx[idx], nil
-}
-
-func caprver(right uint64) int {
- return int(right >> 62)
-}
-
-func capver(rights *CapRights) int {
- return caprver(rights.Rights[0])
-}
-
-func caparsize(rights *CapRights) int {
- return capver(rights) + 2
-}
-
-// CapRightsSet sets the permissions in setrights in rights.
-func CapRightsSet(rights *CapRights, setrights []uint64) error {
- // This is essentially a copy of cap_rights_vset()
- if capver(rights) != CAP_RIGHTS_VERSION_00 {
- return fmt.Errorf("bad rights version %d", capver(rights))
- }
-
- n := caparsize(rights)
- if n < capArSizeMin || n > capArSizeMax {
- return errors.New("bad rights size")
- }
-
- for _, right := range setrights {
- if caprver(right) != CAP_RIGHTS_VERSION_00 {
- return errors.New("bad right version")
- }
- i, err := rightToIndex(right)
- if err != nil {
- return err
- }
- if i >= n {
- return errors.New("index overflow")
- }
- if capidxbit(rights.Rights[i]) != capidxbit(right) {
- return errors.New("index mismatch")
- }
- rights.Rights[i] |= right
- if capidxbit(rights.Rights[i]) != capidxbit(right) {
- return errors.New("index mismatch (after assign)")
- }
- }
-
- return nil
-}
-
-// CapRightsClear clears the permissions in clearrights from rights.
-func CapRightsClear(rights *CapRights, clearrights []uint64) error {
- // This is essentially a copy of cap_rights_vclear()
- if capver(rights) != CAP_RIGHTS_VERSION_00 {
- return fmt.Errorf("bad rights version %d", capver(rights))
- }
-
- n := caparsize(rights)
- if n < capArSizeMin || n > capArSizeMax {
- return errors.New("bad rights size")
- }
-
- for _, right := range clearrights {
- if caprver(right) != CAP_RIGHTS_VERSION_00 {
- return errors.New("bad right version")
- }
- i, err := rightToIndex(right)
- if err != nil {
- return err
- }
- if i >= n {
- return errors.New("index overflow")
- }
- if capidxbit(rights.Rights[i]) != capidxbit(right) {
- return errors.New("index mismatch")
- }
- rights.Rights[i] &= ^(right & 0x01FFFFFFFFFFFFFF)
- if capidxbit(rights.Rights[i]) != capidxbit(right) {
- return errors.New("index mismatch (after assign)")
- }
- }
-
- return nil
-}
-
-// CapRightsIsSet checks whether all the permissions in setrights are present in rights.
-func CapRightsIsSet(rights *CapRights, setrights []uint64) (bool, error) {
- // This is essentially a copy of cap_rights_is_vset()
- if capver(rights) != CAP_RIGHTS_VERSION_00 {
- return false, fmt.Errorf("bad rights version %d", capver(rights))
- }
-
- n := caparsize(rights)
- if n < capArSizeMin || n > capArSizeMax {
- return false, errors.New("bad rights size")
- }
-
- for _, right := range setrights {
- if caprver(right) != CAP_RIGHTS_VERSION_00 {
- return false, errors.New("bad right version")
- }
- i, err := rightToIndex(right)
- if err != nil {
- return false, err
- }
- if i >= n {
- return false, errors.New("index overflow")
- }
- if capidxbit(rights.Rights[i]) != capidxbit(right) {
- return false, errors.New("index mismatch")
- }
- if (rights.Rights[i] & right) != right {
- return false, nil
- }
- }
-
- return true, nil
-}
-
-func capright(idx uint64, bit uint64) uint64 {
- return ((1 << (57 + idx)) | bit)
-}
-
-// CapRightsInit returns a pointer to an initialised CapRights structure filled with rights.
-// See man cap_rights_init(3) and rights(4).
-func CapRightsInit(rights []uint64) (*CapRights, error) {
- var r CapRights
- r.Rights[0] = (capRightsGoVersion << 62) | capright(0, 0)
- r.Rights[1] = capright(1, 0)
-
- err := CapRightsSet(&r, rights)
- if err != nil {
- return nil, err
- }
- return &r, nil
-}
-
-// CapRightsLimit reduces the operations permitted on fd to at most those contained in rights.
-// The capability rights on fd can never be increased by CapRightsLimit.
-// See man cap_rights_limit(2) and rights(4).
-func CapRightsLimit(fd uintptr, rights *CapRights) error {
- return capRightsLimit(int(fd), rights)
-}
-
-// CapRightsGet returns a CapRights structure containing the operations permitted on fd.
-// See man cap_rights_get(3) and rights(4).
-func CapRightsGet(fd uintptr) (*CapRights, error) {
- r, err := CapRightsInit(nil)
- if err != nil {
- return nil, err
- }
- err = capRightsGet(capRightsGoVersion, int(fd), r)
- if err != nil {
- return nil, err
- }
- return r, nil
-}
diff --git a/vendor/golang.org/x/sys/unix/constants.go b/vendor/golang.org/x/sys/unix/constants.go
deleted file mode 100644
index 394a396..0000000
--- a/vendor/golang.org/x/sys/unix/constants.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris zos
-
-package unix
-
-const (
- R_OK = 0x4
- W_OK = 0x2
- X_OK = 0x1
-)
diff --git a/vendor/golang.org/x/sys/unix/dev_aix_ppc.go b/vendor/golang.org/x/sys/unix/dev_aix_ppc.go
deleted file mode 100644
index 65a9985..0000000
--- a/vendor/golang.org/x/sys/unix/dev_aix_ppc.go
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix && ppc
-// +build aix,ppc
-
-// Functions to access/create device major and minor numbers matching the
-// encoding used by AIX.
-
-package unix
-
-// Major returns the major component of a Linux device number.
-func Major(dev uint64) uint32 {
- return uint32((dev >> 16) & 0xffff)
-}
-
-// Minor returns the minor component of a Linux device number.
-func Minor(dev uint64) uint32 {
- return uint32(dev & 0xffff)
-}
-
-// Mkdev returns a Linux device number generated from the given major and minor
-// components.
-func Mkdev(major, minor uint32) uint64 {
- return uint64(((major) << 16) | (minor))
-}
diff --git a/vendor/golang.org/x/sys/unix/dev_aix_ppc64.go b/vendor/golang.org/x/sys/unix/dev_aix_ppc64.go
deleted file mode 100644
index 8fc08ad..0000000
--- a/vendor/golang.org/x/sys/unix/dev_aix_ppc64.go
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix && ppc64
-// +build aix,ppc64
-
-// Functions to access/create device major and minor numbers matching the
-// encoding used AIX.
-
-package unix
-
-// Major returns the major component of a Linux device number.
-func Major(dev uint64) uint32 {
- return uint32((dev & 0x3fffffff00000000) >> 32)
-}
-
-// Minor returns the minor component of a Linux device number.
-func Minor(dev uint64) uint32 {
- return uint32((dev & 0x00000000ffffffff) >> 0)
-}
-
-// Mkdev returns a Linux device number generated from the given major and minor
-// components.
-func Mkdev(major, minor uint32) uint64 {
- var DEVNO64 uint64
- DEVNO64 = 0x8000000000000000
- return ((uint64(major) << 32) | (uint64(minor) & 0x00000000FFFFFFFF) | DEVNO64)
-}
diff --git a/vendor/golang.org/x/sys/unix/dev_darwin.go b/vendor/golang.org/x/sys/unix/dev_darwin.go
deleted file mode 100644
index 8d1dc0f..0000000
--- a/vendor/golang.org/x/sys/unix/dev_darwin.go
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Functions to access/create device major and minor numbers matching the
-// encoding used in Darwin's sys/types.h header.
-
-package unix
-
-// Major returns the major component of a Darwin device number.
-func Major(dev uint64) uint32 {
- return uint32((dev >> 24) & 0xff)
-}
-
-// Minor returns the minor component of a Darwin device number.
-func Minor(dev uint64) uint32 {
- return uint32(dev & 0xffffff)
-}
-
-// Mkdev returns a Darwin device number generated from the given major and minor
-// components.
-func Mkdev(major, minor uint32) uint64 {
- return (uint64(major) << 24) | uint64(minor)
-}
diff --git a/vendor/golang.org/x/sys/unix/dev_dragonfly.go b/vendor/golang.org/x/sys/unix/dev_dragonfly.go
deleted file mode 100644
index 8502f20..0000000
--- a/vendor/golang.org/x/sys/unix/dev_dragonfly.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Functions to access/create device major and minor numbers matching the
-// encoding used in Dragonfly's sys/types.h header.
-//
-// The information below is extracted and adapted from sys/types.h:
-//
-// Minor gives a cookie instead of an index since in order to avoid changing the
-// meanings of bits 0-15 or wasting time and space shifting bits 16-31 for
-// devices that don't use them.
-
-package unix
-
-// Major returns the major component of a DragonFlyBSD device number.
-func Major(dev uint64) uint32 {
- return uint32((dev >> 8) & 0xff)
-}
-
-// Minor returns the minor component of a DragonFlyBSD device number.
-func Minor(dev uint64) uint32 {
- return uint32(dev & 0xffff00ff)
-}
-
-// Mkdev returns a DragonFlyBSD device number generated from the given major and
-// minor components.
-func Mkdev(major, minor uint32) uint64 {
- return (uint64(major) << 8) | uint64(minor)
-}
diff --git a/vendor/golang.org/x/sys/unix/dev_freebsd.go b/vendor/golang.org/x/sys/unix/dev_freebsd.go
deleted file mode 100644
index eba3b4b..0000000
--- a/vendor/golang.org/x/sys/unix/dev_freebsd.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Functions to access/create device major and minor numbers matching the
-// encoding used in FreeBSD's sys/types.h header.
-//
-// The information below is extracted and adapted from sys/types.h:
-//
-// Minor gives a cookie instead of an index since in order to avoid changing the
-// meanings of bits 0-15 or wasting time and space shifting bits 16-31 for
-// devices that don't use them.
-
-package unix
-
-// Major returns the major component of a FreeBSD device number.
-func Major(dev uint64) uint32 {
- return uint32((dev >> 8) & 0xff)
-}
-
-// Minor returns the minor component of a FreeBSD device number.
-func Minor(dev uint64) uint32 {
- return uint32(dev & 0xffff00ff)
-}
-
-// Mkdev returns a FreeBSD device number generated from the given major and
-// minor components.
-func Mkdev(major, minor uint32) uint64 {
- return (uint64(major) << 8) | uint64(minor)
-}
diff --git a/vendor/golang.org/x/sys/unix/dev_linux.go b/vendor/golang.org/x/sys/unix/dev_linux.go
deleted file mode 100644
index d165d6f..0000000
--- a/vendor/golang.org/x/sys/unix/dev_linux.go
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Functions to access/create device major and minor numbers matching the
-// encoding used by the Linux kernel and glibc.
-//
-// The information below is extracted and adapted from bits/sysmacros.h in the
-// glibc sources:
-//
-// dev_t in glibc is 64-bit, with 32-bit major and minor numbers. glibc's
-// default encoding is MMMM Mmmm mmmM MMmm, where M is a hex digit of the major
-// number and m is a hex digit of the minor number. This is backward compatible
-// with legacy systems where dev_t is 16 bits wide, encoded as MMmm. It is also
-// backward compatible with the Linux kernel, which for some architectures uses
-// 32-bit dev_t, encoded as mmmM MMmm.
-
-package unix
-
-// Major returns the major component of a Linux device number.
-func Major(dev uint64) uint32 {
- major := uint32((dev & 0x00000000000fff00) >> 8)
- major |= uint32((dev & 0xfffff00000000000) >> 32)
- return major
-}
-
-// Minor returns the minor component of a Linux device number.
-func Minor(dev uint64) uint32 {
- minor := uint32((dev & 0x00000000000000ff) >> 0)
- minor |= uint32((dev & 0x00000ffffff00000) >> 12)
- return minor
-}
-
-// Mkdev returns a Linux device number generated from the given major and minor
-// components.
-func Mkdev(major, minor uint32) uint64 {
- dev := (uint64(major) & 0x00000fff) << 8
- dev |= (uint64(major) & 0xfffff000) << 32
- dev |= (uint64(minor) & 0x000000ff) << 0
- dev |= (uint64(minor) & 0xffffff00) << 12
- return dev
-}
diff --git a/vendor/golang.org/x/sys/unix/dev_netbsd.go b/vendor/golang.org/x/sys/unix/dev_netbsd.go
deleted file mode 100644
index b4a203d..0000000
--- a/vendor/golang.org/x/sys/unix/dev_netbsd.go
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Functions to access/create device major and minor numbers matching the
-// encoding used in NetBSD's sys/types.h header.
-
-package unix
-
-// Major returns the major component of a NetBSD device number.
-func Major(dev uint64) uint32 {
- return uint32((dev & 0x000fff00) >> 8)
-}
-
-// Minor returns the minor component of a NetBSD device number.
-func Minor(dev uint64) uint32 {
- minor := uint32((dev & 0x000000ff) >> 0)
- minor |= uint32((dev & 0xfff00000) >> 12)
- return minor
-}
-
-// Mkdev returns a NetBSD device number generated from the given major and minor
-// components.
-func Mkdev(major, minor uint32) uint64 {
- dev := (uint64(major) << 8) & 0x000fff00
- dev |= (uint64(minor) << 12) & 0xfff00000
- dev |= (uint64(minor) << 0) & 0x000000ff
- return dev
-}
diff --git a/vendor/golang.org/x/sys/unix/dev_openbsd.go b/vendor/golang.org/x/sys/unix/dev_openbsd.go
deleted file mode 100644
index f3430c4..0000000
--- a/vendor/golang.org/x/sys/unix/dev_openbsd.go
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Functions to access/create device major and minor numbers matching the
-// encoding used in OpenBSD's sys/types.h header.
-
-package unix
-
-// Major returns the major component of an OpenBSD device number.
-func Major(dev uint64) uint32 {
- return uint32((dev & 0x0000ff00) >> 8)
-}
-
-// Minor returns the minor component of an OpenBSD device number.
-func Minor(dev uint64) uint32 {
- minor := uint32((dev & 0x000000ff) >> 0)
- minor |= uint32((dev & 0xffff0000) >> 8)
- return minor
-}
-
-// Mkdev returns an OpenBSD device number generated from the given major and minor
-// components.
-func Mkdev(major, minor uint32) uint64 {
- dev := (uint64(major) << 8) & 0x0000ff00
- dev |= (uint64(minor) << 8) & 0xffff0000
- dev |= (uint64(minor) << 0) & 0x000000ff
- return dev
-}
diff --git a/vendor/golang.org/x/sys/unix/dev_zos.go b/vendor/golang.org/x/sys/unix/dev_zos.go
deleted file mode 100644
index a388e59..0000000
--- a/vendor/golang.org/x/sys/unix/dev_zos.go
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build zos && s390x
-// +build zos,s390x
-
-// Functions to access/create device major and minor numbers matching the
-// encoding used by z/OS.
-//
-// The information below is extracted and adapted from macros.
-
-package unix
-
-// Major returns the major component of a z/OS device number.
-func Major(dev uint64) uint32 {
- return uint32((dev >> 16) & 0x0000FFFF)
-}
-
-// Minor returns the minor component of a z/OS device number.
-func Minor(dev uint64) uint32 {
- return uint32(dev & 0x0000FFFF)
-}
-
-// Mkdev returns a z/OS device number generated from the given major and minor
-// components.
-func Mkdev(major, minor uint32) uint64 {
- return (uint64(major) << 16) | uint64(minor)
-}
diff --git a/vendor/golang.org/x/sys/unix/dirent.go b/vendor/golang.org/x/sys/unix/dirent.go
deleted file mode 100644
index e74e5ea..0000000
--- a/vendor/golang.org/x/sys/unix/dirent.go
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
-
-package unix
-
-import "unsafe"
-
-// readInt returns the size-bytes unsigned integer in native byte order at offset off.
-func readInt(b []byte, off, size uintptr) (u uint64, ok bool) {
- if len(b) < int(off+size) {
- return 0, false
- }
- if isBigEndian {
- return readIntBE(b[off:], size), true
- }
- return readIntLE(b[off:], size), true
-}
-
-func readIntBE(b []byte, size uintptr) uint64 {
- switch size {
- case 1:
- return uint64(b[0])
- case 2:
- _ = b[1] // bounds check hint to compiler; see golang.org/issue/14808
- return uint64(b[1]) | uint64(b[0])<<8
- case 4:
- _ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
- return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24
- case 8:
- _ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
- return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 |
- uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56
- default:
- panic("syscall: readInt with unsupported size")
- }
-}
-
-func readIntLE(b []byte, size uintptr) uint64 {
- switch size {
- case 1:
- return uint64(b[0])
- case 2:
- _ = b[1] // bounds check hint to compiler; see golang.org/issue/14808
- return uint64(b[0]) | uint64(b[1])<<8
- case 4:
- _ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
- return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24
- case 8:
- _ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
- return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
- uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
- default:
- panic("syscall: readInt with unsupported size")
- }
-}
-
-// ParseDirent parses up to max directory entries in buf,
-// appending the names to names. It returns the number of
-// bytes consumed from buf, the number of entries added
-// to names, and the new names slice.
-func ParseDirent(buf []byte, max int, names []string) (consumed int, count int, newnames []string) {
- origlen := len(buf)
- count = 0
- for max != 0 && len(buf) > 0 {
- reclen, ok := direntReclen(buf)
- if !ok || reclen > uint64(len(buf)) {
- return origlen, count, names
- }
- rec := buf[:reclen]
- buf = buf[reclen:]
- ino, ok := direntIno(rec)
- if !ok {
- break
- }
- if ino == 0 { // File absent in directory.
- continue
- }
- const namoff = uint64(unsafe.Offsetof(Dirent{}.Name))
- namlen, ok := direntNamlen(rec)
- if !ok || namoff+namlen > uint64(len(rec)) {
- break
- }
- name := rec[namoff : namoff+namlen]
- for i, c := range name {
- if c == 0 {
- name = name[:i]
- break
- }
- }
- // Check for useless names before allocating a string.
- if string(name) == "." || string(name) == ".." {
- continue
- }
- max--
- count++
- names = append(names, string(name))
- }
- return origlen - len(buf), count, names
-}
diff --git a/vendor/golang.org/x/sys/unix/endian_big.go b/vendor/golang.org/x/sys/unix/endian_big.go
deleted file mode 100644
index a520265..0000000
--- a/vendor/golang.org/x/sys/unix/endian_big.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-//
-//go:build armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc || ppc64 || s390 || s390x || shbe || sparc || sparc64
-// +build armbe arm64be m68k mips mips64 mips64p32 ppc ppc64 s390 s390x shbe sparc sparc64
-
-package unix
-
-const isBigEndian = true
diff --git a/vendor/golang.org/x/sys/unix/endian_little.go b/vendor/golang.org/x/sys/unix/endian_little.go
deleted file mode 100644
index b0f2bc4..0000000
--- a/vendor/golang.org/x/sys/unix/endian_little.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-//
-//go:build 386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh
-// +build 386 amd64 amd64p32 alpha arm arm64 loong64 mipsle mips64le mips64p32le nios2 ppc64le riscv riscv64 sh
-
-package unix
-
-const isBigEndian = false
diff --git a/vendor/golang.org/x/sys/unix/env_unix.go b/vendor/golang.org/x/sys/unix/env_unix.go
deleted file mode 100644
index 29ccc4d..0000000
--- a/vendor/golang.org/x/sys/unix/env_unix.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris zos
-
-// Unix environment variables.
-
-package unix
-
-import "syscall"
-
-func Getenv(key string) (value string, found bool) {
- return syscall.Getenv(key)
-}
-
-func Setenv(key, value string) error {
- return syscall.Setenv(key, value)
-}
-
-func Clearenv() {
- syscall.Clearenv()
-}
-
-func Environ() []string {
- return syscall.Environ()
-}
-
-func Unsetenv(key string) error {
- return syscall.Unsetenv(key)
-}
diff --git a/vendor/golang.org/x/sys/unix/epoll_zos.go b/vendor/golang.org/x/sys/unix/epoll_zos.go
deleted file mode 100644
index cedaf7e..0000000
--- a/vendor/golang.org/x/sys/unix/epoll_zos.go
+++ /dev/null
@@ -1,221 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build zos && s390x
-// +build zos,s390x
-
-package unix
-
-import (
- "sync"
-)
-
-// This file simulates epoll on z/OS using poll.
-
-// Analogous to epoll_event on Linux.
-// TODO(neeilan): Pad is because the Linux kernel expects a 96-bit struct. We never pass this to the kernel; remove?
-type EpollEvent struct {
- Events uint32
- Fd int32
- Pad int32
-}
-
-const (
- EPOLLERR = 0x8
- EPOLLHUP = 0x10
- EPOLLIN = 0x1
- EPOLLMSG = 0x400
- EPOLLOUT = 0x4
- EPOLLPRI = 0x2
- EPOLLRDBAND = 0x80
- EPOLLRDNORM = 0x40
- EPOLLWRBAND = 0x200
- EPOLLWRNORM = 0x100
- EPOLL_CTL_ADD = 0x1
- EPOLL_CTL_DEL = 0x2
- EPOLL_CTL_MOD = 0x3
- // The following constants are part of the epoll API, but represent
- // currently unsupported functionality on z/OS.
- // EPOLL_CLOEXEC = 0x80000
- // EPOLLET = 0x80000000
- // EPOLLONESHOT = 0x40000000
- // EPOLLRDHUP = 0x2000 // Typically used with edge-triggered notis
- // EPOLLEXCLUSIVE = 0x10000000 // Exclusive wake-up mode
- // EPOLLWAKEUP = 0x20000000 // Relies on Linux's BLOCK_SUSPEND capability
-)
-
-// TODO(neeilan): We can eliminate these epToPoll / pToEpoll calls by using identical mask values for POLL/EPOLL
-// constants where possible The lower 16 bits of epoll events (uint32) can fit any system poll event (int16).
-
-// epToPollEvt converts epoll event field to poll equivalent.
-// In epoll, Events is a 32-bit field, while poll uses 16 bits.
-func epToPollEvt(events uint32) int16 {
- var ep2p = map[uint32]int16{
- EPOLLIN: POLLIN,
- EPOLLOUT: POLLOUT,
- EPOLLHUP: POLLHUP,
- EPOLLPRI: POLLPRI,
- EPOLLERR: POLLERR,
- }
-
- var pollEvts int16 = 0
- for epEvt, pEvt := range ep2p {
- if (events & epEvt) != 0 {
- pollEvts |= pEvt
- }
- }
-
- return pollEvts
-}
-
-// pToEpollEvt converts 16 bit poll event bitfields to 32-bit epoll event fields.
-func pToEpollEvt(revents int16) uint32 {
- var p2ep = map[int16]uint32{
- POLLIN: EPOLLIN,
- POLLOUT: EPOLLOUT,
- POLLHUP: EPOLLHUP,
- POLLPRI: EPOLLPRI,
- POLLERR: EPOLLERR,
- }
-
- var epollEvts uint32 = 0
- for pEvt, epEvt := range p2ep {
- if (revents & pEvt) != 0 {
- epollEvts |= epEvt
- }
- }
-
- return epollEvts
-}
-
-// Per-process epoll implementation.
-type epollImpl struct {
- mu sync.Mutex
- epfd2ep map[int]*eventPoll
- nextEpfd int
-}
-
-// eventPoll holds a set of file descriptors being watched by the process. A process can have multiple epoll instances.
-// On Linux, this is an in-kernel data structure accessed through a fd.
-type eventPoll struct {
- mu sync.Mutex
- fds map[int]*EpollEvent
-}
-
-// epoll impl for this process.
-var impl epollImpl = epollImpl{
- epfd2ep: make(map[int]*eventPoll),
- nextEpfd: 0,
-}
-
-func (e *epollImpl) epollcreate(size int) (epfd int, err error) {
- e.mu.Lock()
- defer e.mu.Unlock()
- epfd = e.nextEpfd
- e.nextEpfd++
-
- e.epfd2ep[epfd] = &eventPoll{
- fds: make(map[int]*EpollEvent),
- }
- return epfd, nil
-}
-
-func (e *epollImpl) epollcreate1(flag int) (fd int, err error) {
- return e.epollcreate(4)
-}
-
-func (e *epollImpl) epollctl(epfd int, op int, fd int, event *EpollEvent) (err error) {
- e.mu.Lock()
- defer e.mu.Unlock()
-
- ep, ok := e.epfd2ep[epfd]
- if !ok {
-
- return EBADF
- }
-
- switch op {
- case EPOLL_CTL_ADD:
- // TODO(neeilan): When we make epfds and fds disjoint, detect epoll
- // loops here (instances watching each other) and return ELOOP.
- if _, ok := ep.fds[fd]; ok {
- return EEXIST
- }
- ep.fds[fd] = event
- case EPOLL_CTL_MOD:
- if _, ok := ep.fds[fd]; !ok {
- return ENOENT
- }
- ep.fds[fd] = event
- case EPOLL_CTL_DEL:
- if _, ok := ep.fds[fd]; !ok {
- return ENOENT
- }
- delete(ep.fds, fd)
-
- }
- return nil
-}
-
-// Must be called while holding ep.mu
-func (ep *eventPoll) getFds() []int {
- fds := make([]int, len(ep.fds))
- for fd := range ep.fds {
- fds = append(fds, fd)
- }
- return fds
-}
-
-func (e *epollImpl) epollwait(epfd int, events []EpollEvent, msec int) (n int, err error) {
- e.mu.Lock() // in [rare] case of concurrent epollcreate + epollwait
- ep, ok := e.epfd2ep[epfd]
-
- if !ok {
- e.mu.Unlock()
- return 0, EBADF
- }
-
- pollfds := make([]PollFd, 4)
- for fd, epollevt := range ep.fds {
- pollfds = append(pollfds, PollFd{Fd: int32(fd), Events: epToPollEvt(epollevt.Events)})
- }
- e.mu.Unlock()
-
- n, err = Poll(pollfds, msec)
- if err != nil {
- return n, err
- }
-
- i := 0
- for _, pFd := range pollfds {
- if pFd.Revents != 0 {
- events[i] = EpollEvent{Fd: pFd.Fd, Events: pToEpollEvt(pFd.Revents)}
- i++
- }
-
- if i == n {
- break
- }
- }
-
- return n, nil
-}
-
-func EpollCreate(size int) (fd int, err error) {
- return impl.epollcreate(size)
-}
-
-func EpollCreate1(flag int) (fd int, err error) {
- return impl.epollcreate1(flag)
-}
-
-func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
- return impl.epollctl(epfd, op, fd, event)
-}
-
-// Because EpollWait mutates events, the caller is expected to coordinate
-// concurrent access if calling with the same epfd from multiple goroutines.
-func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
- return impl.epollwait(epfd, events, msec)
-}
diff --git a/vendor/golang.org/x/sys/unix/fcntl.go b/vendor/golang.org/x/sys/unix/fcntl.go
deleted file mode 100644
index e9b9912..0000000
--- a/vendor/golang.org/x/sys/unix/fcntl.go
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build dragonfly || freebsd || linux || netbsd || openbsd
-// +build dragonfly freebsd linux netbsd openbsd
-
-package unix
-
-import "unsafe"
-
-// fcntl64Syscall is usually SYS_FCNTL, but is overridden on 32-bit Linux
-// systems by fcntl_linux_32bit.go to be SYS_FCNTL64.
-var fcntl64Syscall uintptr = SYS_FCNTL
-
-func fcntl(fd int, cmd, arg int) (int, error) {
- valptr, _, errno := Syscall(fcntl64Syscall, uintptr(fd), uintptr(cmd), uintptr(arg))
- var err error
- if errno != 0 {
- err = errno
- }
- return int(valptr), err
-}
-
-// FcntlInt performs a fcntl syscall on fd with the provided command and argument.
-func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
- return fcntl(int(fd), cmd, arg)
-}
-
-// FcntlFlock performs a fcntl syscall for the F_GETLK, F_SETLK or F_SETLKW command.
-func FcntlFlock(fd uintptr, cmd int, lk *Flock_t) error {
- _, _, errno := Syscall(fcntl64Syscall, fd, uintptr(cmd), uintptr(unsafe.Pointer(lk)))
- if errno == 0 {
- return nil
- }
- return errno
-}
diff --git a/vendor/golang.org/x/sys/unix/fcntl_darwin.go b/vendor/golang.org/x/sys/unix/fcntl_darwin.go
deleted file mode 100644
index a9911c7..0000000
--- a/vendor/golang.org/x/sys/unix/fcntl_darwin.go
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package unix
-
-import "unsafe"
-
-// FcntlInt performs a fcntl syscall on fd with the provided command and argument.
-func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
- return fcntl(int(fd), cmd, arg)
-}
-
-// FcntlFlock performs a fcntl syscall for the F_GETLK, F_SETLK or F_SETLKW command.
-func FcntlFlock(fd uintptr, cmd int, lk *Flock_t) error {
- _, err := fcntl(int(fd), cmd, int(uintptr(unsafe.Pointer(lk))))
- return err
-}
-
-// FcntlFstore performs a fcntl syscall for the F_PREALLOCATE command.
-func FcntlFstore(fd uintptr, cmd int, fstore *Fstore_t) error {
- _, err := fcntl(int(fd), cmd, int(uintptr(unsafe.Pointer(fstore))))
- return err
-}
diff --git a/vendor/golang.org/x/sys/unix/fcntl_linux_32bit.go b/vendor/golang.org/x/sys/unix/fcntl_linux_32bit.go
deleted file mode 100644
index 29d4480..0000000
--- a/vendor/golang.org/x/sys/unix/fcntl_linux_32bit.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (linux && 386) || (linux && arm) || (linux && mips) || (linux && mipsle) || (linux && ppc)
-// +build linux,386 linux,arm linux,mips linux,mipsle linux,ppc
-
-package unix
-
-func init() {
- // On 32-bit Linux systems, the fcntl syscall that matches Go's
- // Flock_t type is SYS_FCNTL64, not SYS_FCNTL.
- fcntl64Syscall = SYS_FCNTL64
-}
diff --git a/vendor/golang.org/x/sys/unix/fdset.go b/vendor/golang.org/x/sys/unix/fdset.go
deleted file mode 100644
index a8068f9..0000000
--- a/vendor/golang.org/x/sys/unix/fdset.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris zos
-
-package unix
-
-// Set adds fd to the set fds.
-func (fds *FdSet) Set(fd int) {
- fds.Bits[fd/NFDBITS] |= (1 << (uintptr(fd) % NFDBITS))
-}
-
-// Clear removes fd from the set fds.
-func (fds *FdSet) Clear(fd int) {
- fds.Bits[fd/NFDBITS] &^= (1 << (uintptr(fd) % NFDBITS))
-}
-
-// IsSet returns whether fd is in the set fds.
-func (fds *FdSet) IsSet(fd int) bool {
- return fds.Bits[fd/NFDBITS]&(1<<(uintptr(fd)%NFDBITS)) != 0
-}
-
-// Zero clears the set fds.
-func (fds *FdSet) Zero() {
- for i := range fds.Bits {
- fds.Bits[i] = 0
- }
-}
diff --git a/vendor/golang.org/x/sys/unix/fstatfs_zos.go b/vendor/golang.org/x/sys/unix/fstatfs_zos.go
deleted file mode 100644
index e377cc9..0000000
--- a/vendor/golang.org/x/sys/unix/fstatfs_zos.go
+++ /dev/null
@@ -1,164 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build zos && s390x
-// +build zos,s390x
-
-package unix
-
-import (
- "unsafe"
-)
-
-// This file simulates fstatfs on z/OS using fstatvfs and w_getmntent.
-
-func Fstatfs(fd int, stat *Statfs_t) (err error) {
- var stat_v Statvfs_t
- err = Fstatvfs(fd, &stat_v)
- if err == nil {
- // populate stat
- stat.Type = 0
- stat.Bsize = stat_v.Bsize
- stat.Blocks = stat_v.Blocks
- stat.Bfree = stat_v.Bfree
- stat.Bavail = stat_v.Bavail
- stat.Files = stat_v.Files
- stat.Ffree = stat_v.Ffree
- stat.Fsid = stat_v.Fsid
- stat.Namelen = stat_v.Namemax
- stat.Frsize = stat_v.Frsize
- stat.Flags = stat_v.Flag
- for passn := 0; passn < 5; passn++ {
- switch passn {
- case 0:
- err = tryGetmntent64(stat)
- break
- case 1:
- err = tryGetmntent128(stat)
- break
- case 2:
- err = tryGetmntent256(stat)
- break
- case 3:
- err = tryGetmntent512(stat)
- break
- case 4:
- err = tryGetmntent1024(stat)
- break
- default:
- break
- }
- //proceed to return if: err is nil (found), err is nonnil but not ERANGE (another error occurred)
- if err == nil || err != nil && err != ERANGE {
- break
- }
- }
- }
- return err
-}
-
-func tryGetmntent64(stat *Statfs_t) (err error) {
- var mnt_ent_buffer struct {
- header W_Mnth
- filesys_info [64]W_Mntent
- }
- var buffer_size int = int(unsafe.Sizeof(mnt_ent_buffer))
- fs_count, err := W_Getmntent((*byte)(unsafe.Pointer(&mnt_ent_buffer)), buffer_size)
- if err != nil {
- return err
- }
- err = ERANGE //return ERANGE if no match is found in this batch
- for i := 0; i < fs_count; i++ {
- if stat.Fsid == uint64(mnt_ent_buffer.filesys_info[i].Dev) {
- stat.Type = uint32(mnt_ent_buffer.filesys_info[i].Fstname[0])
- err = nil
- break
- }
- }
- return err
-}
-
-func tryGetmntent128(stat *Statfs_t) (err error) {
- var mnt_ent_buffer struct {
- header W_Mnth
- filesys_info [128]W_Mntent
- }
- var buffer_size int = int(unsafe.Sizeof(mnt_ent_buffer))
- fs_count, err := W_Getmntent((*byte)(unsafe.Pointer(&mnt_ent_buffer)), buffer_size)
- if err != nil {
- return err
- }
- err = ERANGE //return ERANGE if no match is found in this batch
- for i := 0; i < fs_count; i++ {
- if stat.Fsid == uint64(mnt_ent_buffer.filesys_info[i].Dev) {
- stat.Type = uint32(mnt_ent_buffer.filesys_info[i].Fstname[0])
- err = nil
- break
- }
- }
- return err
-}
-
-func tryGetmntent256(stat *Statfs_t) (err error) {
- var mnt_ent_buffer struct {
- header W_Mnth
- filesys_info [256]W_Mntent
- }
- var buffer_size int = int(unsafe.Sizeof(mnt_ent_buffer))
- fs_count, err := W_Getmntent((*byte)(unsafe.Pointer(&mnt_ent_buffer)), buffer_size)
- if err != nil {
- return err
- }
- err = ERANGE //return ERANGE if no match is found in this batch
- for i := 0; i < fs_count; i++ {
- if stat.Fsid == uint64(mnt_ent_buffer.filesys_info[i].Dev) {
- stat.Type = uint32(mnt_ent_buffer.filesys_info[i].Fstname[0])
- err = nil
- break
- }
- }
- return err
-}
-
-func tryGetmntent512(stat *Statfs_t) (err error) {
- var mnt_ent_buffer struct {
- header W_Mnth
- filesys_info [512]W_Mntent
- }
- var buffer_size int = int(unsafe.Sizeof(mnt_ent_buffer))
- fs_count, err := W_Getmntent((*byte)(unsafe.Pointer(&mnt_ent_buffer)), buffer_size)
- if err != nil {
- return err
- }
- err = ERANGE //return ERANGE if no match is found in this batch
- for i := 0; i < fs_count; i++ {
- if stat.Fsid == uint64(mnt_ent_buffer.filesys_info[i].Dev) {
- stat.Type = uint32(mnt_ent_buffer.filesys_info[i].Fstname[0])
- err = nil
- break
- }
- }
- return err
-}
-
-func tryGetmntent1024(stat *Statfs_t) (err error) {
- var mnt_ent_buffer struct {
- header W_Mnth
- filesys_info [1024]W_Mntent
- }
- var buffer_size int = int(unsafe.Sizeof(mnt_ent_buffer))
- fs_count, err := W_Getmntent((*byte)(unsafe.Pointer(&mnt_ent_buffer)), buffer_size)
- if err != nil {
- return err
- }
- err = ERANGE //return ERANGE if no match is found in this batch
- for i := 0; i < fs_count; i++ {
- if stat.Fsid == uint64(mnt_ent_buffer.filesys_info[i].Dev) {
- stat.Type = uint32(mnt_ent_buffer.filesys_info[i].Fstname[0])
- err = nil
- break
- }
- }
- return err
-}
diff --git a/vendor/golang.org/x/sys/unix/gccgo.go b/vendor/golang.org/x/sys/unix/gccgo.go
deleted file mode 100644
index 0dee232..0000000
--- a/vendor/golang.org/x/sys/unix/gccgo.go
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gccgo && !aix
-// +build gccgo,!aix
-
-package unix
-
-import "syscall"
-
-// We can't use the gc-syntax .s files for gccgo. On the plus side
-// much of the functionality can be written directly in Go.
-
-func realSyscallNoError(trap, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r uintptr)
-
-func realSyscall(trap, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r, errno uintptr)
-
-func SyscallNoError(trap, a1, a2, a3 uintptr) (r1, r2 uintptr) {
- syscall.Entersyscall()
- r := realSyscallNoError(trap, a1, a2, a3, 0, 0, 0, 0, 0, 0)
- syscall.Exitsyscall()
- return r, 0
-}
-
-func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
- syscall.Entersyscall()
- r, errno := realSyscall(trap, a1, a2, a3, 0, 0, 0, 0, 0, 0)
- syscall.Exitsyscall()
- return r, 0, syscall.Errno(errno)
-}
-
-func Syscall6(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno) {
- syscall.Entersyscall()
- r, errno := realSyscall(trap, a1, a2, a3, a4, a5, a6, 0, 0, 0)
- syscall.Exitsyscall()
- return r, 0, syscall.Errno(errno)
-}
-
-func Syscall9(trap, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2 uintptr, err syscall.Errno) {
- syscall.Entersyscall()
- r, errno := realSyscall(trap, a1, a2, a3, a4, a5, a6, a7, a8, a9)
- syscall.Exitsyscall()
- return r, 0, syscall.Errno(errno)
-}
-
-func RawSyscallNoError(trap, a1, a2, a3 uintptr) (r1, r2 uintptr) {
- r := realSyscallNoError(trap, a1, a2, a3, 0, 0, 0, 0, 0, 0)
- return r, 0
-}
-
-func RawSyscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
- r, errno := realSyscall(trap, a1, a2, a3, 0, 0, 0, 0, 0, 0)
- return r, 0, syscall.Errno(errno)
-}
-
-func RawSyscall6(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno) {
- r, errno := realSyscall(trap, a1, a2, a3, a4, a5, a6, 0, 0, 0)
- return r, 0, syscall.Errno(errno)
-}
diff --git a/vendor/golang.org/x/sys/unix/gccgo_c.c b/vendor/golang.org/x/sys/unix/gccgo_c.c
deleted file mode 100644
index 2cb1fef..0000000
--- a/vendor/golang.org/x/sys/unix/gccgo_c.c
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build gccgo
-// +build !aix
-
-#include
-#include
-#include
-
-#define _STRINGIFY2_(x) #x
-#define _STRINGIFY_(x) _STRINGIFY2_(x)
-#define GOSYM_PREFIX _STRINGIFY_(__USER_LABEL_PREFIX__)
-
-// Call syscall from C code because the gccgo support for calling from
-// Go to C does not support varargs functions.
-
-struct ret {
- uintptr_t r;
- uintptr_t err;
-};
-
-struct ret gccgoRealSyscall(uintptr_t trap, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7, uintptr_t a8, uintptr_t a9)
- __asm__(GOSYM_PREFIX GOPKGPATH ".realSyscall");
-
-struct ret
-gccgoRealSyscall(uintptr_t trap, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7, uintptr_t a8, uintptr_t a9)
-{
- struct ret r;
-
- errno = 0;
- r.r = syscall(trap, a1, a2, a3, a4, a5, a6, a7, a8, a9);
- r.err = errno;
- return r;
-}
-
-uintptr_t gccgoRealSyscallNoError(uintptr_t trap, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7, uintptr_t a8, uintptr_t a9)
- __asm__(GOSYM_PREFIX GOPKGPATH ".realSyscallNoError");
-
-uintptr_t
-gccgoRealSyscallNoError(uintptr_t trap, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7, uintptr_t a8, uintptr_t a9)
-{
- return syscall(trap, a1, a2, a3, a4, a5, a6, a7, a8, a9);
-}
diff --git a/vendor/golang.org/x/sys/unix/gccgo_linux_amd64.go b/vendor/golang.org/x/sys/unix/gccgo_linux_amd64.go
deleted file mode 100644
index e60e49a..0000000
--- a/vendor/golang.org/x/sys/unix/gccgo_linux_amd64.go
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gccgo && linux && amd64
-// +build gccgo,linux,amd64
-
-package unix
-
-import "syscall"
-
-//extern gettimeofday
-func realGettimeofday(*Timeval, *byte) int32
-
-func gettimeofday(tv *Timeval) (err syscall.Errno) {
- r := realGettimeofday(tv, nil)
- if r < 0 {
- return syscall.GetErrno()
- }
- return 0
-}
diff --git a/vendor/golang.org/x/sys/unix/ifreq_linux.go b/vendor/golang.org/x/sys/unix/ifreq_linux.go
deleted file mode 100644
index 15721a5..0000000
--- a/vendor/golang.org/x/sys/unix/ifreq_linux.go
+++ /dev/null
@@ -1,142 +0,0 @@
-// Copyright 2021 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build linux
-// +build linux
-
-package unix
-
-import (
- "unsafe"
-)
-
-// Helpers for dealing with ifreq since it contains a union and thus requires a
-// lot of unsafe.Pointer casts to use properly.
-
-// An Ifreq is a type-safe wrapper around the raw ifreq struct. An Ifreq
-// contains an interface name and a union of arbitrary data which can be
-// accessed using the Ifreq's methods. To create an Ifreq, use the NewIfreq
-// function.
-//
-// Use the Name method to access the stored interface name. The union data
-// fields can be get and set using the following methods:
-// - Uint16/SetUint16: flags
-// - Uint32/SetUint32: ifindex, metric, mtu
-type Ifreq struct{ raw ifreq }
-
-// NewIfreq creates an Ifreq with the input network interface name after
-// validating the name does not exceed IFNAMSIZ-1 (trailing NULL required)
-// bytes.
-func NewIfreq(name string) (*Ifreq, error) {
- // Leave room for terminating NULL byte.
- if len(name) >= IFNAMSIZ {
- return nil, EINVAL
- }
-
- var ifr ifreq
- copy(ifr.Ifrn[:], name)
-
- return &Ifreq{raw: ifr}, nil
-}
-
-// TODO(mdlayher): get/set methods for hardware address sockaddr, char array, etc.
-
-// Name returns the interface name associated with the Ifreq.
-func (ifr *Ifreq) Name() string {
- return ByteSliceToString(ifr.raw.Ifrn[:])
-}
-
-// According to netdevice(7), only AF_INET addresses are returned for numerous
-// sockaddr ioctls. For convenience, we expose these as Inet4Addr since the Port
-// field and other data is always empty.
-
-// Inet4Addr returns the Ifreq union data from an embedded sockaddr as a C
-// in_addr/Go []byte (4-byte IPv4 address) value. If the sockaddr family is not
-// AF_INET, an error is returned.
-func (ifr *Ifreq) Inet4Addr() ([]byte, error) {
- raw := *(*RawSockaddrInet4)(unsafe.Pointer(&ifr.raw.Ifru[:SizeofSockaddrInet4][0]))
- if raw.Family != AF_INET {
- // Cannot safely interpret raw.Addr bytes as an IPv4 address.
- return nil, EINVAL
- }
-
- return raw.Addr[:], nil
-}
-
-// SetInet4Addr sets a C in_addr/Go []byte (4-byte IPv4 address) value in an
-// embedded sockaddr within the Ifreq's union data. v must be 4 bytes in length
-// or an error will be returned.
-func (ifr *Ifreq) SetInet4Addr(v []byte) error {
- if len(v) != 4 {
- return EINVAL
- }
-
- var addr [4]byte
- copy(addr[:], v)
-
- ifr.clear()
- *(*RawSockaddrInet4)(
- unsafe.Pointer(&ifr.raw.Ifru[:SizeofSockaddrInet4][0]),
- ) = RawSockaddrInet4{
- // Always set IP family as ioctls would require it anyway.
- Family: AF_INET,
- Addr: addr,
- }
-
- return nil
-}
-
-// Uint16 returns the Ifreq union data as a C short/Go uint16 value.
-func (ifr *Ifreq) Uint16() uint16 {
- return *(*uint16)(unsafe.Pointer(&ifr.raw.Ifru[:2][0]))
-}
-
-// SetUint16 sets a C short/Go uint16 value as the Ifreq's union data.
-func (ifr *Ifreq) SetUint16(v uint16) {
- ifr.clear()
- *(*uint16)(unsafe.Pointer(&ifr.raw.Ifru[:2][0])) = v
-}
-
-// Uint32 returns the Ifreq union data as a C int/Go uint32 value.
-func (ifr *Ifreq) Uint32() uint32 {
- return *(*uint32)(unsafe.Pointer(&ifr.raw.Ifru[:4][0]))
-}
-
-// SetUint32 sets a C int/Go uint32 value as the Ifreq's union data.
-func (ifr *Ifreq) SetUint32(v uint32) {
- ifr.clear()
- *(*uint32)(unsafe.Pointer(&ifr.raw.Ifru[:4][0])) = v
-}
-
-// clear zeroes the ifreq's union field to prevent trailing garbage data from
-// being sent to the kernel if an ifreq is reused.
-func (ifr *Ifreq) clear() {
- for i := range ifr.raw.Ifru {
- ifr.raw.Ifru[i] = 0
- }
-}
-
-// TODO(mdlayher): export as IfreqData? For now we can provide helpers such as
-// IoctlGetEthtoolDrvinfo which use these APIs under the hood.
-
-// An ifreqData is an Ifreq which carries pointer data. To produce an ifreqData,
-// use the Ifreq.withData method.
-type ifreqData struct {
- name [IFNAMSIZ]byte
- // A type separate from ifreq is required in order to comply with the
- // unsafe.Pointer rules since the "pointer-ness" of data would not be
- // preserved if it were cast into the byte array of a raw ifreq.
- data unsafe.Pointer
- // Pad to the same size as ifreq.
- _ [len(ifreq{}.Ifru) - SizeofPtr]byte
-}
-
-// withData produces an ifreqData with the pointer p set for ioctls which require
-// arbitrary pointer data.
-func (ifr Ifreq) withData(p unsafe.Pointer) ifreqData {
- return ifreqData{
- name: ifr.raw.Ifrn,
- data: p,
- }
-}
diff --git a/vendor/golang.org/x/sys/unix/ioctl.go b/vendor/golang.org/x/sys/unix/ioctl.go
deleted file mode 100644
index 6c7ad05..0000000
--- a/vendor/golang.org/x/sys/unix/ioctl.go
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
-
-package unix
-
-import (
- "runtime"
- "unsafe"
-)
-
-// ioctl itself should not be exposed directly, but additional get/set
-// functions for specific types are permissible.
-
-// IoctlSetInt performs an ioctl operation which sets an integer value
-// on fd, using the specified request number.
-func IoctlSetInt(fd int, req uint, value int) error {
- return ioctl(fd, req, uintptr(value))
-}
-
-// IoctlSetPointerInt performs an ioctl operation which sets an
-// integer value on fd, using the specified request number. The ioctl
-// argument is called with a pointer to the integer value, rather than
-// passing the integer value directly.
-func IoctlSetPointerInt(fd int, req uint, value int) error {
- v := int32(value)
- return ioctl(fd, req, uintptr(unsafe.Pointer(&v)))
-}
-
-// IoctlSetWinsize performs an ioctl on fd with a *Winsize argument.
-//
-// To change fd's window size, the req argument should be TIOCSWINSZ.
-func IoctlSetWinsize(fd int, req uint, value *Winsize) error {
- // TODO: if we get the chance, remove the req parameter and
- // hardcode TIOCSWINSZ.
- err := ioctl(fd, req, uintptr(unsafe.Pointer(value)))
- runtime.KeepAlive(value)
- return err
-}
-
-// IoctlSetTermios performs an ioctl on fd with a *Termios.
-//
-// The req value will usually be TCSETA or TIOCSETA.
-func IoctlSetTermios(fd int, req uint, value *Termios) error {
- // TODO: if we get the chance, remove the req parameter.
- err := ioctl(fd, req, uintptr(unsafe.Pointer(value)))
- runtime.KeepAlive(value)
- return err
-}
-
-// IoctlGetInt performs an ioctl operation which gets an integer value
-// from fd, using the specified request number.
-//
-// A few ioctl requests use the return value as an output parameter;
-// for those, IoctlRetInt should be used instead of this function.
-func IoctlGetInt(fd int, req uint) (int, error) {
- var value int
- err := ioctl(fd, req, uintptr(unsafe.Pointer(&value)))
- return value, err
-}
-
-func IoctlGetWinsize(fd int, req uint) (*Winsize, error) {
- var value Winsize
- err := ioctl(fd, req, uintptr(unsafe.Pointer(&value)))
- return &value, err
-}
-
-func IoctlGetTermios(fd int, req uint) (*Termios, error) {
- var value Termios
- err := ioctl(fd, req, uintptr(unsafe.Pointer(&value)))
- return &value, err
-}
diff --git a/vendor/golang.org/x/sys/unix/ioctl_linux.go b/vendor/golang.org/x/sys/unix/ioctl_linux.go
deleted file mode 100644
index 0d12c08..0000000
--- a/vendor/golang.org/x/sys/unix/ioctl_linux.go
+++ /dev/null
@@ -1,233 +0,0 @@
-// Copyright 2021 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package unix
-
-import "unsafe"
-
-// IoctlRetInt performs an ioctl operation specified by req on a device
-// associated with opened file descriptor fd, and returns a non-negative
-// integer that is returned by the ioctl syscall.
-func IoctlRetInt(fd int, req uint) (int, error) {
- ret, _, err := Syscall(SYS_IOCTL, uintptr(fd), uintptr(req), 0)
- if err != 0 {
- return 0, err
- }
- return int(ret), nil
-}
-
-func IoctlGetUint32(fd int, req uint) (uint32, error) {
- var value uint32
- err := ioctlPtr(fd, req, unsafe.Pointer(&value))
- return value, err
-}
-
-func IoctlGetRTCTime(fd int) (*RTCTime, error) {
- var value RTCTime
- err := ioctlPtr(fd, RTC_RD_TIME, unsafe.Pointer(&value))
- return &value, err
-}
-
-func IoctlSetRTCTime(fd int, value *RTCTime) error {
- return ioctlPtr(fd, RTC_SET_TIME, unsafe.Pointer(value))
-}
-
-func IoctlGetRTCWkAlrm(fd int) (*RTCWkAlrm, error) {
- var value RTCWkAlrm
- err := ioctlPtr(fd, RTC_WKALM_RD, unsafe.Pointer(&value))
- return &value, err
-}
-
-func IoctlSetRTCWkAlrm(fd int, value *RTCWkAlrm) error {
- return ioctlPtr(fd, RTC_WKALM_SET, unsafe.Pointer(value))
-}
-
-// IoctlGetEthtoolDrvinfo fetches ethtool driver information for the network
-// device specified by ifname.
-func IoctlGetEthtoolDrvinfo(fd int, ifname string) (*EthtoolDrvinfo, error) {
- ifr, err := NewIfreq(ifname)
- if err != nil {
- return nil, err
- }
-
- value := EthtoolDrvinfo{Cmd: ETHTOOL_GDRVINFO}
- ifrd := ifr.withData(unsafe.Pointer(&value))
-
- err = ioctlIfreqData(fd, SIOCETHTOOL, &ifrd)
- return &value, err
-}
-
-// IoctlGetWatchdogInfo fetches information about a watchdog device from the
-// Linux watchdog API. For more information, see:
-// https://www.kernel.org/doc/html/latest/watchdog/watchdog-api.html.
-func IoctlGetWatchdogInfo(fd int) (*WatchdogInfo, error) {
- var value WatchdogInfo
- err := ioctlPtr(fd, WDIOC_GETSUPPORT, unsafe.Pointer(&value))
- return &value, err
-}
-
-// IoctlWatchdogKeepalive issues a keepalive ioctl to a watchdog device. For
-// more information, see:
-// https://www.kernel.org/doc/html/latest/watchdog/watchdog-api.html.
-func IoctlWatchdogKeepalive(fd int) error {
- // arg is ignored and not a pointer, so ioctl is fine instead of ioctlPtr.
- return ioctl(fd, WDIOC_KEEPALIVE, 0)
-}
-
-// IoctlFileCloneRange performs an FICLONERANGE ioctl operation to clone the
-// range of data conveyed in value to the file associated with the file
-// descriptor destFd. See the ioctl_ficlonerange(2) man page for details.
-func IoctlFileCloneRange(destFd int, value *FileCloneRange) error {
- return ioctlPtr(destFd, FICLONERANGE, unsafe.Pointer(value))
-}
-
-// IoctlFileClone performs an FICLONE ioctl operation to clone the entire file
-// associated with the file description srcFd to the file associated with the
-// file descriptor destFd. See the ioctl_ficlone(2) man page for details.
-func IoctlFileClone(destFd, srcFd int) error {
- return ioctl(destFd, FICLONE, uintptr(srcFd))
-}
-
-type FileDedupeRange struct {
- Src_offset uint64
- Src_length uint64
- Reserved1 uint16
- Reserved2 uint32
- Info []FileDedupeRangeInfo
-}
-
-type FileDedupeRangeInfo struct {
- Dest_fd int64
- Dest_offset uint64
- Bytes_deduped uint64
- Status int32
- Reserved uint32
-}
-
-// IoctlFileDedupeRange performs an FIDEDUPERANGE ioctl operation to share the
-// range of data conveyed in value from the file associated with the file
-// descriptor srcFd to the value.Info destinations. See the
-// ioctl_fideduperange(2) man page for details.
-func IoctlFileDedupeRange(srcFd int, value *FileDedupeRange) error {
- buf := make([]byte, SizeofRawFileDedupeRange+
- len(value.Info)*SizeofRawFileDedupeRangeInfo)
- rawrange := (*RawFileDedupeRange)(unsafe.Pointer(&buf[0]))
- rawrange.Src_offset = value.Src_offset
- rawrange.Src_length = value.Src_length
- rawrange.Dest_count = uint16(len(value.Info))
- rawrange.Reserved1 = value.Reserved1
- rawrange.Reserved2 = value.Reserved2
-
- for i := range value.Info {
- rawinfo := (*RawFileDedupeRangeInfo)(unsafe.Pointer(
- uintptr(unsafe.Pointer(&buf[0])) + uintptr(SizeofRawFileDedupeRange) +
- uintptr(i*SizeofRawFileDedupeRangeInfo)))
- rawinfo.Dest_fd = value.Info[i].Dest_fd
- rawinfo.Dest_offset = value.Info[i].Dest_offset
- rawinfo.Bytes_deduped = value.Info[i].Bytes_deduped
- rawinfo.Status = value.Info[i].Status
- rawinfo.Reserved = value.Info[i].Reserved
- }
-
- err := ioctlPtr(srcFd, FIDEDUPERANGE, unsafe.Pointer(&buf[0]))
-
- // Output
- for i := range value.Info {
- rawinfo := (*RawFileDedupeRangeInfo)(unsafe.Pointer(
- uintptr(unsafe.Pointer(&buf[0])) + uintptr(SizeofRawFileDedupeRange) +
- uintptr(i*SizeofRawFileDedupeRangeInfo)))
- value.Info[i].Dest_fd = rawinfo.Dest_fd
- value.Info[i].Dest_offset = rawinfo.Dest_offset
- value.Info[i].Bytes_deduped = rawinfo.Bytes_deduped
- value.Info[i].Status = rawinfo.Status
- value.Info[i].Reserved = rawinfo.Reserved
- }
-
- return err
-}
-
-func IoctlHIDGetDesc(fd int, value *HIDRawReportDescriptor) error {
- return ioctlPtr(fd, HIDIOCGRDESC, unsafe.Pointer(value))
-}
-
-func IoctlHIDGetRawInfo(fd int) (*HIDRawDevInfo, error) {
- var value HIDRawDevInfo
- err := ioctlPtr(fd, HIDIOCGRAWINFO, unsafe.Pointer(&value))
- return &value, err
-}
-
-func IoctlHIDGetRawName(fd int) (string, error) {
- var value [_HIDIOCGRAWNAME_LEN]byte
- err := ioctlPtr(fd, _HIDIOCGRAWNAME, unsafe.Pointer(&value[0]))
- return ByteSliceToString(value[:]), err
-}
-
-func IoctlHIDGetRawPhys(fd int) (string, error) {
- var value [_HIDIOCGRAWPHYS_LEN]byte
- err := ioctlPtr(fd, _HIDIOCGRAWPHYS, unsafe.Pointer(&value[0]))
- return ByteSliceToString(value[:]), err
-}
-
-func IoctlHIDGetRawUniq(fd int) (string, error) {
- var value [_HIDIOCGRAWUNIQ_LEN]byte
- err := ioctlPtr(fd, _HIDIOCGRAWUNIQ, unsafe.Pointer(&value[0]))
- return ByteSliceToString(value[:]), err
-}
-
-// IoctlIfreq performs an ioctl using an Ifreq structure for input and/or
-// output. See the netdevice(7) man page for details.
-func IoctlIfreq(fd int, req uint, value *Ifreq) error {
- // It is possible we will add more fields to *Ifreq itself later to prevent
- // misuse, so pass the raw *ifreq directly.
- return ioctlPtr(fd, req, unsafe.Pointer(&value.raw))
-}
-
-// TODO(mdlayher): export if and when IfreqData is exported.
-
-// ioctlIfreqData performs an ioctl using an ifreqData structure for input
-// and/or output. See the netdevice(7) man page for details.
-func ioctlIfreqData(fd int, req uint, value *ifreqData) error {
- // The memory layout of IfreqData (type-safe) and ifreq (not type-safe) are
- // identical so pass *IfreqData directly.
- return ioctlPtr(fd, req, unsafe.Pointer(value))
-}
-
-// IoctlKCMClone attaches a new file descriptor to a multiplexor by cloning an
-// existing KCM socket, returning a structure containing the file descriptor of
-// the new socket.
-func IoctlKCMClone(fd int) (*KCMClone, error) {
- var info KCMClone
- if err := ioctlPtr(fd, SIOCKCMCLONE, unsafe.Pointer(&info)); err != nil {
- return nil, err
- }
-
- return &info, nil
-}
-
-// IoctlKCMAttach attaches a TCP socket and associated BPF program file
-// descriptor to a multiplexor.
-func IoctlKCMAttach(fd int, info KCMAttach) error {
- return ioctlPtr(fd, SIOCKCMATTACH, unsafe.Pointer(&info))
-}
-
-// IoctlKCMUnattach unattaches a TCP socket file descriptor from a multiplexor.
-func IoctlKCMUnattach(fd int, info KCMUnattach) error {
- return ioctlPtr(fd, SIOCKCMUNATTACH, unsafe.Pointer(&info))
-}
-
-// IoctlLoopGetStatus64 gets the status of the loop device associated with the
-// file descriptor fd using the LOOP_GET_STATUS64 operation.
-func IoctlLoopGetStatus64(fd int) (*LoopInfo64, error) {
- var value LoopInfo64
- if err := ioctlPtr(fd, LOOP_GET_STATUS64, unsafe.Pointer(&value)); err != nil {
- return nil, err
- }
- return &value, nil
-}
-
-// IoctlLoopSetStatus64 sets the status of the loop device associated with the
-// file descriptor fd using the LOOP_SET_STATUS64 operation.
-func IoctlLoopSetStatus64(fd int, value *LoopInfo64) error {
- return ioctlPtr(fd, LOOP_SET_STATUS64, unsafe.Pointer(value))
-}
diff --git a/vendor/golang.org/x/sys/unix/ioctl_zos.go b/vendor/golang.org/x/sys/unix/ioctl_zos.go
deleted file mode 100644
index 5384e7d..0000000
--- a/vendor/golang.org/x/sys/unix/ioctl_zos.go
+++ /dev/null
@@ -1,74 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build zos && s390x
-// +build zos,s390x
-
-package unix
-
-import (
- "runtime"
- "unsafe"
-)
-
-// ioctl itself should not be exposed directly, but additional get/set
-// functions for specific types are permissible.
-
-// IoctlSetInt performs an ioctl operation which sets an integer value
-// on fd, using the specified request number.
-func IoctlSetInt(fd int, req uint, value int) error {
- return ioctl(fd, req, uintptr(value))
-}
-
-// IoctlSetWinsize performs an ioctl on fd with a *Winsize argument.
-//
-// To change fd's window size, the req argument should be TIOCSWINSZ.
-func IoctlSetWinsize(fd int, req uint, value *Winsize) error {
- // TODO: if we get the chance, remove the req parameter and
- // hardcode TIOCSWINSZ.
- err := ioctl(fd, req, uintptr(unsafe.Pointer(value)))
- runtime.KeepAlive(value)
- return err
-}
-
-// IoctlSetTermios performs an ioctl on fd with a *Termios.
-//
-// The req value is expected to be TCSETS, TCSETSW, or TCSETSF
-func IoctlSetTermios(fd int, req uint, value *Termios) error {
- if (req != TCSETS) && (req != TCSETSW) && (req != TCSETSF) {
- return ENOSYS
- }
- err := Tcsetattr(fd, int(req), value)
- runtime.KeepAlive(value)
- return err
-}
-
-// IoctlGetInt performs an ioctl operation which gets an integer value
-// from fd, using the specified request number.
-//
-// A few ioctl requests use the return value as an output parameter;
-// for those, IoctlRetInt should be used instead of this function.
-func IoctlGetInt(fd int, req uint) (int, error) {
- var value int
- err := ioctl(fd, req, uintptr(unsafe.Pointer(&value)))
- return value, err
-}
-
-func IoctlGetWinsize(fd int, req uint) (*Winsize, error) {
- var value Winsize
- err := ioctl(fd, req, uintptr(unsafe.Pointer(&value)))
- return &value, err
-}
-
-// IoctlGetTermios performs an ioctl on fd with a *Termios.
-//
-// The req value is expected to be TCGETS
-func IoctlGetTermios(fd int, req uint) (*Termios, error) {
- var value Termios
- if req != TCGETS {
- return &value, ENOSYS
- }
- err := Tcgetattr(fd, &value)
- return &value, err
-}
diff --git a/vendor/golang.org/x/sys/unix/mkall.sh b/vendor/golang.org/x/sys/unix/mkall.sh
deleted file mode 100644
index 1b2b424..0000000
--- a/vendor/golang.org/x/sys/unix/mkall.sh
+++ /dev/null
@@ -1,231 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2009 The Go Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
-
-# This script runs or (given -n) prints suggested commands to generate files for
-# the Architecture/OS specified by the GOARCH and GOOS environment variables.
-# See README.md for more information about how the build system works.
-
-GOOSARCH="${GOOS}_${GOARCH}"
-
-# defaults
-mksyscall="go run mksyscall.go"
-mkerrors="./mkerrors.sh"
-zerrors="zerrors_$GOOSARCH.go"
-mksysctl=""
-zsysctl="zsysctl_$GOOSARCH.go"
-mksysnum=
-mktypes=
-mkasm=
-run="sh"
-cmd=""
-
-case "$1" in
--syscalls)
- for i in zsyscall*go
- do
- # Run the command line that appears in the first line
- # of the generated file to regenerate it.
- sed 1q $i | sed 's;^// ;;' | sh > _$i && gofmt < _$i > $i
- rm _$i
- done
- exit 0
- ;;
--n)
- run="cat"
- cmd="echo"
- shift
-esac
-
-case "$#" in
-0)
- ;;
-*)
- echo 'usage: mkall.sh [-n]' 1>&2
- exit 2
-esac
-
-if [[ "$GOOS" = "linux" ]]; then
- # Use the Docker-based build system
- # Files generated through docker (use $cmd so you can Ctl-C the build or run)
- $cmd docker build --tag generate:$GOOS $GOOS
- $cmd docker run --interactive --tty --volume $(cd -- "$(dirname -- "$0")/.." && /bin/pwd):/build generate:$GOOS
- exit
-fi
-
-GOOSARCH_in=syscall_$GOOSARCH.go
-case "$GOOSARCH" in
-_* | *_ | _)
- echo 'undefined $GOOS_$GOARCH:' "$GOOSARCH" 1>&2
- exit 1
- ;;
-aix_ppc)
- mkerrors="$mkerrors -maix32"
- mksyscall="go run mksyscall_aix_ppc.go -aix"
- mktypes="GOARCH=$GOARCH go tool cgo -godefs"
- ;;
-aix_ppc64)
- mkerrors="$mkerrors -maix64"
- mksyscall="go run mksyscall_aix_ppc64.go -aix"
- mktypes="GOARCH=$GOARCH go tool cgo -godefs"
- ;;
-darwin_amd64)
- mkerrors="$mkerrors -m64"
- mktypes="GOARCH=$GOARCH go tool cgo -godefs"
- mkasm="go run mkasm.go"
- ;;
-darwin_arm64)
- mkerrors="$mkerrors -m64"
- mktypes="GOARCH=$GOARCH go tool cgo -godefs"
- mkasm="go run mkasm.go"
- ;;
-dragonfly_amd64)
- mkerrors="$mkerrors -m64"
- mksyscall="go run mksyscall.go -dragonfly"
- mksysnum="go run mksysnum.go 'https://gitweb.dragonflybsd.org/dragonfly.git/blob_plain/HEAD:/sys/kern/syscalls.master'"
- mktypes="GOARCH=$GOARCH go tool cgo -godefs"
- ;;
-freebsd_386)
- mkerrors="$mkerrors -m32"
- mksyscall="go run mksyscall.go -l32"
- mksysnum="go run mksysnum.go 'https://cgit.freebsd.org/src/plain/sys/kern/syscalls.master?h=stable/12'"
- mktypes="GOARCH=$GOARCH go tool cgo -godefs"
- ;;
-freebsd_amd64)
- mkerrors="$mkerrors -m64"
- mksysnum="go run mksysnum.go 'https://cgit.freebsd.org/src/plain/sys/kern/syscalls.master?h=stable/12'"
- mktypes="GOARCH=$GOARCH go tool cgo -godefs"
- ;;
-freebsd_arm)
- mkerrors="$mkerrors"
- mksyscall="go run mksyscall.go -l32 -arm"
- mksysnum="go run mksysnum.go 'https://cgit.freebsd.org/src/plain/sys/kern/syscalls.master?h=stable/12'"
- # Let the type of C char be signed for making the bare syscall
- # API consistent across platforms.
- mktypes="GOARCH=$GOARCH go tool cgo -godefs -- -fsigned-char"
- ;;
-freebsd_arm64)
- mkerrors="$mkerrors -m64"
- mksysnum="go run mksysnum.go 'https://cgit.freebsd.org/src/plain/sys/kern/syscalls.master?h=stable/12'"
- mktypes="GOARCH=$GOARCH go tool cgo -godefs -- -fsigned-char"
- ;;
-freebsd_riscv64)
- mkerrors="$mkerrors -m64"
- mksysnum="go run mksysnum.go 'https://cgit.freebsd.org/src/plain/sys/kern/syscalls.master?h=stable/12'"
- mktypes="GOARCH=$GOARCH go tool cgo -godefs -- -fsigned-char"
- ;;
-netbsd_386)
- mkerrors="$mkerrors -m32"
- mksyscall="go run mksyscall.go -l32 -netbsd"
- mksysnum="go run mksysnum.go 'http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/sys/kern/syscalls.master'"
- mktypes="GOARCH=$GOARCH go tool cgo -godefs"
- ;;
-netbsd_amd64)
- mkerrors="$mkerrors -m64"
- mksyscall="go run mksyscall.go -netbsd"
- mksysnum="go run mksysnum.go 'http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/sys/kern/syscalls.master'"
- mktypes="GOARCH=$GOARCH go tool cgo -godefs"
- ;;
-netbsd_arm)
- mkerrors="$mkerrors"
- mksyscall="go run mksyscall.go -l32 -netbsd -arm"
- mksysnum="go run mksysnum.go 'http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/sys/kern/syscalls.master'"
- # Let the type of C char be signed for making the bare syscall
- # API consistent across platforms.
- mktypes="GOARCH=$GOARCH go tool cgo -godefs -- -fsigned-char"
- ;;
-netbsd_arm64)
- mkerrors="$mkerrors -m64"
- mksyscall="go run mksyscall.go -netbsd"
- mksysnum="go run mksysnum.go 'http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/sys/kern/syscalls.master'"
- mktypes="GOARCH=$GOARCH go tool cgo -godefs"
- ;;
-openbsd_386)
- mkasm="go run mkasm.go"
- mkerrors="$mkerrors -m32"
- mksyscall="go run mksyscall.go -l32 -openbsd -libc"
- mksysctl="go run mksysctl_openbsd.go"
- mktypes="GOARCH=$GOARCH go tool cgo -godefs"
- ;;
-openbsd_amd64)
- mkasm="go run mkasm.go"
- mkerrors="$mkerrors -m64"
- mksyscall="go run mksyscall.go -openbsd -libc"
- mksysctl="go run mksysctl_openbsd.go"
- mktypes="GOARCH=$GOARCH go tool cgo -godefs"
- ;;
-openbsd_arm)
- mkasm="go run mkasm.go"
- mkerrors="$mkerrors"
- mksyscall="go run mksyscall.go -l32 -openbsd -arm -libc"
- mksysctl="go run mksysctl_openbsd.go"
- # Let the type of C char be signed for making the bare syscall
- # API consistent across platforms.
- mktypes="GOARCH=$GOARCH go tool cgo -godefs -- -fsigned-char"
- ;;
-openbsd_arm64)
- mkasm="go run mkasm.go"
- mkerrors="$mkerrors -m64"
- mksyscall="go run mksyscall.go -openbsd -libc"
- mksysctl="go run mksysctl_openbsd.go"
- # Let the type of C char be signed for making the bare syscall
- # API consistent across platforms.
- mktypes="GOARCH=$GOARCH go tool cgo -godefs -- -fsigned-char"
- ;;
-openbsd_mips64)
- mkerrors="$mkerrors -m64"
- mksyscall="go run mksyscall.go -openbsd"
- mksysctl="go run mksysctl_openbsd.go"
- mksysnum="go run mksysnum.go 'https://cvsweb.openbsd.org/cgi-bin/cvsweb/~checkout~/src/sys/kern/syscalls.master'"
- # Let the type of C char be signed for making the bare syscall
- # API consistent across platforms.
- mktypes="GOARCH=$GOARCH go tool cgo -godefs -- -fsigned-char"
- ;;
-solaris_amd64)
- mksyscall="go run mksyscall_solaris.go"
- mkerrors="$mkerrors -m64"
- mksysnum=
- mktypes="GOARCH=$GOARCH go tool cgo -godefs"
- ;;
-illumos_amd64)
- mksyscall="go run mksyscall_solaris.go"
- mkerrors=
- mksysnum=
- mktypes="GOARCH=$GOARCH go tool cgo -godefs"
- ;;
-*)
- echo 'unrecognized $GOOS_$GOARCH: ' "$GOOSARCH" 1>&2
- exit 1
- ;;
-esac
-
-(
- if [ -n "$mkerrors" ]; then echo "$mkerrors |gofmt >$zerrors"; fi
- case "$GOOS" in
- *)
- syscall_goos="syscall_$GOOS.go"
- case "$GOOS" in
- darwin | dragonfly | freebsd | netbsd | openbsd)
- syscall_goos="syscall_bsd.go $syscall_goos"
- ;;
- esac
- if [ -n "$mksyscall" ]; then
- if [ "$GOOSARCH" == "aix_ppc64" ]; then
- # aix/ppc64 script generates files instead of writing to stdin.
- echo "$mksyscall -tags $GOOS,$GOARCH $syscall_goos $GOOSARCH_in && gofmt -w zsyscall_$GOOSARCH.go && gofmt -w zsyscall_"$GOOSARCH"_gccgo.go && gofmt -w zsyscall_"$GOOSARCH"_gc.go " ;
- elif [ "$GOOS" == "illumos" ]; then
- # illumos code generation requires a --illumos switch
- echo "$mksyscall -illumos -tags illumos,$GOARCH syscall_illumos.go |gofmt > zsyscall_illumos_$GOARCH.go";
- # illumos implies solaris, so solaris code generation is also required
- echo "$mksyscall -tags solaris,$GOARCH syscall_solaris.go syscall_solaris_$GOARCH.go |gofmt >zsyscall_solaris_$GOARCH.go";
- else
- echo "$mksyscall -tags $GOOS,$GOARCH $syscall_goos $GOOSARCH_in |gofmt >zsyscall_$GOOSARCH.go";
- fi
- fi
- esac
- if [ -n "$mksysctl" ]; then echo "$mksysctl |gofmt >$zsysctl"; fi
- if [ -n "$mksysnum" ]; then echo "$mksysnum |gofmt >zsysnum_$GOOSARCH.go"; fi
- if [ -n "$mktypes" ]; then echo "$mktypes types_$GOOS.go | go run mkpost.go > ztypes_$GOOSARCH.go"; fi
- if [ -n "$mkasm" ]; then echo "$mkasm $GOOS $GOARCH"; fi
-) | $run
diff --git a/vendor/golang.org/x/sys/unix/mkerrors.sh b/vendor/golang.org/x/sys/unix/mkerrors.sh
deleted file mode 100644
index 2ab44aa..0000000
--- a/vendor/golang.org/x/sys/unix/mkerrors.sh
+++ /dev/null
@@ -1,778 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2009 The Go Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
-
-# Generate Go code listing errors and other #defined constant
-# values (ENAMETOOLONG etc.), by asking the preprocessor
-# about the definitions.
-
-unset LANG
-export LC_ALL=C
-export LC_CTYPE=C
-
-if test -z "$GOARCH" -o -z "$GOOS"; then
- echo 1>&2 "GOARCH or GOOS not defined in environment"
- exit 1
-fi
-
-# Check that we are using the new build system if we should
-if [[ "$GOOS" = "linux" ]] && [[ "$GOLANG_SYS_BUILD" != "docker" ]]; then
- echo 1>&2 "In the Docker based build system, mkerrors should not be called directly."
- echo 1>&2 "See README.md"
- exit 1
-fi
-
-if [[ "$GOOS" = "aix" ]]; then
- CC=${CC:-gcc}
-else
- CC=${CC:-cc}
-fi
-
-if [[ "$GOOS" = "solaris" ]]; then
- # Assumes GNU versions of utilities in PATH.
- export PATH=/usr/gnu/bin:$PATH
-fi
-
-uname=$(uname)
-
-includes_AIX='
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#define AF_LOCAL AF_UNIX
-'
-
-includes_Darwin='
-#define _DARWIN_C_SOURCE
-#define KERNEL 1
-#define _DARWIN_USE_64_BIT_INODE
-#define __APPLE_USE_RFC_3542
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-// for backwards compatibility because moved TIOCREMOTE to Kernel.framework after MacOSX12.0.sdk.
-#define TIOCREMOTE 0x80047469
-'
-
-includes_DragonFly='
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-'
-
-includes_FreeBSD='
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#if __FreeBSD__ >= 10
-#define IFT_CARP 0xf8 // IFT_CARP is deprecated in FreeBSD 10
-#undef SIOCAIFADDR
-#define SIOCAIFADDR _IOW(105, 26, struct oifaliasreq) // ifaliasreq contains if_data
-#undef SIOCSIFPHYADDR
-#define SIOCSIFPHYADDR _IOW(105, 70, struct oifaliasreq) // ifaliasreq contains if_data
-#endif
-'
-
-includes_Linux='
-#define _LARGEFILE_SOURCE
-#define _LARGEFILE64_SOURCE
-#ifndef __LP64__
-#define _FILE_OFFSET_BITS 64
-#endif
-#define _GNU_SOURCE
-
-// is broken on powerpc64, as it fails to include definitions of
-// these structures. We just include them copied from .
-#if defined(__powerpc__)
-struct sgttyb {
- char sg_ispeed;
- char sg_ospeed;
- char sg_erase;
- char sg_kill;
- short sg_flags;
-};
-
-struct tchars {
- char t_intrc;
- char t_quitc;
- char t_startc;
- char t_stopc;
- char t_eofc;
- char t_brkc;
-};
-
-struct ltchars {
- char t_suspc;
- char t_dsuspc;
- char t_rprntc;
- char t_flushc;
- char t_werasc;
- char t_lnextc;
-};
-#endif
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-
-#if defined(__sparc__)
-// On sparc{,64}, the kernel defines struct termios2 itself which clashes with the
-// definition in glibc. As only the error constants are needed here, include the
-// generic termibits.h (which is included by termbits.h on sparc).
-#include
-#else
-#include
-#endif
-
-#ifndef MSG_FASTOPEN
-#define MSG_FASTOPEN 0x20000000
-#endif
-
-#ifndef PTRACE_GETREGS
-#define PTRACE_GETREGS 0xc
-#endif
-
-#ifndef PTRACE_SETREGS
-#define PTRACE_SETREGS 0xd
-#endif
-
-#ifndef SOL_NETLINK
-#define SOL_NETLINK 270
-#endif
-
-#ifndef SOL_SMC
-#define SOL_SMC 286
-#endif
-
-#ifdef SOL_BLUETOOTH
-// SPARC includes this in /usr/include/sparc64-linux-gnu/bits/socket.h
-// but it is already in bluetooth_linux.go
-#undef SOL_BLUETOOTH
-#endif
-
-// Certain constants are missing from the fs/crypto UAPI
-#define FS_KEY_DESC_PREFIX "fscrypt:"
-#define FS_KEY_DESC_PREFIX_SIZE 8
-#define FS_MAX_KEY_SIZE 64
-
-// The code generator produces -0x1 for (~0), but an unsigned value is necessary
-// for the tipc_subscr timeout __u32 field.
-#undef TIPC_WAIT_FOREVER
-#define TIPC_WAIT_FOREVER 0xffffffff
-
-// Copied from linux/l2tp.h
-// Including linux/l2tp.h here causes conflicts between linux/in.h
-// and netinet/in.h included via net/route.h above.
-#define IPPROTO_L2TP 115
-
-// Copied from linux/hid.h.
-// Keep in sync with the size of the referenced fields.
-#define _HIDIOCGRAWNAME_LEN 128 // sizeof_field(struct hid_device, name)
-#define _HIDIOCGRAWPHYS_LEN 64 // sizeof_field(struct hid_device, phys)
-#define _HIDIOCGRAWUNIQ_LEN 64 // sizeof_field(struct hid_device, uniq)
-
-#define _HIDIOCGRAWNAME HIDIOCGRAWNAME(_HIDIOCGRAWNAME_LEN)
-#define _HIDIOCGRAWPHYS HIDIOCGRAWPHYS(_HIDIOCGRAWPHYS_LEN)
-#define _HIDIOCGRAWUNIQ HIDIOCGRAWUNIQ(_HIDIOCGRAWUNIQ_LEN)
-
-'
-
-includes_NetBSD='
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include