### chacha20 - ChaCha20
#### Yawning Angel (yawning at schwanenlied dot me)
Yet another Go ChaCha20 implementation. Everything else I found was slow,
didn't support all the variants I need to use, or relied on cgo to go fast.
* 20 round, 256 bit key only. Everything else is pointless and stupid.
* IETF 96 bit nonce variant.
* XChaCha 24 byte nonce variant.
* SSSE3 and AVX2 support on amd64 targets.
* Incremental encrypt/decrypt support, unlike golang.org/x/crypto/salsa20.
@ -0,0 +1,290 @@
// Copyright (C) 2019 Yawning Angel
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// Package chacha20 implements the ChaCha20 stream cipher.
package chacha20 // import "blitter.com/go/chacha20"
import (
const (
	// KeySize is the ChaCha20 key size in bytes (256 bits). It is the only
	// key length accepted by New and ReKey.
	KeySize = 32
	// NonceSize is the original ChaCha20 nonce size in bytes (64 bits). With
	// this nonce, state words 12 and 13 form a 64 bit block counter.
	NonceSize = 8
	// INonceSize is the IETF ChaCha20 nonce size in bytes (96 bits). With
	// this nonce only state word 12 is used as the block counter.
	INonceSize = 12
	// XNonceSize is the XChaCha20 nonce size in bytes. The first 16 bytes
	// are fed to HChaCha to derive a sub-key, the last 8 become the nonce.
	XNonceSize = 24
	// HNonceSize is the HChaCha20 nonce size in bytes.
	HNonceSize = 16
var (
	// ErrInvalidKey is the error returned when the key is invalid, i.e. its
	// length is not KeySize.
	ErrInvalidKey = errors.New("chacha20: key length must be KeySize bytes")
	// ErrInvalidNonce is the error returned when the nonce is invalid, i.e.
	// its length is none of NonceSize, INonceSize or XNonceSize.
	ErrInvalidNonce = errors.New("chacha20: nonce length must be NonceSize/INonceSize/XNonceSize bytes")
	// ErrInvalidCounter is the error returned when the counter is invalid,
	// i.e. Seek was asked for a block that an IETF instance cannot address.
	ErrInvalidCounter = errors.New("chacha20: block counter is invalid (out of range)")
	// supportedImpls lists the registered implementations; it is filled in
	// by this package's init function.
	supportedImpls []api.Implementation
	// activeImpl is the implementation used for all block and HChaCha
	// computations.
	activeImpl api.Implementation
	// Compile-time check that *Cipher satisfies cipher.Stream.
	_ cipher.Stream = (*Cipher)(nil)
// Cipher is an instance of ChaCha20/XChaCha20 using a particular key and nonce.
type Cipher struct {
	// state is the 16 word ChaCha state: constants (0-3), key (4-11),
	// block counter (12, and 13 for non-IETF), nonce (remaining words).
	state [api.StateSize]uint32
	// buf holds one block of buffered key stream. It is also used as
	// scratch space for the XChaCha sub-key while (re)keying.
	buf [api.BlockSize]byte
	// off is the offset of the first unused byte in buf; api.BlockSize
	// means the buffer is empty.
	off int
	// ietf is true when the instance was keyed with a 96 bit nonce and
	// therefore only has a 32 bit block counter.
	ietf bool
// Reset zeros the key data so that it will no longer appear in the process's
// memory.
func (c *Cipher) Reset() {
for i := range c.state {
c.state[i] = 0
for i := range c.buf {
c.buf[i] = 0
// Seek sets the block counter to a given offset.
func (c *Cipher) Seek(blockCounter uint64) error {
if c.ietf {
if blockCounter > math.MaxUint32 {
return ErrInvalidCounter
c.state[12] = uint32(blockCounter)
} else {
c.state[12] = uint32(blockCounter)
c.state[13] = uint32(blockCounter >> 32)
c.off = api.BlockSize
return nil
// ReKey reinitializes the ChaCha20/XChaCha20 instance with the provided key
// and nonce.
func (c *Cipher) ReKey(key, nonce []byte) error {
return c.doReKey(key, nonce)
func (c *Cipher) doReKey(key, nonce []byte) error {
if len(key) != KeySize {
return ErrInvalidKey
var subKey []byte
switch len(nonce) {
case NonceSize, INonceSize:
case XNonceSize:
subKey = c.buf[:KeySize]
activeImpl.HChaCha(key, nonce, subKey)
key = subKey
nonce = nonce[16:24]
return ErrInvalidNonce
_ = key[31] // Force bounds check elimination.
c.state[0] = api.Sigma0
c.state[1] = api.Sigma1
c.state[2] = api.Sigma2
c.state[3] = api.Sigma3
c.state[4] = binary.LittleEndian.Uint32(key[0:4])
c.state[5] = binary.LittleEndian.Uint32(key[4:8])
c.state[6] = binary.LittleEndian.Uint32(key[8:12])
c.state[7] = binary.LittleEndian.Uint32(key[12:16])
c.state[8] = binary.LittleEndian.Uint32(key[16:20])
c.state[9] = binary.LittleEndian.Uint32(key[20:24])
c.state[10] = binary.LittleEndian.Uint32(key[24:28])
c.state[11] = binary.LittleEndian.Uint32(key[28:32])
c.state[12] = 0
if len(nonce) == INonceSize {
_ = nonce[11] // Force bounds check elimination.
c.state[13] = binary.LittleEndian.Uint32(nonce[0:4])
c.state[14] = binary.LittleEndian.Uint32(nonce[4:8])
c.state[15] = binary.LittleEndian.Uint32(nonce[8:12])
c.ietf = true
} else {
_ = nonce[7] // Force bounds check elimination.
c.state[13] = 0
c.state[14] = binary.LittleEndian.Uint32(nonce[0:4])
c.state[15] = binary.LittleEndian.Uint32(nonce[4:8])
c.ietf = false
c.off = api.BlockSize
if subKey != nil {
for i := range subKey {
subKey[i] = 0
return nil
// New returns a new ChaCha20/XChaCha20 instance.
func New(key, nonce []byte) (*Cipher, error) {
var c Cipher
if err := c.doReKey(key, nonce); err != nil {
return nil, err
return &c, nil
// HChaCha is the HChaCha20 hash function used to make XChaCha.
func HChaCha(key, nonce []byte, dst *[32]byte) {
activeImpl.HChaCha(key, nonce, dst[:])
// XORKeyStream sets dst to the result of XORing src with the key stream. Dst
// and src may be the same slice but otherwise should not overlap.
func (c *Cipher) XORKeyStream(dst, src []byte) {
if len(dst) < len(src) {
src = src[:len(dst)]
for remaining := len(src); remaining > 0; {
// Process multiple blocks at once.
if c.off == api.BlockSize {
nrBlocks := remaining / api.BlockSize
directBytes := nrBlocks * api.BlockSize
if nrBlocks > 0 {
c.doBlocks(dst, src, nrBlocks)
remaining -= directBytes
if remaining == 0 {
dst = dst[directBytes:]
src = src[directBytes:]
// If there's a partial block, generate 1 block of keystream into
// the internal buffer.
c.doBlocks(c.buf[:], nil, 1)
c.off = 0
// Process partial blocks from the buffered keystream.
toXor := api.BlockSize - c.off
if remaining < toXor {
toXor = remaining
if toXor > 0 {
// The inliner doesn't want to inline this function, but my
// attempts to force BCE don't seem to work with manual
// inlining.
// Taking the extra function call overhead here appears to be
// worth it.
c.xorBufBytes(dst, src, toXor)
dst = dst[toXor:]
src = src[toXor:]
remaining -= toXor
func (c *Cipher) xorBufBytes(dst, src []byte, n int) {
// Force bounds check elimination.
buf := c.buf[c.off:]
_ = buf[n-1]
_ = dst[n-1]
_ = src[n-1]
for i := 0; i < n; i++ {
dst[i] = buf[i] ^ src[i]
c.off += n
// KeyStream sets dst to the raw keystream.
func (c *Cipher) KeyStream(dst []byte) {
for remaining := len(dst); remaining > 0; {
// Process multiple blocks at once.
if c.off == api.BlockSize {
nrBlocks := remaining / api.BlockSize
directBytes := nrBlocks * api.BlockSize
if nrBlocks > 0 {
c.doBlocks(dst, nil, nrBlocks)
remaining -= directBytes
if remaining == 0 {
dst = dst[directBytes:]
// If there's a partial block, generate 1 block of keystream into
// the internal buffer.
c.doBlocks(c.buf[:], nil, 1)
c.off = 0
// Process partial blocks from the buffered keystream.
toCopy := api.BlockSize - c.off
if remaining < toCopy {
toCopy = remaining
if toCopy > 0 {
copy(dst[:toCopy], c.buf[c.off:c.off+toCopy])
dst = dst[toCopy:]
remaining -= toCopy
c.off += toCopy
func (c *Cipher) doBlocks(dst, src []byte, nrBlocks int) {
if c.ietf {
ctr := uint64(c.state[12])
if ctr+uint64(nrBlocks) > math.MaxUint32 {
panic("chacha20: will exceed key stream per nonce limit")
activeImpl.Blocks(&c.state, dst, src, nrBlocks)
func init() {
supportedImpls = hardware.Register(supportedImpls)
supportedImpls = ref.Register(supportedImpls)
activeImpl = supportedImpls[0]
@ -0,0 +1,59 @@
// Copyright (C) 2019 Yawning Angel
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// Package api provides the ChaCha20 implementation abstract interface.
package api
const (
	// BlockSize is the size of a ChaCha20 block in bytes.
	BlockSize = 64
	// StateSize is the size of the ChaCha20 state as 32 bit unsigned words.
	StateSize = 16
	// HashSize is the size of the HChaCha output in bytes.
	HashSize = 32
	// HNonceSize is the HChaCha20 nonce size in bytes.
	HNonceSize = 16
	// Sigma0 is the first word of the ChaCha constant ("expa" when stored
	// little-endian).
	Sigma0 = uint32(0x61707865)
	// Sigma1 is the second word of the ChaCha constant ("nd 3").
	Sigma1 = uint32(0x3320646e)
	// Sigma2 is the third word of the ChaCha constant ("2-by").
	Sigma2 = uint32(0x79622d32)
	// Sigma3 is the fourth word of the ChaCha constant ("te k").
	Sigma3 = uint32(0x6b206574)
// Implementation is a ChaCha20 implementation. It is the abstraction that
// lets the portable and the hardware accelerated back ends be used
// interchangeably.
type Implementation interface {
	// Name returns the name of the implementation.
	Name() string
	// Blocks calculates the ChaCha20 blocks. If src is not nil, dst will
	// be set to the XOR of src with the key stream, otherwise dst will be
	// set to the key stream.
	//
	// x is the cipher state and nrBlocks the number of BlockSize byte
	// blocks to process.
	Blocks(x *[StateSize]uint32, dst, src []byte, nrBlocks int)
	// HChaCha calculates the HChaCha20 hash of key and nonce into dst.
	// Note: `dst` is guaranteed to be HashSize bytes.
	HChaCha(key, nonce []byte, dst []byte)
@ -0,0 +1,27 @@
// Copyright (C) 2019 Yawning Angel
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// Package hardware provides the hardware accelerated ChaCha20 implementations.
package hardware
import "blitter.com/go/chacha20/internal/api"
var hardwareImpls []api.Implementation
// Register appends the implementation(s) to the provided slice, and returns the
// new slice.
func Register(impls []api.Implementation) []api.Implementation {
return append(impls, hardwareImpls...)
@ -0,0 +1,88 @@
// Copyright (C) 2019 Yawning Angel
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// +build amd64,!noasm
package hardware
import (
// The four functions below are declared without a Go body.
// NOTE(review): presumably implemented in the accompanying amd64 assembly
// file, which is not visible here.

// blocksAVX2 is the AVX2 block function. Note the argument order: the input
// buffer comes before the output buffer.
func blocksAVX2(s *[api.StateSize]uint32, in, out []byte)
// hChaChaAVX2 is the AVX2 HChaCha20 function; dst points at the first byte of
// the output buffer.
func hChaChaAVX2(key, nonce []byte, dst *byte)
// blocksSSSE3 is the SSSE3 block function, with the same argument order as
// blocksAVX2.
func blocksSSSE3(s *[api.StateSize]uint32, in, out []byte)
// hChaChaSSSE3 is the SSSE3 HChaCha20 function; dst points at the first byte
// of the output buffer.
func hChaChaSSSE3(key, nonce []byte, dst *byte)
// implAmd64 is an api.Implementation backed by a pair of amd64 routines.
type implAmd64 struct {
	// name is the value reported by Name.
	name string
	// blocksFn is the block routine, adapted to the api.Implementation
	// Blocks argument order (state, dst, src, nrBlocks).
	blocksFn func(*[api.StateSize]uint32, []byte, []byte, int)
	// hChaChaFn is the HChaCha routine; its last argument points at the
	// first byte of the output buffer.
	hChaChaFn func([]byte, []byte, *byte)
func (impl *implAmd64) Name() string {
return impl.name
func (impl *implAmd64) Blocks(x *[api.StateSize]uint32, dst, src []byte, nrBlocks int) {
impl.blocksFn(x, dst, src, nrBlocks)
func (impl *implAmd64) HChaCha(key, nonce []byte, dst []byte) {
impl.hChaChaFn(key, nonce, &dst[0])
func blockWrapper(fn func(*[api.StateSize]uint32, []byte, []byte)) func(*[api.StateSize]uint32, []byte, []byte, int) {
return func(x *[api.StateSize]uint32, dst, src []byte, nrBlocks int) {
sz := nrBlocks * api.BlockSize
if src != nil {
fn(x, src[:sz], dst[:sz])
} else {
// Sub-optimal, but the compiler special cases this to an assembly
// optimized runtime.memclrNoHeapPointers, so it's not terrible.
for i := range dst[:sz] {
dst[i] = 0
fn(x, dst[:sz], dst[:sz])
func init() {
if cpu.X86.HasAVX2 {
hardwareImpls = append(hardwareImpls, &implAmd64{
name: "amd64_avx2",
blocksFn: blockWrapper(blocksAVX2),
hChaChaFn: hChaChaAVX2,
if cpu.X86.HasSSE3 {
hardwareImpls = append(hardwareImpls, &implAmd64{
name: "amd64_ssse3",
blocksFn: blockWrapper(blocksSSSE3),
hChaChaFn: hChaChaSSSE3,
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,374 @@
// Copyright (C) 2019 Yawning Angel
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// Package ref provides the portable ChaCha20 implementation.
package ref
import (
// rounds is the number of ChaCha rounds; the round loops below perform two
// rounds (one column round, one diagonal round) per iteration.
const rounds = 20
// Impl is the reference implementation (exposed for testing).
var Impl = &implRef{}
// implRef is the portable, pure Go api.Implementation. It carries no state.
type implRef struct{}
func (impl *implRef) Name() string {
return "ref"
func (impl *implRef) Blocks(x *[api.StateSize]uint32, dst, src []byte, nrBlocks int) {
for n := 0; n < nrBlocks; n++ {
x0, x1, x2, x3 := api.Sigma0, api.Sigma1, api.Sigma2, api.Sigma3
x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]
for i := rounds; i > 0; i -= 2 {
// quarterround(x, 0, 4, 8, 12)
x0 += x4
x12 ^= x0
x12 = bits.RotateLeft32(x12, 16)
x8 += x12
x4 ^= x8
x4 = bits.RotateLeft32(x4, 12)
x0 += x4
x12 ^= x0
x12 = bits.RotateLeft32(x12, 8)
x8 += x12
x4 ^= x8
x4 = bits.RotateLeft32(x4, 7)
// quarterround(x, 1, 5, 9, 13)
x1 += x5
x13 ^= x1
x13 = bits.RotateLeft32(x13, 16)
x9 += x13
x5 ^= x9
x5 = bits.RotateLeft32(x5, 12)
x1 += x5
x13 ^= x1
x13 = bits.RotateLeft32(x13, 8)
x9 += x13
x5 ^= x9
x5 = bits.RotateLeft32(x5, 7)
// quarterround(x, 2, 6, 10, 14)
x2 += x6
x14 ^= x2
x14 = bits.RotateLeft32(x14, 16)
x10 += x14
x6 ^= x10
x6 = bits.RotateLeft32(x6, 12)
x2 += x6
x14 ^= x2
x14 = bits.RotateLeft32(x14, 8)
x10 += x14
x6 ^= x10
x6 = bits.RotateLeft32(x6, 7)
// quarterround(x, 3, 7, 11, 15)
x3 += x7
x15 ^= x3
x15 = bits.RotateLeft32(x15, 16)
x11 += x15
x7 ^= x11
x7 = bits.RotateLeft32(x7, 12)
x3 += x7
x15 ^= x3
x15 = bits.RotateLeft32(x15, 8)
x11 += x15
x7 ^= x11
x7 = bits.RotateLeft32(x7, 7)
// quarterround(x, 0, 5, 10, 15)
x0 += x5
x15 ^= x0
x15 = bits.RotateLeft32(x15, 16)
x10 += x15
x5 ^= x10
x5 = bits.RotateLeft32(x5, 12)
x0 += x5
x15 ^= x0
x15 = bits.RotateLeft32(x15, 8)
x10 += x15
x5 ^= x10
x5 = bits.RotateLeft32(x5, 7)
// quarterround(x, 1, 6, 11, 12)
x1 += x6
x12 ^= x1
x12 = bits.RotateLeft32(x12, 16)
x11 += x12
x6 ^= x11
x6 = bits.RotateLeft32(x6, 12)
x1 += x6
x12 ^= x1
x12 = bits.RotateLeft32(x12, 8)
x11 += x12
x6 ^= x11
x6 = bits.RotateLeft32(x6, 7)
// quarterround(x, 2, 7, 8, 13)
x2 += x7
x13 ^= x2
x13 = bits.RotateLeft32(x13, 16)
x8 += x13
x7 ^= x8
x7 = bits.RotateLeft32(x7, 12)
x2 += x7
x13 ^= x2
x13 = bits.RotateLeft32(x13, 8)
x8 += x13
x7 ^= x8
x7 = bits.RotateLeft32(x7, 7)
// quarterround(x, 3, 4, 9, 14)
x3 += x4
x14 ^= x3
x14 = bits.RotateLeft32(x14, 16)
x9 += x14
x4 ^= x9
x4 = bits.RotateLeft32(x4, 12)
x3 += x4
x14 ^= x3
x14 = bits.RotateLeft32(x14, 8)
x9 += x14
x4 ^= x9
x4 = bits.RotateLeft32(x4, 7)
x0 += api.Sigma0
x1 += api.Sigma1
x2 += api.Sigma2
x3 += api.Sigma3
x4 += x[4]
x5 += x[5]
x6 += x[6]
x7 += x[7]
x8 += x[8]
x9 += x[9]
x10 += x[10]
x11 += x[11]
x12 += x[12]
x13 += x[13]
x14 += x[14]
x15 += x[15]
_ = dst[api.BlockSize-1] // Force bounds check elimination.
if src != nil {
_ = src[api.BlockSize-1] // Force bounds check elimination.
binary.LittleEndian.PutUint32(dst[0:4], binary.LittleEndian.Uint32(src[0:4])^x0)
binary.LittleEndian.PutUint32(dst[4:8], binary.LittleEndian.Uint32(src[4:8])^x1)
binary.LittleEndian.PutUint32(dst[8:12], binary.LittleEndian.Uint32(src[8:12])^x2)
binary.LittleEndian.PutUint32(dst[12:16], binary.LittleEndian.Uint32(src[12:16])^x3)
binary.LittleEndian.PutUint32(dst[16:20], binary.LittleEndian.Uint32(src[16:20])^x4)
binary.LittleEndian.PutUint32(dst[20:24], binary.LittleEndian.Uint32(src[20:24])^x5)
binary.LittleEndian.PutUint32(dst[24:28], binary.LittleEndian.Uint32(src[24:28])^x6)
binary.LittleEndian.PutUint32(dst[28:32], binary.LittleEndian.Uint32(src[28:32])^x7)
binary.LittleEndian.PutUint32(dst[32:36], binary.LittleEndian.Uint32(src[32:36])^x8)
binary.LittleEndian.PutUint32(dst[36:40], binary.LittleEndian.Uint32(src[36:40])^x9)
binary.LittleEndian.PutUint32(dst[40:44], binary.LittleEndian.Uint32(src[40:44])^x10)
binary.LittleEndian.PutUint32(dst[44:48], binary.LittleEndian.Uint32(src[44:48])^x11)
binary.LittleEndian.PutUint32(dst[48:52], binary.LittleEndian.Uint32(src[48:52])^x12)
binary.LittleEndian.PutUint32(dst[52:56], binary.LittleEndian.Uint32(src[52:56])^x13)
binary.LittleEndian.PutUint32(dst[56:60], binary.LittleEndian.Uint32(src[56:60])^x14)
binary.LittleEndian.PutUint32(dst[60:64], binary.LittleEndian.Uint32(src[60:64])^x15)
src = src[api.BlockSize:]
} else {
binary.LittleEndian.PutUint32(dst[0:4], x0)
binary.LittleEndian.PutUint32(dst[4:8], x1)
binary.LittleEndian.PutUint32(dst[8:12], x2)
binary.LittleEndian.PutUint32(dst[12:16], x3)
binary.LittleEndian.PutUint32(dst[16:20], x4)
binary.LittleEndian.PutUint32(dst[20:24], x5)
binary.LittleEndian.PutUint32(dst[24:28], x6)
binary.LittleEndian.PutUint32(dst[28:32], x7)
binary.LittleEndian.PutUint32(dst[32:36], x8)
binary.LittleEndian.PutUint32(dst[36:40], x9)
binary.LittleEndian.PutUint32(dst[40:44], x10)
binary.LittleEndian.PutUint32(dst[44:48], x11)
binary.LittleEndian.PutUint32(dst[48:52], x12)
binary.LittleEndian.PutUint32(dst[52:56], x13)
binary.LittleEndian.PutUint32(dst[56:60], x14)
binary.LittleEndian.PutUint32(dst[60:64], x15)
dst = dst[api.BlockSize:]
// Stoping at 2^70 bytes per nonce is the user's responsibility.
ctr := uint64(x[13])<<32 | uint64(x[12])
x[12] = uint32(ctr)
x[13] = uint32(ctr >> 32)
func (impl *implRef) HChaCha(key, nonce []byte, dst []byte) {
// Force bounds check elimination.
_ = key[31]
_ = nonce[api.HNonceSize-1]
x0, x1, x2, x3 := api.Sigma0, api.Sigma1, api.Sigma2, api.Sigma3
x4 := binary.LittleEndian.Uint32(key[0:4])
x5 := binary.LittleEndian.Uint32(key[4:8])
x6 := binary.LittleEndian.Uint32(key[8:12])
x7 := binary.LittleEndian.Uint32(key[12:16])
x8 := binary.LittleEndian.Uint32(key[16:20])
x9 := binary.LittleEndian.Uint32(key[20:24])
x10 := binary.LittleEndian.Uint32(key[24:28])
x11 := binary.LittleEndian.Uint32(key[28:32])
x12 := binary.LittleEndian.Uint32(nonce[0:4])
x13 := binary.LittleEndian.Uint32(nonce[4:8])
x14 := binary.LittleEndian.Uint32(nonce[8:12])
x15 := binary.LittleEndian.Uint32(nonce[12:16])
// Yes, this could be carved out into a function for code reuse (TM)
// however the go inliner won't inline it.
for i := rounds; i > 0; i -= 2 {
// quarterround(x, 0, 4, 8, 12)
x0 += x4
x12 ^= x0
x12 = bits.RotateLeft32(x12, 16)
x8 += x12
x4 ^= x8
x4 = bits.RotateLeft32(x4, 12)
x0 += x4
x12 ^= x0
x12 = bits.RotateLeft32(x12, 8)
x8 += x12
x4 ^= x8
x4 = bits.RotateLeft32(x4, 7)
// quarterround(x, 1, 5, 9, 13)
x1 += x5
x13 ^= x1
x13 = bits.RotateLeft32(x13, 16)
x9 += x13
x5 ^= x9
x5 = bits.RotateLeft32(x5, 12)
x1 += x5
x13 ^= x1
x13 = bits.RotateLeft32(x13, 8)
x9 += x13
x5 ^= x9
x5 = bits.RotateLeft32(x5, 7)
// quarterround(x, 2, 6, 10, 14)
x2 += x6
x14 ^= x2
x14 = bits.RotateLeft32(x14, 16)
x10 += x14
x6 ^= x10
x6 = bits.RotateLeft32(x6, 12)
x2 += x6
x14 ^= x2
x14 = bits.RotateLeft32(x14, 8)
x10 += x14
x6 ^= x10
x6 = bits.RotateLeft32(x6, 7)
// quarterround(x, 3, 7, 11, 15)
x3 += x7
x15 ^= x3
x15 = bits.RotateLeft32(x15, 16)
x11 += x15
x7 ^= x11
x7 = bits.RotateLeft32(x7, 12)
x3 += x7
x15 ^= x3
x15 = bits.RotateLeft32(x15, 8)
x11 += x15
x7 ^= x11
x7 = bits.RotateLeft32(x7, 7)
// quarterround(x, 0, 5, 10, 15)
x0 += x5
x15 ^= x0
x15 = bits.RotateLeft32(x15, 16)
x10 += x15
x5 ^= x10
x5 = bits.RotateLeft32(x5, 12)
x0 += x5
x15 ^= x0
x15 = bits.RotateLeft32(x15, 8)
x10 += x15
x5 ^= x10
x5 = bits.RotateLeft32(x5, 7)
// quarterround(x, 1, 6, 11, 12)
x1 += x6
x12 ^= x1
x12 = bits.RotateLeft32(x12, 16)
x11 += x12
x6 ^= x11
x6 = bits.RotateLeft32(x6, 12)
x1 += x6
x12 ^= x1
x12 = bits.RotateLeft32(x12, 8)
x11 += x12
x6 ^= x11
x6 = bits.RotateLeft32(x6, 7)
// quarterround(x, 2, 7, 8, 13)
x2 += x7
x13 ^= x2
x13 = bits.RotateLeft32(x13, 16)
x8 += x13
x7 ^= x8
x7 = bits.RotateLeft32(x7, 12)
x2 += x7
x13 ^= x2
x13 = bits.RotateLeft32(x13, 8)
x8 += x13
x7 ^= x8
x7 = bits.RotateLeft32(x7, 7)
// quarterround(x, 3, 4, 9, 14)
x3 += x4
x14 ^= x3
x14 = bits.RotateLeft32(x14, 16)
x9 += x14
x4 ^= x9
x4 = bits.RotateLeft32(x4, 12)
x3 += x4
x14 ^= x3
x14 = bits.RotateLeft32(x14, 8)
x9 += x14
x4 ^= x9
x4 = bits.RotateLeft32(x4, 7)
// HChaCha returns x0...x3 | x12...x15, which corresponds to the
// indexes of the ChaCha constant and the indexes of the IV.
_ = dst[api.HashSize-1] // Force bounds check elimination.
binary.LittleEndian.PutUint32(dst[0:4], x0)
binary.LittleEndian.PutUint32(dst[4:8], x1)
binary.LittleEndian.PutUint32(dst[8:12], x2)
binary.LittleEndian.PutUint32(dst[12:16], x3)
binary.LittleEndian.PutUint32(dst[16:20], x12)
binary.LittleEndian.PutUint32(dst[20:24], x13)
binary.LittleEndian.PutUint32(dst[24:28], x14)
binary.LittleEndian.PutUint32(dst[28:32], x15)
// Register appends the implementation to the provided slice, and returns the
// new slice.
func Register(impls []api.Implementation) []api.Implementation {
return append(impls, Impl)
@ -0,0 +1,6 @@
Implementation of cryptMTv1 stream cipher (but with mtwist64 as base accum)
Uses Mersenne Twister 64 golang implementation supplied by [cuixin](https://gist.github.com/cuixin): [gist](https://gist.github.com/cuixin/1b8b6bd7bfbde8fe76e8)
@ -0,0 +1,78 @@
// Package cryptmt implements the CryptMTv1 stream cipher
// (but with mtwist64 as the base accumulator).
// https://eprint.iacr.org/2005/165.pdf
package cryptmt
// TODO rlm: according to the Go docs, stream ciphers do not implement the
// cipher.Block interface at all, and thus do not support Encrypt() or
// Decrypt(). cipher.StreamReader/StreamWriter only call XORKeyStream()
// anyhow, and for my own purposes this is all that is required.
import (
mtwist "blitter.com/go/mtwist"
// Cipher is a CryptMT key stream generator bound to an underlying reader and
// writer.
type Cipher struct {
	// r is the source that Read pulls data from.
	r io.Reader
	// w is the sink that Write forwards data to.
	w io.Writer
	// accum is the multiplicative accumulator; its top byte is the next
	// key stream byte.
	accum uint64
	// m is the 64 bit Mersenne Twister feeding the accumulator.
	m *mtwist.MT19937_64
func (c *Cipher) yield() (r byte) {
c.accum = c.accum * (c.m.Int63() | 1)
r = byte(c.accum>>56) & 0xFF
// New creates and returns a Cipher. The key argument should be the
// CryptMT key, 64 bytes.
func New(r io.Reader, w io.Writer, key []byte) (c *Cipher) {
c = &Cipher{m: mtwist.New(), r: r, w: w}
c.accum = 1
// from paper, discard first 64 bytes of output
for idx := 0; idx < 64; idx++ {
_ = c.yield()
return c
func (c *Cipher) Read(p []byte) (n int, err error) {
n, err = c.r.Read(p)
if err == nil {
for idx := 0; idx < n; idx++ {
p[idx] = p[idx] ^ c.yield()
return n, err
func (c *Cipher) Write(p []byte) (n int, err error) {
n, err = c.w.Write(p)
return n, err
// XORKeyStream XORs each byte in the given slice with a byte from the
// cipher's key stream. Dst and src must overlap entirely or not at all.
// If len(dst) < len(src), XORKeyStream should panic. It is acceptable
// to pass a dst bigger than src, and in that case, XORKeyStream will
// only update dst[:len(src)] and will not touch the rest of dst.
// Multiple calls to XORKeyStream behave as if the concatenation of
// the src buffers was passed in a single run. That is, Stream
// maintains state and does not reset at each XORKeyStream call.
func (c *Cipher) XORKeyStream(dst, src []byte) {
if len(dst) < len(src) {
panic(errors.New("len(dst) < len(src)"))
for i, b := range src {
dst[i] = b ^ c.yield()
@ -0,0 +1,21 @@
MIT License
Copyright (c) 2017 - 2018 Russell Magee
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
@ -0,0 +1,17 @@
goutmp - Minimal bindings to C stdlib pututmpx(), getutmpx() (/var/log/wtmp) and /var/log/lastlog
Any Go program which allows user shell access should update the standard UNIX files which track user sessions: /var/log/wtmp (for the 'w' and 'who' commands), and /var/log/lastlog (the 'last' and 'lastlog' commands).
go doc
package goutmp // import "blitter.com/go/goutmp"
Golang bindings for basic login/utmp accounting
type UtmpEntry struct{ ... }
func Put_lastlog_entry(app, usr, ptsname, host string)
func Unput_utmp(entry UtmpEntry)
func Put_utmp(user, ptsname, host string) UtmpEntry
@ -0,0 +1,136 @@
// +build freebsd
// Golang bindings for basic login/utmp accounting
package goutmp
//#include <stdio.h>
//#include <stdlib.h>
//#include <sys/file.h>
//#include <string.h>
//#include <unistd.h>
//#include <stdint.h>
//#include <time.h>
//#include <pwd.h>
//#include <utmpx.h>
//typedef char char_t;
// Fill in *entry as a USER_PROCESS record for uname on ptsname from host and
// write it to the utmpx database. ut_line is ptsname without its "/dev/"
// prefix and ut_id is ptsname without its "/dev/pts/" prefix.
//
// All copies are bounded by the destination field size: the fields are fixed
// width, and uname/host ultimately come from the remote side.
//void pututmpx(struct utmpx* entry, char* uname, char* ptsname, char* host) {
//	size_t len = strlen(ptsname);
//	// Never step past the end of a ptsname shorter than the prefix.
//	const char* line = (len >= strlen("/dev/")) ? ptsname + strlen("/dev/") : "";
//	const char* id = (len >= strlen("/dev/pts/")) ? ptsname + strlen("/dev/pts/") : "";
//
//	entry->ut_type = USER_PROCESS;
//	entry->ut_pid = getpid();
//	strncpy(entry->ut_line, line, sizeof(entry->ut_line));
//	strncpy(entry->ut_id, id, sizeof(entry->ut_id));
//	//entry->ut_time = time(NULL);
//	strncpy(entry->ut_user, uname, sizeof(entry->ut_user));
//	strncpy(entry->ut_host, host, sizeof(entry->ut_host));
//	//entry->ut_addr = 0;
//	setutxent();
//	pututxline(entry);
//}
// Mark *entry as a DEAD_PROCESS record, blank its line and user fields, and
// write it back to the utmpx database.
//void unpututmpx(struct utmpx* entry) {
//	entry->ut_type = DEAD_PROCESS;
//	*entry->ut_line = '\0';
//	//entry->ut_time = 0;
//	*entry->ut_user = '\0';
//
//	setutxent();
//	pututxline(entry);
//	endutxent();
//}
// The lastlog writer is compiled out on this platform; the stub in the #else
// branch is what gets built and always reports failure (0).
//#if 0
//int putlastlogentry(int64_t t, int uid, char* line, char* host) {
//  int retval = 0;
//  FILE *f;
//  struct lastlog l;
//
//  strncpy(l.ll_line, line, UT_LINESIZE);
//  l.ll_line[UT_LINESIZE-1] = '\0';
//  strncpy(l.ll_host, host, UT_HOSTSIZE);
//  l.ll_host[UT_HOSTSIZE-1] = '\0';
//
//  l.ll_time = (time_t)t;
//  //printf("l: ll_line '%s', ll_host '%s', ll_time %d\n", l.ll_line, l.ll_host, l.ll_time);
//
//  /* Write lastlog entry at fixed offset (uid * sizeof(struct lastlog) */
//  if( NULL != (f = fopen("/var/log/lastlog", "rw+")) ) {
//    if( !fseek(f, (uid * sizeof(struct lastlog)), SEEK_SET) ) {
//      int fd = fileno(f);
//      if( write(fd, &l, sizeof(l)) == sizeof(l) ) {
//        retval = 1;
//        //int32_t stat = system("echo ---- lastlog ----; lastlog");
//      }
//    }
//    fclose(f);
//  }
//  return retval;
//}
//#else
//int putlastlogentry(int64_t t, int uid, char* line, char* host) {
//  return 0;
//}
//#endif
import "C"
import (
// UtmpEntry wraps the C struct utmpx. A value is produced by Put_utmp and
// later handed to Unput_utmp.
type UtmpEntry struct {
	// entry is the raw record passed to the C helpers.
	entry C.struct_utmpx
// GetHost returns the remote client's hostname, or its bare host/IP string if
// the reverse lookup fails or yields no names.
//
// addr is expected to be of the format given by net.Addr.String(),
// eg., "" or "[::1]:80". The port is stripped, and for the
// bracketed form the text between "[" and "]" is used.
func GetHost(addr string) (h string) {
	if !strings.Contains(addr, "[") {
		h = strings.Split(addr, ":")[0]
	} else {
		h = strings.Split(strings.Split(addr, "[")[1], "]")[0]
	}

	// Only replace the address with a name if the lookup actually produced
	// one; indexing an empty result would panic.
	names, err := net.LookupAddr(h)
	if err == nil && len(names) > 0 {
		h = names[0]
	}
	return h
}
// Put a username and the originating host/IP to utmp
func Put_utmp(user, ptsName, host string) UtmpEntry {
var entry UtmpEntry
//log.Println("Put_utmp:host ", host, " user ", user)
C.pututmpx(&entry.entry, C.CString(user), C.CString(ptsName), C.CString(host))
return entry
// Remove a username/host entry from utmp
func Unput_utmp(entry UtmpEntry) {
// Put the login app, username and originating host/IP to lastlog
func Put_lastlog_entry(app, usr, ptsname, host string) {
u, e := user.Lookup(usr)
if e != nil {
var uid uint32
fmt.Sscanf(u.Uid, "%d", &uid)
t := time.Now().Unix()
_ = C.putlastlogentry(C.int64_t(t), C.int(uid), C.CString(app), C.CString(host))
//stat := C.putlastlogentry(C.int64_t(t), C.int(uid), C.CString(app), C.CString(host))
//fmt.Println("stat was:",stat)
@ -0,0 +1,132 @@
// +build linux
// Golang bindings for basic login/utmp accounting
package goutmp
//#include <stdio.h>
//#include <stdlib.h>
//#include <sys/file.h>
//#include <string.h>
//#include <unistd.h>
//#include <stdint.h>
//#include <time.h>
//#include <pwd.h>
//#include <utmp.h>
//#include <lastlog.h>
//typedef char char_t;
// Fill in *entry as a USER_PROCESS record for uname on ptsname from host,
// stamped with the current time, and write it to the utmp database. ut_line
// is ptsname without its "/dev/" prefix and ut_id is ptsname without its
// "/dev/pts/" prefix.
//
// All copies are bounded by the destination field size: the fields are fixed
// width, and uname/host ultimately come from the remote side.
//void pututmp(struct utmp* entry, char* uname, char* ptsname, char* host) {
//	size_t len = strlen(ptsname);
//	// Never step past the end of a ptsname shorter than the prefix.
//	const char* line = (len >= strlen("/dev/")) ? ptsname + strlen("/dev/") : "";
//	const char* id = (len >= strlen("/dev/pts/")) ? ptsname + strlen("/dev/pts/") : "";
//
//	entry->ut_type = USER_PROCESS;
//	entry->ut_pid = getpid();
//	strncpy(entry->ut_line, line, sizeof(entry->ut_line));
//	strncpy(entry->ut_id, id, sizeof(entry->ut_id));
//	entry->ut_time = time(NULL);
//	strncpy(entry->ut_user, uname, sizeof(entry->ut_user));
//	strncpy(entry->ut_host, host, sizeof(entry->ut_host));
//	entry->ut_addr = 0;
//	setutent();
//	pututline(entry);
//}
// Mark *entry as a DEAD_PROCESS record, clear its line, time and user fields,
// and write it back to the utmp database.
//void unpututmp(struct utmp* entry) {
//	entry->ut_type = DEAD_PROCESS;
//	entry->ut_time = 0;
//	memset(entry->ut_line, 0, UT_LINESIZE);
//	memset(entry->ut_user, 0, UT_NAMESIZE);
//
//	setutent();
//	pututline(entry);
//	endutent();
//}
// Write a lastlog record (time t, terminal line, remote host) for uid.
// Returns 1 if the record was written, 0 otherwise.
//
// The file is opened with the standard update mode "r+" ("rw+" is not a
// valid fopen mode), and the record is written through the same stdio stream
// that was positioned with fseek instead of bypassing it with a raw write().
//int putlastlogentry(int64_t t, int uid, char* line, char* host) {
//  int retval = 0;
//  FILE *f;
//  struct lastlog l;
//
//  strncpy(l.ll_line, line, UT_LINESIZE);
//  l.ll_line[UT_LINESIZE-1] = '\0';
//  strncpy(l.ll_host, host, UT_HOSTSIZE);
//  l.ll_host[UT_HOSTSIZE-1] = '\0';
//
//  l.ll_time = (time_t)t;
//  //printf("l: ll_line '%s', ll_host '%s', ll_time %d\n", l.ll_line, l.ll_host, l.ll_time);
//
//  /* Write lastlog entry at fixed offset (uid * sizeof(struct lastlog) */
//  if( NULL != (f = fopen("/var/log/lastlog", "r+")) ) {
//    if( !fseek(f, (uid * sizeof(struct lastlog)), SEEK_SET) ) {
//      if( fwrite(&l, sizeof(l), 1, f) == 1 ) {
//        retval = 1;
//      }
//    }
//    /* The record only reaches the file once the stream is flushed. */
//    if( fclose(f) != 0 ) {
//      retval = 0;
//    }
//  }
//  return retval;
//}
import "C"
import (
// UtmpEntry wraps the C struct utmp. A value is produced by Put_utmp and
// later handed to Unput_utmp.
type UtmpEntry struct {
	// entry is the raw record passed to the C helpers.
	entry C.struct_utmp
// GetHost returns the remote client's hostname, or its bare host/IP string if
// the reverse lookup fails or yields no names.
//
// addr is expected to be of the format given by net.Addr.String(),
// eg., "" or "[::1]:80". The port is stripped, and for the
// bracketed form the text between "[" and "]" is used.
func GetHost(addr string) (h string) {
	if !strings.Contains(addr, "[") {
		h = strings.Split(addr, ":")[0]
	} else {
		h = strings.Split(strings.Split(addr, "[")[1], "]")[0]
	}

	// Only replace the address with a name if the lookup actually produced
	// one; indexing an empty result would panic.
	names, err := net.LookupAddr(h)
	if err == nil && len(names) > 0 {
		h = names[0]
	}
	return h
}
// Put a username and the originating host/IP to utmp
func Put_utmp(user, ptsName, host string) UtmpEntry {
var entry UtmpEntry
//log.Println("Put_utmp:host ", host, " user ", user)
C.pututmp(&entry.entry, C.CString(user), C.CString(ptsName), C.CString(host))
return entry
// Remove a username/host entry from utmp
func Unput_utmp(entry UtmpEntry) {
// Put the login app, username and originating host/IP to lastlog
func Put_lastlog_entry(app, usr, ptsname, host string) {
u, e := user.Lookup(usr)
if e != nil {
var uid uint32
fmt.Sscanf(u.Uid, "%d", &uid)
t := time.Now().Unix()
_ = C.putlastlogentry(C.int64_t(t), C.int(uid), C.CString(app), C.CString(host))
//stat := C.putlastlogentry(C.int64_t(t), C.int(uid), C.CString(app), C.CString(host))
//fmt.Println("stat was:",stat)
# Default target: build and install the package.
all: lib

# Remove build artifacts.
clean:
	go clean .

# Build and install the package after announcing the target platform.
lib: info
	go install .

# MSYSTEM is only set inside an MSYS shell on Windows.
ifneq ($(MSYSTEM),)
info:
	@echo "building for Windows (MSYS)"
else
info:
	@echo "building for Linux"
endif
@ -0,0 +1 @@
HerraduraKEx - an experimental Key Encapsulation Mechanism
@ -0,0 +1,182 @@
// Package hkex - an experimental key exchange algorithm
// by Omar Alejandro Herrera Reyna.
// (https://github.com/Caume/HerraduraKEx)
// The core HerraduraKEx algorithm is dual-licensed
// by the author (Omar Alejandro Herrera Reyna)
// under GPL3 and MIT licenses.
// See LICENSE.gpl and LICENSE.mit in this distribution
// Go implementation Copyright (c) 2017-2018 Russell Magee
// (rmagee_at_gmail_com)
// Licensed under the terms of the MIT license
// See LICENSE.mit in this distribution
package hkex
/* Herradura - a Key exchange scheme in the style of Diffie-Hellman Key Exchange.
Copyright (C) 2017 Omar Alejandro Herrera Reyna
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
golang implementation by Russ Magee (rmagee_at_gmail.com) */
/* This is the core KEx algorithm. For client/server net support code,
See the hkexnet package (currently a sub-package of hkexsh) for a
golang/pkg/net compatible Conn interface using this to negotiate keys and
secure a network channel. */
import (
// HerraduraKEx carries the session state of one side of a key exchange.
type HerraduraKEx struct {
	intSz   int        // size of the internal (private) values, in bits
	pubSz   int        // size of the public (exchanged) value; used as a pass count
	randctx *rand.Rand // generator installed by seed and consumed by rand
	a, b    *big.Int   // private random values drawn in New
	d       *big.Int   // fscxRevolve(a, b, pubSz); returned by D
	peerD   *big.Int   // private copy of the peer's D, stored by SetPeerD
	fa      *big.Int   // value produced by ComputeFA; returned by FA
}
// New returns a HerraduraKEx struct.
// i - internal (private) random nonce
// p - public (exchanged) random nonce (typically 1/4 bitsize of i)
// If i or p are passed as zero, they will default to 256 and 64,
// respectively.
func New(i int, p int) (h *HerraduraKEx) {
h = new(HerraduraKEx)
if i == 0 {
i = 256
if p == 0 {
p = 64
h.intSz = i
h.pubSz = p
h.a = h.rand()
h.b = h.rand()
h.d = h.fscxRevolve(h.a, h.b, h.pubSz)
return h
func (h *HerraduraKEx) seed() {
r := rand.New(rand.NewSource(time.Now().UnixNano()))
h.randctx = r
func (h *HerraduraKEx) rand() (v *big.Int) {
v = big.NewInt(0)
v.Rand(h.randctx, h.getMax())
return v
// getMax returns the max value for an n-bit big.Int
func (h *HerraduraKEx) getMax() (n *big.Int) {
n = big.NewInt(0)
var max big.Int
for i := 0; i < h.intSz; i++ {
max.SetBit(n, i, 1)
n = &max
return n
func (h *HerraduraKEx) bitX(x *big.Int, pos int) (ret int64) {
if pos < 0 {
pos = h.intSz - pos
if pos == 0 {
ret = int64(x.Bit(1) ^ x.Bit(0) ^ x.Bit(h.intSz-1))
} else if pos == h.intSz-1 {
ret = int64(x.Bit(0) ^ x.Bit(pos) ^ x.Bit(pos-1))
} else {
ret = int64(x.Bit((pos+1)%h.intSz) ^ x.Bit(pos) ^ x.Bit(pos-1))
return ret
func (h *HerraduraKEx) bit(up, down *big.Int, posU, posD int) (ret *big.Int) {
return big.NewInt(h.bitX(up, posU) ^ h.bitX(down, posD))
func (h *HerraduraKEx) fscx(up, down *big.Int) (result *big.Int) {
result = big.NewInt(0)
for count := 0; count < h.intSz; count++ {
result.Lsh(result, 1)
result.Add(result, h.bit(up, down, count, count))
return result
// This is the iteration function using the result of the previous iteration
// as the first parameter and the second parameter of the first iteration.
func (h *HerraduraKEx) fscxRevolve(x, y *big.Int, passes int) (result *big.Int) {
result = x
for count := 0; count < passes; count++ {
result = h.fscx(result, y)
return result
// D returns the D (FSCX Revolved) value, input to generate FA
// (the value for peer KEx).
// The pointer returned is the one stored in the struct, not a copy.
func (h HerraduraKEx) D() *big.Int {
return h.d
// PeerD returns the peer D value most recently stored by SetPeerD.
// The pointer returned is the one stored in the struct, not a copy.
func (h HerraduraKEx) PeerD() *big.Int {
return h.peerD
// SetPeerD stores the received peer's D value (contents, not ptr)
func (h *HerraduraKEx) SetPeerD(pd *big.Int) {
h.peerD = new(big.Int).Set(pd)
// ComputeFA computes the FA value, which must be sent to peer for KEx.
func (h *HerraduraKEx) ComputeFA() {
h.fa = h.fscxRevolve(h.peerD, h.b, h.intSz-h.pubSz)
h.fa.Xor(h.fa, h.a)
// FA returns the computed FA value, i.e. whatever ComputeFA last stored.
// The pointer returned is the one stored in the struct, not a copy.
func (h HerraduraKEx) FA() *big.Int {
return h.fa
// Output HerraduraKEx type value as a string. Implements Stringer interface.
func (h *HerraduraKEx) String() string {
return fmt.Sprintf("s:%d p:%d\na:%s\nb:%s\nd:->%s\n<-peerD:%s\nfa:%s",
h.intSz, h.pubSz,
h.a.Text(16), h.b.Text(16),
@ -0,0 +1,29 @@
# hopscotch
Experimental cipher using multiple hash algs for keystream.
The cipher uses multiple trusted hash algorithms, each updated on a schedule (the security factor, 1 to 10) based initially on the secret key, then on random data from a PRNG (currently MTWIST-64, also seeded from the secret key). The XOR value used to encrypt plaintext is picked from the bytes of the hash outputs, which are appended together into a single pool P; the hash output byte just chosen is then used as a modulus value to 'hop' to the next XOR value within P (hence the name 'hopscotch').
The security of the algorithm is premised on the following axioms:
1. all hash algorithms used are sufficiently unpredictable in their output based on given input (most importantly the initial key as input to the PRNG and its seeded output);
2. the PRNG is sufficiently random so as to guarantee subsequent input to all hash algorithms used as potential keystream pool bytes for XOR operations is not predictable;
3. the keystream (being the hash outputs at a given time for all hash algorithms used) is re-keyed often enough to prevent excessive re-use of bytes ('excessive' being defined by the strength parameter, restricted from 1 to 10 inclusive by empirical analysis)
Current implementation uses 2 hash algorithms, SHA512 and BLAKE2B, both giving outputs of fixed-length = 64 bytes. Empirically, using security factors ranging from 1 to 10 (count of input bytes encrypted before re-keying by feeding 32 bytes of PRNG data to derive new hash output XOR pool P) it is unlikely that 64 picks from the pool would re-use the same bytes often enough to compromise security. Tests with the ['circle' analysis tool](https://github.com/circulosmeos/circle) and the 'Tux.ppm' image test indicate ciphertext does not resemble plaintext in any obvious manner. (TODO: diehard tests or others?)
The use purely of a PRNG plus two or more already-proven hash algorithms as keystream pool material offers a simply-verified security-primitive basis for confidence, plus easy extendability by adding more hash algorithms to the keystream pool P, without complexity of a full re-analysis. So long as each individual hash algorithm is considered safe, hopping between and within the output bytes of each to derive keystream XOR material should also be safe so long as hash output bytes are not re-used extensively.
On a modest test AMD (Linux amd_x64) encryption rates of approx. 140Mbits/s are achieved (-m 4). As this is a pure Go implementation and little effort has been put into optimization it is reasonable to expect higher rates could be achieved in the future.
$ time ./cmd -k "SuperSecret#@11ElevenTy" <blank700MB.bin >blank700MBenc.bin
real 0m40.096s
user 0m38.318s
sys 0m2.133s
@ -0,0 +1,166 @@
// Package hopscotch - a crypto doodle that uses multiple hash
// algorithm outputs as dynamic sbox/pbox material
// Properties visualized using https://github.com/circulosmeos/circle
package hopscotch
// TODOs:
// -define s-box rotation/shuffle schema
// -devise p-box schema
// ...
import (
mtwist "blitter.com/go/mtwist" // Used to derive hash fodder after seeding w/key
// hash algos must be manually imported thusly:
// (Would be nice if the golang pkg docs were more clear
// on this...)
_ "crypto/sha512"
b2b "golang.org/x/crypto/blake2b"
const (
// maxResched is the largest resched value New accepts; New replaces any
// value outside 1..maxResched with its own default.
maxResched = 10 // above 20 starts to show outlines in 'tuxtest' ... so 10 max
// Cipher holds the state of one hopscotch keystream together with the
// reader and writer it wraps. Create one with New.
type Cipher struct {
resched int // re-key schedule: lowest (1) == strongest encryption; highest (10) == weakest
rounds int // set to 1 by New
prng *mtwist.MT19937_64 // used to gen initial hash fodder from key
h []hash.Hash // hash instances created by New
hs []byte // keystream pool: the hash outputs appended together (see keyUpdate)
r io.Reader // source that Read pulls from
w io.Writer // sink that Write forwards to
idx int // position within hs picked by the most recent yield
ctr int // number of bytes yield has processed
rekeyCtr int // must be min of len( c.h[] ); New sets it to len(hs) * resched
bTmp byte // pool byte picked by the most recent yield
k []byte // key passed to New, or the time-derived fallback
func New(r io.Reader, w io.Writer, resched int, key []byte) (c *Cipher) {
if resched < 1 || resched > maxResched {
resched = 4
c = &Cipher{}
c.resched = resched
c.rounds = 1
c.prng = mtwist.New()
c.r = r
c.w = w
if len(key) == 0 {
c.k = []byte(fmt.Sprintf("%s", time.Now()))
} else {
c.k = key
// Discard first 64 bytes of MT output
for idx := 0; idx < 64; idx++ {
_ = c.prng.Int63()
// Init all the hash algs we're going to 'hop' around with initial keystream
c.h = make([]hash.Hash, 2)
c.h[0] = sha512.New()
c.h[1], _ = b2b.New512(c.k)
c.rekeyCtr = len(c.hs) * c.resched // lower multiplier == greater security, lower speed
//fmt.Fprintf(os.Stderr, "rekeyCtr = %v\n", c.rekeyCtr)
return c
func (c *Cipher) Read(p []byte) (n int, err error) {
n, err = c.r.Read(p)
if err == nil {
for idx := 0; idx < n; idx++ {
p[idx] = c.yield(p[idx])
return n, err
func (c *Cipher) Write(p []byte) (n int, err error) {
n, err = c.w.Write(p)
return n, err
// Mutate the session key (intended to be called as encryption proceeds)
func (c *Cipher) keyUpdate(data []byte) {
//fmt.Fprintln(os.Stderr, "--rekey--")
sliceTmp := sha512.Sum512(data)
c.hs = sliceTmp[:]
sliceTmp := b2b.Sum512(data)
c.hs = append(c.hs, sliceTmp[:]...)
func (c *Cipher) yield(ib byte) (ob byte) {
c.idx = (c.ctr + c.idx + int(c.bTmp)) % len(c.hs)
c.bTmp = c.hs[c.idx]
c.ctr = c.ctr + 1
//fmt.Fprintf(os.Stderr, "[c.hidx:%v c.idx:%v]\n", c.hidx, c.idx)
// NOTE: using a non-prime modulus degrades CV % from ~ 0.055 to ~ 0.07
switch c.ctr % 3 {
case 0:
ob = c.bTmp ^ ib ^ byte(c.ctr) ^ byte(c.idx) ^
c.hs[len(c.hs)-19] ^ c.hs[len(c.hs)-2] ^ c.hs[len(c.hs)-3] ^ c.hs[len(c.hs)-5] ^
c.hs[len(c.hs)-7] ^ c.hs[len(c.hs)-11] ^ c.hs[len(c.hs)-13] ^ c.hs[len(c.hs)-17]
case 1:
ob = c.bTmp ^ ib ^ byte(c.ctr) ^ byte(c.idx) ^
c.hs[len(c.hs)-5] ^ c.hs[len(c.hs)-7] ^ c.hs[len(c.hs)-11] ^ c.hs[len(c.hs)-13] ^
c.hs[len(c.hs)-17] ^ c.hs[len(c.hs)-19] ^ c.hs[len(c.hs)-23] ^ c.hs[len(c.hs)-29]
case 2:
ob = c.bTmp ^ ib ^ byte(c.ctr) ^ byte(c.idx) ^
c.hs[len(c.hs)-13] ^ c.hs[len(c.hs)-17] ^ c.hs[len(c.hs)-23] ^ c.hs[len(c.hs)-27] ^
c.hs[len(c.hs)-29] ^ c.hs[len(c.hs)-31] ^ c.hs[len(c.hs)-2] ^ c.hs[len(c.hs)-3]
if c.ctr%c.rekeyCtr == 0 {
bufTmp := make([]byte, 32)
_, _ = c.prng.Read(bufTmp)
// XORKeyStream XORs each byte in the given slice with a byte from the
// cipher's key stream. Dst and src must overlap entirely or not at all.
// If len(dst) < len(src), XORKeyStream should panic. It is acceptable
// to pass a dst bigger than src, and in that case, XORKeyStream will
// only update dst[:len(src)] and will not touch the rest of dst.
// Multiple calls to XORKeyStream behave as if the concatenation of
// the src buffers was passed in a single run. That is, Stream
// maintains state and does not reset at each XORKeyStream call.
func (c *Cipher) XORKeyStream(dst, src []byte) {
//fmt.Printf("len dst:%d len src:%d\n", len(dst), len(src))
if len(dst) < len(src) {
panic(errors.New("len(dst) < len(src)"))
for idx, v := range src {
dst[idx] = c.yield(v)
@ -0,0 +1,2 @@
@ -0,0 +1,122 @@
Creative Commons Legal Code
CC0 1.0 Universal
Statement of Purpose
The laws of most jurisdictions throughout the world automatically confer
exclusive Copyright and Related Rights (defined below) upon the creator
and subsequent owner(s) (each and all, an "owner") of an original work of
authorship and/or a database (each, a "Work").
Certain owners wish to permanently relinquish those rights to a Work for
the purpose of contributing to a commons of creative, cultural and
scientific works ("Commons") that the public can reliably and without fear
of later claims of infringement build upon, modify, incorporate in other
works, reuse and redistribute as freely as possible in any form whatsoever
and for any purposes, including without limitation commercial purposes.
These owners may contribute to the Commons to promote the ideal of a free
culture and the further production of creative, cultural and scientific
works, or to gain reputation or greater distribution for their Work in
part through the use and efforts of others.
For these and/or other purposes and motivations, and without any
expectation of additional consideration or compensation, the person
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
is an owner of Copyright and Related Rights in the Work, voluntarily
elects to apply CC0 to the Work and publicly distribute the Work under its
terms, with knowledge of his or her Copyright and Related Rights in the
Work and the meaning and intended legal effect of CC0 on those rights.
1. Copyright and Related Rights. A Work made available under CC0 may be
protected by copyright and related or neighboring rights ("Copyright and
Related Rights"). Copyright and Related Rights include, but are not
limited to, the following:
i. the right to reproduce, adapt, distribute, perform, display,
communicate, and translate a Work;
ii. moral rights retained by the original author(s) and/or performer(s);
iii. publicity and privacy rights pertaining to a person's image or
likeness depicted in a Work;
iv. rights protecting against unfair competition in regards to a Work,
subject to the limitations in paragraph 4(a), below;
v. rights protecting the extraction, dissemination, use and reuse of data
in a Work;
vi. database rights (such as those arising under Directive 96/9/EC of the
European Parliament and of the Council of 11 March 1996 on the legal
protection of databases, and under any national implementation
thereof, including any amended or successor version of such
directive); and
vii. other similar, equivalent or corresponding rights throughout the
world based on applicable law or treaty, and any national
implementations thereof.
2. Waiver. To the greatest extent permitted by, but not in contravention
of, applicable law, Affirmer hereby overtly, fully, permanently,
irrevocably and unconditionally waives, abandons, and surrenders all of
Affirmer's Copyright and Related Rights and associated claims and causes
of action, whether now known or unknown (including existing as well as
future claims and causes of action), in the Work (i) in all territories
worldwide, (ii) for the maximum duration provided by applicable law or
treaty (including future time extensions), (iii) in any current or future
medium and for any number of copies, and (iv) for any purpose whatsoever,
including without limitation commercial, advertising or promotional
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
member of the public at large and to the detriment of Affirmer's heirs and
successors, fully intending that such Waiver shall not be subject to
revocation, rescission, cancellation, termination, or any other legal or
equitable action to disrupt the quiet enjoyment of the Work by the public
as contemplated by Affirmer's express Statement of Purpose.
3. Public License Fallback. Should any part of the Waiver for any reason
be judged legally invalid or ineffective under applicable law, then the
Waiver shall be preserved to the maximum extent permitted taking into
account Affirmer's express Statement of Purpose. In addition, to the
extent the Waiver is so judged Affirmer hereby grants to each affected
person a royalty-free, non transferable, non sublicensable, non exclusive,
irrevocable and unconditional license to exercise Affirmer's Copyright and
Related Rights in the Work (i) in all territories worldwide, (ii) for the
maximum duration provided by applicable law or treaty (including future
time extensions), (iii) in any current or future medium and for any number
of copies, and (iv) for any purpose whatsoever, including without
limitation commercial, advertising or promotional purposes (the
"License"). The License shall be deemed effective as of the date CC0 was
applied by Affirmer to the Work. Should any part of the License for any
reason be judged legally invalid or ineffective under applicable law, such
partial invalidity or ineffectiveness shall not invalidate the remainder
of the License, and in such case Affirmer hereby affirms that he or she
will not (i) exercise any of his or her remaining Copyright and Related
Rights in the Work or (ii) assert any associated claims and causes of
action with respect to the Work, in either case contrary to Affirmer's
express Statement of Purpose.
4. Limitations and Disclaimers.
a. No trademark or patent rights held by Affirmer are waived, abandoned,
surrendered, licensed or otherwise affected by this document.
b. Affirmer offers the Work as-is and makes no representations or
warranties of any kind concerning the Work, express, implied,
statutory or otherwise, including without limitation warranties of
title, merchantability, fitness for a particular purpose, non
infringement, or the absence of latent or other defects, accuracy, or
the present or absence of errors, whether or not discoverable, all to
the greatest extent permissible under applicable law.
c. Affirmer disclaims responsibility for clearing rights of other persons
that may apply to the Work or any use thereof, including without
limitation any person's Copyright and Related Rights in the Work.
Further, Affirmer disclaims responsibility for obtaining any necessary
consents, permissions or other rights required for any use of the
Work.
d. Affirmer understands and acknowledges that Creative Commons is not a
party to this document and has no duty or obligation with respect to
this CC0 or use of the Work.
@ -0,0 +1,24 @@
### Kyber - IND-CCA2-secure Key Encapsulation Mechanism
#### Yawning Angel (yawning at schwanenlied dot me)
This package implements the Kyber IND-CCA2-secure key encapsulation
mechanism (KEM), based on the hardness of solving the learning-with-errors
(LWE) problem over module lattices as submitted to the NIST Post-Quantum
Cryptography project.
This implementation is a port of the Public Domain reference implementation
by Joppe Bos, Léo Ducas, Eike Kiltz , Tancrède Lepoint, Vadim Lyubashevsky,
John Schanck, Peter Schwabe, Gregor Seiler, and Damien Stehlé.
Additionally implementations of Kyber.AKE and Kyber.UAKE as presented in
the Kyber paper are included for users that seek an authenticated key
exchange.
Note that the algorithm is not finalized yet, and may change in a backward
incompatible manner in the future. The designers currently recommend
combining Kyber with an established pre-quantum algorithm like ECDH, and
using the Kyber-768 parameter set.
For more information, see the [project home page](https://pq-crystals.org/kyber/index.shtml).
@ -0,0 +1,100 @@
// cbd.go - Centered binomial distribution.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to the software, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package kyber
// loadLittleEndian packs the first `bytes` bytes of x into a 64-bit integer
// in little-endian order, so x[0] becomes the least significant byte.
func loadLittleEndian(x []byte, bytes int) uint64 {
	src := x[:bytes]

	// Walk from the most significant byte down, shifting as we go.
	var acc uint64
	for i := len(src) - 1; i >= 0; i-- {
		acc = acc<<8 | uint64(src[i])
	}
	return acc
}
// cbd, given an array of uniformly random bytes, computes a polynomial with
// coefficients distributed according to a centered binomial distribution
// with parameter eta.
//
// The work is delegated to the cbdFn of the currently selected
// hardwareAccelImpl, which receives p, buf and eta unchanged.
func (p *poly) cbd(buf []byte, eta int) {
hardwareAccelImpl.cbdFn(p, buf, eta)
func cbdRef(p *poly, buf []byte, eta int) {
switch eta {
case 3:
var a, b [4]uint32
for i := 0; i < kyberN/4; i++ {
t := loadLittleEndian(buf[3*i:], 3)
var d uint32
for j := 0; j < 3; j++ {
d += uint32((t >> uint(j)) & 0x249249)
a[0] = d & 0x7
b[0] = (d >> 3) & 0x7
a[1] = (d >> 6) & 0x7
b[1] = (d >> 9) & 0x7
a[2] = (d >> 12) & 0x7
b[2] = (d >> 15) & 0x7
a[3] = (d >> 18) & 0x7
b[3] = (d >> 21)
p.coeffs[4*i+0] = uint16(a[0] + kyberQ - b[0])
p.coeffs[4*i+1] = uint16(a[1] + kyberQ - b[1])
p.coeffs[4*i+2] = uint16(a[2] + kyberQ - b[2])
p.coeffs[4*i+3] = uint16(a[3] + kyberQ - b[3])
case 4:
var a, b [4]uint32
for i := 0; i < kyberN/4; i++ {
t := loadLittleEndian(buf[4*i:], 4)
var d uint32
for j := 0; j < 4; j++ {
d += uint32((t >> uint(j)) & 0x11111111)
a[0] = d & 0xf
b[0] = (d >> 4) & 0xf
a[1] = (d >> 8) & 0xf
b[1] = (d >> 12) & 0xf
a[2] = (d >> 16) & 0xf
b[2] = (d >> 20) & 0xf
a[3] = (d >> 24) & 0xf
b[3] = (d >> 28)
p.coeffs[4*i+0] = uint16(a[0] + kyberQ - b[0])
p.coeffs[4*i+1] = uint16(a[1] + kyberQ - b[1])
p.coeffs[4*i+2] = uint16(a[2] + kyberQ - b[2])
p.coeffs[4*i+3] = uint16(a[3] + kyberQ - b[3])
case 5:
var a, b [4]uint64
for i := 0; i < kyberN/4; i++ {
t := loadLittleEndian(buf[5*i:], 5)
var d uint64
for j := 0; j < 5; j++ {
d += (t >> uint(j)) & 0x0842108421
a[0] = d & 0x1f
b[0] = (d >> 5) & 0x1f
a[1] = (d >> 10) & 0x1f
b[1] = (d >> 15) & 0x1f
a[2] = (d >> 20) & 0x1f
b[2] = (d >> 25) & 0x1f
a[3] = (d >> 30) & 0x1f
b[3] = (d >> 35)
p.coeffs[4*i+0] = uint16(a[0] + kyberQ - b[0])
p.coeffs[4*i+1] = uint16(a[1] + kyberQ - b[1])
p.coeffs[4*i+2] = uint16(a[2] + kyberQ - b[2])
p.coeffs[4*i+3] = uint16(a[3] + kyberQ - b[3])
panic("kyber: eta must be in {3,4,5}")
@ -0,0 +1,27 @@
// doc.go - Kyber godoc extras.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to the software, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
// Package kyber implements the Kyber IND-CCA2-secure key encapsulation
// mechanism (KEM), based on the hardness of solving the learning-with-errors
// (LWE) problem over module lattices as submitted to the NIST Post-Quantum
// Cryptography project.
// This implementation is a port of the Public Domain reference implementation
// by Joppe Bos, Léo Ducas, Eike Kiltz , Tancrède Lepoint, Vadim Lyubashevsky,
// John Schanck, Peter Schwabe, Gregor Seiler, and Damien Stehlé.
// Additionally implementations of Kyber.AKE and Kyber.UAKE as presented in
// the Kyber paper are included for users that seek an authenticated key
// exchange.
// Note that the algorithm is not finalized yet, and may change in a backward
// incompatible manner in the future. The designers currently recommend
// combining Kyber with an established pre-quantum algorithm like ECDH, and
// using the Kyber-768 parameter set.
// For more information, see https://pq-crystals.org/kyber/index.shtml.
package kyber
@ -0,0 +1,46 @@
// hwaccel.go - Hardware acceleration hooks.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to the software, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package kyber
var (
// isHardwareAccelerated is the value reported by IsHardwareAccelerated.
isHardwareAccelerated = false
// hardwareAccelImpl is the implementation currently dispatched to (for
// example by poly.cbd); it starts out as the reference implementation.
hardwareAccelImpl = implReference
// implReference bundles the reference (*Ref) routines.
implReference = &hwaccelImpl{
name: "Reference",
nttFn: nttRef,
invnttFn: invnttRef,
pointwiseAccFn: pointwiseAccRef,
cbdFn: cbdRef,
// hwaccelImpl is a named set of function hooks, one per routine that has
// more than one implementation.
type hwaccelImpl struct {
name string // label for this implementation, e.g. "Reference" or "AVX2"
nttFn func(*[kyberN]uint16) // NTT hook; operates on a coefficient array through a pointer
invnttFn func(*[kyberN]uint16) // inverse NTT hook; operates on a coefficient array through a pointer
pointwiseAccFn func(*poly, *polyVec, *polyVec) // pointwise-accumulate hook: destination poly and two input vectors
cbdFn func(*poly, []byte, int) // centered binomial sampling hook: destination, random bytes, eta
func forceDisableHardwareAcceleration() {
// This is for the benefit of testing, so that it's possible to test
// all versions that are supported by the host.
isHardwareAccelerated = false
hardwareAccelImpl = implReference
// IsHardwareAccelerated returns true iff the Kyber implementation will use
// hardware acceleration (eg: AVX2).
// It reports the current value of the package-level isHardwareAccelerated
// flag.
func IsHardwareAccelerated() bool {
return isHardwareAccelerated
func init() {
@ -0,0 +1,256 @@
// hwaccel_amd64.go - AMD64 optimized routines.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to the software, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
// +build amd64,!gccgo,!noasm,go1.10
package kyber
// zetasExp is the 752-entry constant table whose first element is passed to
// nttAVX2 as its zetas argument (see nttYMM).
// NOTE(review): the runs of repeated values look like the constants expanded
// for vector lanes — confirm against the assembly.
var zetasExp = [752]uint16{
3777, 3777, 3777, 3777, 3777, 3777, 3777, 3777, 3777, 3777, 3777, 3777,
3777, 3777, 3777, 3777, 4499, 4499, 4499, 4499, 4499, 4499, 4499, 4499,
4499, 4499, 4499, 4499, 4499, 4499, 4499, 4499, 3625, 3625, 3625, 3625,
3625, 3625, 3625, 3625, 3625, 3625, 3625, 3625, 3625, 3625, 3625, 3625,
3985, 3985, 3985, 3985, 3985, 3985, 3985, 3985, 3985, 3985, 3985, 3985,
3985, 3985, 3985, 3985, 6581, 6581, 6581, 6581, 6581, 6581, 6581, 6581,
6581, 6581, 6581, 6581, 6581, 6581, 6581, 6581, 2456, 2456, 2456, 2456,
2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456,
2194, 2194, 2194, 2194, 2194, 2194, 2194, 2194, 2194, 2194, 2194, 2194,
2194, 2194, 2194, 2194, 121, 121, 121, 121, 121, 121, 121, 121, 121,
121, 121, 121, 121, 121, 121, 121, 5431, 5431, 5431, 5431, 5431, 5431,
5431, 5431, 5431, 5431, 5431, 5431, 5431, 5431, 5431, 5431, 834, 834,
834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834,
5186, 5186, 5186, 5186, 5186, 5186, 5186, 5186, 5186, 5186, 5186, 5186,
5186, 5186, 5186, 5186, 5362, 5362, 5362, 5362, 5362, 5362, 5362, 5362,
5362, 5362, 5362, 5362, 5362, 5362, 5362, 5362, 2876, 2876, 2876, 2876,
2876, 2876, 2876, 2876, 2876, 2876, 2876, 2876, 2876, 2876, 2876, 2876,
5980, 5980, 5980, 5980, 5980, 5980, 5980, 5980, 5980, 5980, 5980, 5980,
5980, 5980, 5980, 5980, 1414, 1414, 1414, 1414, 1414, 1414, 1414, 1414,
1414, 1414, 1414, 1414, 1414, 1414, 1414, 1414, 2816, 2816, 2816, 2816,
2816, 2816, 2816, 2816, 5593, 5593, 5593, 5593, 5593, 5593, 5593, 5593,
5444, 5444, 5444, 5444, 5444, 5444, 5444, 5444, 1986, 1986, 1986, 1986,
1986, 1986, 1986, 1986, 6082, 6082, 6082, 6082, 6082, 6082, 6082, 6082,
1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 3706, 3706, 3706, 3706,
3706, 3706, 3706, 3706, 5675, 5675, 5675, 5675, 5675, 5675, 5675, 5675,
6156, 6156, 6156, 6156, 6156, 6156, 6156, 6156, 5124, 5124, 5124, 5124,
5124, 5124, 5124, 5124, 1296, 1296, 1296, 1296, 1296, 1296, 1296, 1296,
1483, 1483, 1483, 1483, 1483, 1483, 1483, 1483, 4851, 4851, 4851, 4851,
4851, 4851, 4851, 4851, 3364, 3364, 3364, 3364, 3364, 3364, 3364, 3364,
617, 617, 617, 617, 617, 617, 617, 617, 1921, 1921, 1921, 1921, 1921,
1921, 1921, 1921, 3992, 3992, 3992, 3992, 5943, 5943, 5943, 5943, 3266,
3266, 3266, 3266, 4081, 4081, 4081, 4081, 810, 810, 810, 810, 1887,
1887, 1887, 1887, 7043, 7043, 7043, 7043, 7674, 7674, 7674, 7674, 7243,
7243, 7243, 7243, 7002, 7002, 7002, 7002, 6376, 6376, 6376, 6376, 5921,
5921, 5921, 5921, 396, 396, 396, 396, 4507, 4507, 4507, 4507, 4126,
4126, 4126, 4126, 5800, 5800, 5800, 5800, 3772, 3772, 3772, 3772, 5146,
5146, 5146, 5146, 5241, 5241, 5241, 5241, 5126, 5126, 5126, 5126, 1535,
1535, 1535, 1535, 7132, 7132, 7132, 7132, 3153, 3153, 3153, 3153, 2310,
2310, 2310, 2310, 6282, 6282, 6282, 6282, 1321, 1321, 1321, 1321, 514,
514, 514, 514, 4725, 4725, 4725, 4725, 7578, 7578, 7578, 7578, 2804,
2804, 2804, 2804, 5638, 5638, 5638, 5638, 6250, 6250, 6250, 6250, 6627,
6627, 1698, 1698, 4225, 4225, 1166, 1166, 2426, 2426, 3831, 3831, 915,
915, 7679, 7679, 4264, 4264, 7487, 7487, 2919, 2919, 2789, 2789, 3405,
3405, 2385, 2385, 5568, 5568, 4949, 4949, 2175, 2175, 373, 373, 3692,
3692, 6951, 6951, 5925, 5925, 3135, 3135, 5290, 5290, 660, 660, 6184,
6184, 2572, 2572, 4536, 4536, 1350, 1350, 5457, 5457, 4093, 4093, 6000,
6000, 2883, 2883, 6291, 6291, 1598, 1598, 3750, 3750, 2762, 2762, 2835,
2835, 2764, 2764, 5448, 5448, 3816, 3816, 6148, 6148, 1464, 1464, 6954,
6954, 1521, 1521, 1386, 1386, 4253, 4253, 6760, 6760, 4938, 4938, 5521,
5521, 2649, 2649, 6822, 6822, 2579, 2579, 1532, 1532, 1919, 1919, 7195,
7195, 404, 404, 6625, 6625, 783, 783, 1799, 1799, 5016, 5016, 3480,
3480, 2133, 2133, 4371, 4371, 6513, 6513, 7664, 3744, 2422, 2001, 1278,
929, 6333, 5451, 7502, 6439, 5622, 6611, 2161, 1649, 2072, 3177, 5610,
1121, 7245, 236, 715, 670, 7023, 6205, 5303, 2767, 3542, 7455, 1203,
1181, 7530, 3887, 1712, 7459, 2786, 7230, 4134, 1779, 6530, 7247, 3568,
3988, 3581, 6095, 1509, 2918, 2339, 6274, 3434, 4131, 2340, 2891, 2998,
4367, 3461, 4962, 5434, 5092, 1144, 1072, 1295, 4866, 3911, 3450, 3781,
5423, 796, 3163, 4473, 7092, 2963, 7557, 3214, 3334, 4315, 3936, 3723,
1931, 7252, 7279, 4273, 83, 6155, 826, 6343, 2345, 5378, 2515, 7039,
5844, 4716, 6890, 370, 293, 3312, 2083, 5992, 6904, 2070, 2262, 6788,
2386, 7493, 6162, 4807, 6277, 1012, 2130, 1441, 2532, 4346, 6597, 4338,
2937, 509, 6278, 2812, 3763, 592, 2005, 3657, 2460, 4004, 3752, 692,
1669, 2167, 4394,
// zetasInvExp is the 752-entry constant table whose first element is passed
// to invnttAVX2 as its omegas argument (see invnttYMM).
var zetasInvExp = [752]uint16{
3287, 5514, 6012, 6989, 3929, 3677, 5221, 4024, 5676, 7089, 3918, 4869,
1403, 7172, 4744, 3343, 1084, 3335, 5149, 6240, 5551, 6669, 1404, 2874,
1519, 188, 5295, 893, 5419, 5611, 777, 1689, 5598, 4369, 7388, 7311,
791, 2965, 1837, 642, 5166, 2303, 5336, 1338, 6855, 1526, 7598, 3408,
402, 429, 5750, 3958, 3745, 3366, 4347, 4467, 124, 4718, 589, 3208,
4518, 6885, 2258, 3900, 4231, 3770, 2815, 6386, 6609, 6537, 2589, 2247,
2719, 4220, 3314, 4683, 4790, 5341, 3550, 4247, 1407, 5342, 4763, 6172,
1586, 4100, 3693, 4113, 434, 1151, 5902, 3547, 451, 4895, 222, 5969,
3794, 151, 6500, 6478, 226, 4139, 4914, 2378, 1476, 658, 7011, 6966,
7445, 436, 6560, 2071, 4504, 5609, 6032, 5520, 1070, 2059, 1242, 179,
2230, 1348, 6752, 6403, 5680, 5259, 3937, 17, 1168, 1168, 3310, 3310,
5548, 5548, 4201, 4201, 2665, 2665, 5882, 5882, 6898, 6898, 1056, 1056,
7277, 7277, 486, 486, 5762, 5762, 6149, 6149, 5102, 5102, 859, 859,
5032, 5032, 2160, 2160, 2743, 2743, 921, 921, 3428, 3428, 6295, 6295,
6160, 6160, 727, 727, 6217, 6217, 1533, 1533, 3865, 3865, 2233, 2233,
4917, 4917, 4846, 4846, 4919, 4919, 3931, 3931, 6083, 6083, 1390, 1390,
4798, 4798, 1681, 1681, 3588, 3588, 2224, 2224, 6331, 6331, 3145, 3145,
5109, 5109, 1497, 1497, 7021, 7021, 2391, 2391, 4546, 4546, 1756, 1756,
730, 730, 3989, 3989, 7308, 7308, 5506, 5506, 2732, 2732, 2113, 2113,
5296, 5296, 4276, 4276, 4892, 4892, 4762, 4762, 194, 194, 3417, 3417, 2,
2, 6766, 6766, 3850, 3850, 5255, 5255, 6515, 6515, 3456, 3456, 5983,
5983, 1054, 1054, 1431, 1431, 1431, 1431, 2043, 2043, 2043, 2043, 4877,
4877, 4877, 4877, 103, 103, 103, 103, 2956, 2956, 2956, 2956, 7167,
7167, 7167, 7167, 6360, 6360, 6360, 6360, 1399, 1399, 1399, 1399, 5371,
5371, 5371, 5371, 4528, 4528, 4528, 4528, 549, 549, 549, 549, 6146,
6146, 6146, 6146, 2555, 2555, 2555, 2555, 2440, 2440, 2440, 2440, 2535,
2535, 2535, 2535, 3909, 3909, 3909, 3909, 1881, 1881, 1881, 1881, 3555,
3555, 3555, 3555, 3174, 3174, 3174, 3174, 7285, 7285, 7285, 7285, 1760,
1760, 1760, 1760, 1305, 1305, 1305, 1305, 679, 679, 679, 679, 438, 438,
438, 438, 7, 7, 7, 7, 638, 638, 638, 638, 5794, 5794, 5794, 5794, 6871,
6871, 6871, 6871, 3600, 3600, 3600, 3600, 4415, 4415, 4415, 4415, 1738,
1738, 1738, 1738, 3689, 3689, 3689, 3689, 5760, 5760, 5760, 5760, 5760,
5760, 5760, 5760, 7064, 7064, 7064, 7064, 7064, 7064, 7064, 7064, 4317,
4317, 4317, 4317, 4317, 4317, 4317, 4317, 2830, 2830, 2830, 2830, 2830,
2830, 2830, 2830, 6198, 6198, 6198, 6198, 6198, 6198, 6198, 6198, 6385,
6385, 6385, 6385, 6385, 6385, 6385, 6385, 2557, 2557, 2557, 2557, 2557,
2557, 2557, 2557, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 1525, 2006,
2006, 2006, 2006, 2006, 2006, 2006, 2006, 3975, 3975, 3975, 3975, 3975,
3975, 3975, 3975, 5688, 5688, 5688, 5688, 5688, 5688, 5688, 5688, 1599,
1599, 1599, 1599, 1599, 1599, 1599, 1599, 5695, 5695, 5695, 5695, 5695,
5695, 5695, 5695, 2237, 2237, 2237, 2237, 2237, 2237, 2237, 2237, 2088,
2088, 2088, 2088, 2088, 2088, 2088, 2088, 4865, 4865, 4865, 4865, 4865,
4865, 4865, 4865, 6267, 6267, 6267, 6267, 6267, 6267, 6267, 6267, 6267,
6267, 6267, 6267, 6267, 6267, 6267, 6267, 1701, 1701, 1701, 1701, 1701,
1701, 1701, 1701, 1701, 1701, 1701, 1701, 1701, 1701, 1701, 1701, 4805,
4805, 4805, 4805, 4805, 4805, 4805, 4805, 4805, 4805, 4805, 4805, 4805,
4805, 4805, 4805, 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319,
2319, 2319, 2319, 2319, 2319, 2319, 2319, 2495, 2495, 2495, 2495, 2495,
2495, 2495, 2495, 2495, 2495, 2495, 2495, 2495, 2495, 2495, 2495, 6847,
6847, 6847, 6847, 6847, 6847, 6847, 6847, 6847, 6847, 6847, 6847, 6847,
6847, 6847, 6847, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250,
2250, 2250, 2250, 2250, 2250, 2250, 2250, 7560, 7560, 7560, 7560, 7560,
7560, 7560, 7560, 7560, 7560, 7560, 7560, 7560, 7560, 7560, 7560, 5487,
5487, 5487, 5487, 5487, 5487, 5487, 5487, 5487, 5487, 5487, 5487, 5487,
5487, 5487, 5487, 5225, 5225, 5225, 5225, 5225, 5225, 5225, 5225, 5225,
5225, 5225, 5225, 5225, 5225, 5225, 5225, 1100, 1100, 1100, 1100, 1100,
1100, 1100, 1100, 1100, 1100, 1100, 1100, 1100, 1100, 1100, 1100, 3696,
3696, 3696, 3696, 3696, 3696, 3696, 3696, 3696, 3696, 3696, 3696, 3696,
3696, 3696, 3696, 4056, 4056, 4056, 4056, 4056, 4056, 4056, 4056, 4056,
4056, 4056, 4056, 4056, 4056, 4056, 4056, 3182, 3182, 3182, 3182, 3182,
3182, 3182, 3182, 3182, 3182, 3182, 3182, 3182, 3182, 3182, 3182, 5776,
5776, 5776, 5776, 5776, 5776, 5776, 5776, 5776, 5776, 5776, 5776, 5776,
5776, 5776, 5776,
// The declarations below have no Go body in this file; their implementations
// are presumably provided by the package's amd64 assembly (not visible here).
// cpuidAmd64 receives a pointer to the first element of a uint32 array.
// NOTE(review): presumably the array carries the CPUID inputs and receives
// the resulting register values — confirm against the assembly.
func cpuidAmd64(cpuidParams *uint32)
// xgetbv0Amd64 receives a pointer to the first element of a uint32 array.
// NOTE(review): presumably it stores the XCR0 value there — confirm.
func xgetbv0Amd64(xcrVec *uint32)
// nttAVX2 is the routine nttYMM calls with the coefficient array and zetasExp.
func nttAVX2(inout, zetas *uint16)
// invnttAVX2 is the routine invnttYMM calls with the coefficient array and
// zetasInvExp.
func invnttAVX2(inout, omegas *uint16)
// pointwiseAccK2AVX2 is called by pointwiseAccYMM for vectors of 2
// polynomials; a and b point at arrays of per-polynomial coefficient pointers.
func pointwiseAccK2AVX2(dst *uint16, a, b **uint16)
// pointwiseAccK3AVX2 is the 3-polynomial counterpart of pointwiseAccK2AVX2.
func pointwiseAccK3AVX2(dst *uint16, a, b **uint16)
// pointwiseAccK4AVX2 is the 4-polynomial counterpart of pointwiseAccK2AVX2.
func pointwiseAccK4AVX2(dst *uint16, a, b **uint16)
// cbdEta4AVX2 is called by cbdYMM when eta == 4, with the destination
// coefficients and the first byte of the sampling buffer.
func cbdEta4AVX2(dst *uint16, buf *byte)
func supportsAVX2() bool {
// https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
const (
osXsaveBit = 1 << 27
avx2Bit = 1 << 5
// Check to see if CPUID actually supports the leaf that indicates AVX2.
// CPUID.(EAX=0H, ECX=0H) >= 7
regs := [4]uint32{0x00}
if regs[0] < 7 {
return false
// Check to see if the OS knows how to save/restore XMM/YMM state.
// CPUID.(EAX=01H, ECX=0H):ECX.OSXSAVE[bit 27]==1
regs = [4]uint32{0x01}
if regs[2]&osXsaveBit == 0 {
return false
xcrRegs := [2]uint32{}
if xcrRegs[0]&6 != 6 {
return false
// Check for AVX2 support.
// CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]==1
regs = [4]uint32{0x07}
return regs[1]&avx2Bit != 0
// implAVX2 is the hwaccelImpl installed by initHardwareAcceleration when
// supportsAVX2 returns true. Each hook points at one of the *YMM wrappers
// defined in this file.
var implAVX2 = &hwaccelImpl{
name: "AVX2",
nttFn: nttYMM,
invnttFn: invnttYMM,
pointwiseAccFn: pointwiseAccYMM,
cbdFn: cbdYMM,
// nttYMM hands the coefficient array p and the zetasExp table to nttAVX2,
// passing a pointer to the first element of each.
func nttYMM(p *[kyberN]uint16) {
nttAVX2(&p[0], &zetasExp[0])
// invnttYMM hands the coefficient array a and the zetasInvExp table to
// invnttAVX2, passing a pointer to the first element of each.
func invnttYMM(a *[kyberN]uint16) {
invnttAVX2(&a[0], &zetasInvExp[0])
// pointwiseAccYMM collects a pointer to the first coefficient of every
// polynomial in a and b, then calls the AVX2 routine matching len(a.vec)
// (2, 3 or 4) with p as the destination. No routine is called for any other
// length, and a length above 4 would index past aVec/bVec.
func pointwiseAccYMM(p *poly, a, b *polyVec) {
// Unlike the C code, a polyVec won't have the polys in contiguous
// memory. So each assembly function takes vectors of pointers to
// each polyvec's polys.
// Kind of ugly, but it's the price to pay for flexibility...
var aVec, bVec [4]*uint16 // k is in {2,3,4}.
for i := range a.vec {
aVec[i] = &a.vec[i].coeffs[0]
bVec[i] = &b.vec[i].coeffs[0]
switch len(a.vec) {
case 2:
pointwiseAccK2AVX2(&p.coeffs[0], &aVec[0], &bVec[0])
case 3:
pointwiseAccK3AVX2(&p.coeffs[0], &aVec[0], &bVec[0])
case 4:
pointwiseAccK4AVX2(&p.coeffs[0], &aVec[0], &bVec[0])
func cbdYMM(p *poly, buf []byte, eta int) {
switch eta {
case 4:
cbdEta4AVX2(&p.coeffs[0], &buf[0])
cbdRef(p, buf, eta)
// initHardwareAcceleration switches the package to the AVX2 implementation
// when supportsAVX2 reports it is usable: it sets isHardwareAccelerated and
// points hardwareAccelImpl at implAVX2. Otherwise both are left untouched.
func initHardwareAcceleration() {
if supportsAVX2() {
isHardwareAccelerated = true
hardwareAccelImpl = implAVX2
// hwaccel_ref.go - Unaccelerated stubs.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to the software, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
// +build !amd64 gccgo noasm !go1.10
package kyber
// initHardwareAcceleration is the variant compiled under this file's build
// constraint (!amd64, gccgo, noasm or !go1.10). No statements are visible in
// its body here.
func initHardwareAcceleration() {
// indcpa.go - Kyber IND-CPA encryption.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to the software, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package kyber
import (
// Serialize the public key as concatenation of the compressed and serialized
// vector of polynomials pk and the public seed used to generate the matrix A.
func packPublicKey(r []byte, pk *polyVec, seed []byte) {
copy(r[pk.compressedSize():], seed[:SymSize])
// De-serialize and decompress public key from a byte array; approximate
// inverse of packPublicKey.
func unpackPublicKey(pk *polyVec, seed, packedPk []byte) {
off := pk.compressedSize()
copy(seed, packedPk[off:off+SymSize])
// Serialize the ciphertext as concatenation of the compressed and serialized
// vector of polynomials b and the compressed and serialized polynomial v.
func packCiphertext(r []byte, b *polyVec, v *poly) {
// De-serialize and decompress ciphertext from a byte array; approximate
// inverse of packCiphertext.
func unpackCiphertext(b *polyVec, v *poly, c []byte) {
// Serialize the secret key.
func packSecretKey(r []byte, sk *polyVec) {
// De-serialize the secret key; inverse of packSecretKey.
func unpackSecretKey(sk *polyVec, packedSk []byte) {
// Deterministically generate matrix A (or the transpose of A) from a seed.
// Entries of the matrix are polynomials that look uniformly random. Performs
// rejection sampling on output of SHAKE-128.
//
// For each entry the seed is extended with two index bytes (swapped when
// transposed is true), and 13-bit little-endian candidates taken from buf are
// accepted as coefficients only when they are below kyberQ.
// NOTE(review): in the visible code xof is created but never written to, read
// from or reset, buf is therefore never filled, and ctr is never advanced
// after a coefficient is stored. Those statements appear to be missing from
// this copy of the file — confirm against upstream.
func genMatrix(a []polyVec, seed []byte, transposed bool) {
const (
shake128Rate = 168 // xof.BlockSize() is not a constant.
maxBlocks = 4
var buf [shake128Rate * maxBlocks]byte
var extSeed [SymSize + 2]byte
copy(extSeed[:SymSize], seed)
xof := sha3.NewShake128()
for i, v := range a {
for j, p := range v.vec {
// The two trailing bytes of extSeed select the matrix entry.
if transposed {
extSeed[SymSize] = byte(i)
extSeed[SymSize+1] = byte(j)
} else {
extSeed[SymSize] = byte(j)
extSeed[SymSize+1] = byte(i)
for ctr, pos, maxPos := 0, 0, len(buf); ctr < kyberN; {
// Candidate: low 13 bits of a little-endian 16-bit word.
val := (uint16(buf[pos]) | (uint16(buf[pos+1]) << 8)) & 0x1fff
if val < kyberQ {
p.coeffs[ctr] = val
if pos += 2; pos == maxPos {
// On the unlikely chance 4 blocks is insufficient,
// incrementally squeeze out 1 block at a time.
pos, maxPos = 0, shake128Rate
// indcpaPublicKey holds a serialized IND-CPA public key together with the
// SHA3-256 digest of that serialization.
type indcpaPublicKey struct {
packed []byte // serialized public key
h [32]byte // sha3.Sum256 of packed
// toBytes returns the serialized public key. The returned slice is pk.packed
// itself, not a copy, so callers share its backing array.
func (pk *indcpaPublicKey) toBytes() []byte {
return pk.packed
// fromBytes loads pk from the serialized public key b. It returns
// ErrInvalidKeySize unless len(b) equals p.indcpaPublicKeySize; otherwise it
// stores a private copy of b and caches its SHA3-256 digest in pk.h.
func (pk *indcpaPublicKey) fromBytes(p *ParameterSet, b []byte) error {
if len(b) != p.indcpaPublicKeySize {
return ErrInvalidKeySize
pk.packed = make([]byte, len(b))
copy(pk.packed, b)
pk.h = sha3.Sum256(b)
return nil
// indcpaSecretKey holds a serialized IND-CPA secret key.
type indcpaSecretKey struct {
packed []byte // serialized secret key
// fromBytes loads sk from the serialized secret key b. It returns
// ErrInvalidKeySize unless len(b) equals p.indcpaSecretKeySize; otherwise it
// stores a private copy of b.
func (sk *indcpaSecretKey) fromBytes(p *ParameterSet, b []byte) error {
if len(b) != p.indcpaSecretKeySize {
return ErrInvalidKeySize
sk.packed = make([]byte, len(b))
copy(sk.packed, b)
return nil
// Generates public and private key for the CPA-secure public-key encryption
// scheme underlying Kyber.
//
// SymSize bytes are read from rng; a read failure is returned as the error
// with nil keys. The public and noise seeds are the two halves of a SHA3-512
// digest. The secret vector and the error vector e are sampled with getNoise,
// the public vector is formed from the matrix and secret vector plus e, and
// both keys are packed. pk.h caches the SHA3-256 digest of the packed public
// key.
// NOTE(review): in the visible code nothing is written to h before Sum, nonce
// is never incremented, and no ntt()/invntt() calls appear around the
// matrix-vector product. These statements appear to be missing from this copy
// of the file — confirm against upstream.
func (p *ParameterSet) indcpaKeyPair(rng io.Reader) (*indcpaPublicKey, *indcpaSecretKey, error) {
buf := make([]byte, SymSize+SymSize)
if _, err := io.ReadFull(rng, buf[:SymSize]); err != nil {
return nil, nil, err
sk := &indcpaSecretKey{
packed: make([]byte, p.indcpaSecretKeySize),
pk := &indcpaPublicKey{
packed: make([]byte, p.indcpaPublicKeySize),
h := sha3.New512()
buf = buf[:0] // Reuse the backing store.
buf = h.Sum(buf)
// First half of the digest seeds the matrix, second half seeds the noise.
publicSeed, noiseSeed := buf[:SymSize], buf[SymSize:]
a := p.allocMatrix()
genMatrix(a, publicSeed, false)
var nonce byte
skpv := p.allocPolyVec()
for _, pv := range skpv.vec {
pv.getNoise(noiseSeed, nonce, p.eta)
e := p.allocPolyVec()
for _, pv := range e.vec {
pv.getNoise(noiseSeed, nonce, p.eta)
// matrix-vector multiplication
pkpv := p.allocPolyVec()
for i, pv := range pkpv.vec {
pv.pointwiseAcc(&skpv, &a[i])
pkpv.add(&pkpv, &e)
packSecretKey(sk.packed, &skpv)
packPublicKey(pk.packed, &pkpv, publicSeed)
pk.h = sha3.Sum256(pk.packed)
return pk, sk, nil
// Encryption function of the CPA-secure public-key encryption scheme
// underlying Kyber.
//
// The public key is unpacked, the transposed matrix is regenerated from its
// seed, the vectors sp and ep and the polynomial epp are sampled from coins,
// and the resulting vector bp and polynomial v are packed into c.
// NOTE(review): in the visible code m is never read (k stays zero), nonce is
// never incremented, and no ntt()/invntt() calls appear. These statements
// appear to be missing from this copy of the file — confirm against upstream.
func (p *ParameterSet) indcpaEncrypt(c, m []byte, pk *indcpaPublicKey, coins []byte) {
var k, v, epp poly
var seed [SymSize]byte
pkpv := p.allocPolyVec()
unpackPublicKey(&pkpv, seed[:], pk.packed)
at := p.allocMatrix()
genMatrix(at, seed[:], true)
var nonce byte
sp := p.allocPolyVec()
for _, pv := range sp.vec {
pv.getNoise(coins, nonce, p.eta)
ep := p.allocPolyVec()
for _, pv := range ep.vec {
pv.getNoise(coins, nonce, p.eta)
// matrix-vector multiplication
bp := p.allocPolyVec()
for i, pv := range bp.vec {
pv.pointwiseAcc(&sp, &at[i])
bp.add(&bp, &ep)
v.pointwiseAcc(&pkpv, &sp)
epp.getNoise(coins, nonce, p.eta) // Don't need to increment nonce.
v.add(&v, &epp)
v.add(&v, &k)
packCiphertext(c, &bp, &v)
// Decryption function of the CPA-secure public-key encryption scheme
// underlying Kyber.
//
// The ciphertext c is unpacked into bp and v, the secret key into skpv, and
// mp is computed as pointwiseAcc(skpv, bp) minus v.
// NOTE(review): in the visible code m is never written and no ntt()/invntt()
// calls appear. These statements appear to be missing from this copy of the
// file — confirm against upstream.
func (p *ParameterSet) indcpaDecrypt(m, c []byte, sk *indcpaSecretKey) {
var v, mp poly
skpv, bp := p.allocPolyVec(), p.allocPolyVec()
unpackCiphertext(&bp, &v, c)
unpackSecretKey(&skpv, sk.packed)
mp.pointwiseAcc(&skpv, &bp)
mp.sub(&mp, &v)
// allocMatrix returns a slice of p.k freshly allocated polynomial vectors
// (each built by allocPolyVec), i.e. storage for a p.k x p.k matrix.
func (p *ParameterSet) allocMatrix() []polyVec {
m := make([]polyVec, 0, p.k)
for i := 0; i < p.k; i++ {
m = append(m, p.allocPolyVec())
return m
// allocPolyVec returns a polyVec holding p.k newly allocated, zero-valued
// polynomials.
func (p *ParameterSet) allocPolyVec() polyVec {
vec := make([]*poly, 0, p.k)
for i := 0; i < p.k; i++ {
vec = append(vec, new(poly))
return polyVec{vec}
// kem.go - Kyber key encapsulation mechanism.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to the software, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package kyber
import (
var (
// ErrInvalidKeySize is the error returned when a byte serialized key is
// an invalid size.
ErrInvalidKeySize = errors.New("kyber: invalid key size")
// ErrInvalidCipherTextSize is the error thrown via a panic when a byte
// serialized ciphertext is an invalid size.
ErrInvalidCipherTextSize = errors.New("kyber: invalid ciphertext size")
// ErrInvalidPrivateKey is the error returned when a byte serialized
// private key is malformed.
ErrInvalidPrivateKey = errors.New("kyber: invalid private key")
// PrivateKey is a Kyber private key.
type PrivateKey struct {
sk *indcpaSecretKey
z []byte
// Bytes returns the byte serialization of a PrivateKey.
//
// The layout is, in order: the packed IND-CPA secret key, the packed public
// key, the cached public key digest h, and z. The result is a fresh slice
// with capacity p.secretKeySize.
func (sk *PrivateKey) Bytes() []byte {
p := sk.PublicKey.p
b := make([]byte, 0, p.secretKeySize)
b = append(b, sk.sk.packed...)
b = append(b, sk.PublicKey.pk.packed...)
b = append(b, sk.PublicKey.pk.h[:]...)
b = append(b, sk.z...)
return b
// PrivateKeyFromBytes deserializes a byte serialized PrivateKey.
//
// It returns ErrInvalidKeySize when len(b) differs from p.secretKeySize, and
// ErrInvalidPrivateKey when the digest stored after the public key does not
// match the digest recomputed from the public key bytes. Errors from the
// embedded key parsers are returned unchanged. All key material is copied out
// of b.
func (p *ParameterSet) PrivateKeyFromBytes(b []byte) (*PrivateKey, error) {
if len(b) != p.secretKeySize {
return nil, ErrInvalidKeySize
sk := new(PrivateKey)
sk.sk = new(indcpaSecretKey)
sk.z = make([]byte, SymSize)
sk.PublicKey.pk = new(indcpaPublicKey)
sk.PublicKey.p = p
// De-serialize the public key first.
off := p.indcpaSecretKeySize
if err := sk.PublicKey.pk.fromBytes(p, b[off:off+p.publicKeySize]); err != nil {
return nil, err
off += p.publicKeySize
// The stored H(pk) must agree with the digest fromBytes just computed.
if !bytes.Equal(sk.PublicKey.pk.h[:], b[off:off+SymSize]) {
return nil, ErrInvalidPrivateKey
off += SymSize
copy(sk.z, b[off:])
// Then go back to de-serialize the private key.
if err := sk.sk.fromBytes(p, b[:p.indcpaSecretKeySize]); err != nil {
return nil, err
return sk, nil
// PublicKey is a Kyber public key.
type PublicKey struct {
pk *indcpaPublicKey // underlying IND-CPA public key
p *ParameterSet // parameter set the key belongs to
// Bytes returns the byte serialization of a PublicKey.
// The result comes straight from indcpaPublicKey.toBytes, which returns the
// key's internal slice rather than a copy.
func (pk *PublicKey) Bytes() []byte {
return pk.pk.toBytes()
// PublicKeyFromBytes deserializes a byte serialized PublicKey.
// Any error from indcpaPublicKey.fromBytes (ErrInvalidKeySize for a wrong
// length) is returned with a nil key.
func (p *ParameterSet) PublicKeyFromBytes(b []byte) (*PublicKey, error) {
pk := &PublicKey{
pk: new(indcpaPublicKey),
p: p,
if err := pk.pk.fromBytes(p, b); err != nil {
return nil, err
return pk, nil
// GenerateKeyPair generates a private and public key parameterized with the
// given ParameterSet.
//
// Randomness is drawn from rng twice: once inside indcpaKeyPair and once for
// the SymSize-byte z value. Any read error is returned with nil keys. The
// returned PublicKey points into the returned PrivateKey.
func (p *ParameterSet) GenerateKeyPair(rng io.Reader) (*PublicKey, *PrivateKey, error) {
kp := new(PrivateKey)
var err error
if kp.PublicKey.pk, kp.sk, err = p.indcpaKeyPair(rng); err != nil {
return nil, nil, err
kp.PublicKey.p = p
kp.z = make([]byte, SymSize)
if _, err := io.ReadFull(rng, kp.z); err != nil {
return nil, nil, err
return &kp.PublicKey, kp, nil
// KEMEncrypt generates cipher text and shared secret via the CCA-secure Kyber
// key encapsulation mechanism.
//
// SymSize bytes are read from rng (a read error is returned with nil
// outputs) and hashed with SHA3-256 before use. The second half of the
// SHA3-512 output kr supplies the encryption coins and is then replaced by
// the SHA3-256 digest of the cipher text.
// NOTE(review): in the visible code hKr only absorbs the public key digest
// (not buf), hSs is summed without anything being written to it, and there is
// no return statement. These statements appear to be missing from this copy
// of the file — confirm against upstream.
func (pk *PublicKey) KEMEncrypt(rng io.Reader) (cipherText []byte, sharedSecret []byte, err error) {
var buf [SymSize]byte
if _, err = io.ReadFull(rng, buf[:]); err != nil {
return nil, nil, err
buf = sha3.Sum256(buf[:]) // Don't release system RNG output
hKr := sha3.New512()
hKr.Write(pk.pk.h[:]) // Multitarget countermeasures for coins + contributory KEM
kr := hKr.Sum(nil)
cipherText = make([]byte, pk.p.cipherTextSize)
pk.p.indcpaEncrypt(cipherText, buf[:], pk.pk, kr[SymSize:]) // coins are in kr[SymSize:]
hc := sha3.Sum256(cipherText)
copy(kr[SymSize:], hc[:]) // overwrite coins in kr with H(c)
hSs := sha3.New256()
sharedSecret = hSs.Sum(nil) // hash concatenation of pre-k and H(c) to k
// KEMDecrypt generates shared secret for given cipher text via the CCA-secure
// Kyber key encapsulation mechanism.
// On failures, sharedSecret will contain a randomized value. Providing a
// cipher text that is obviously malformed (too large/small) will result in a
// panic.
//
// The cipher text is decrypted, re-encrypted with coins derived from the
// decrypted message and the public key digest, and compared with the input
// in constant time; fail is 1 when they differ and 0 when they match.
// NOTE(review): the body of the length check, the write into h before Sum,
// and the return statement are not visible in this copy of the file.
// NOTE(review): the ConstantTimeCopy destination is kr[SymSize:], which the
// preceding line filled with H(c), while its comment speaks of overwriting
// pre-k (kr[:SymSize]) — confirm which half is intended against the reference
// implementation.
func (sk *PrivateKey) KEMDecrypt(cipherText []byte) (sharedSecret []byte) {
var buf [2 * SymSize]byte
p := sk.PublicKey.p
if len(cipherText) != p.CipherTextSize() {
p.indcpaDecrypt(buf[:SymSize], cipherText, sk.sk)
copy(buf[SymSize:], sk.PublicKey.pk.h[:]) // Multitarget countermeasure for coins + contributory KEM
kr := sha3.Sum512(buf[:])
cmp := make([]byte, p.cipherTextSize)
p.indcpaEncrypt(cmp, buf[:SymSize], sk.PublicKey.pk, kr[SymSize:]) // coins are in kr[SymSize:]
hc := sha3.Sum256(cipherText)
copy(kr[SymSize:], hc[:]) // overwrite coins in kr with H(c)
fail := subtle.ConstantTimeSelect(subtle.ConstantTimeCompare(cipherText, cmp), 0, 1)
subtle.ConstantTimeCopy(fail, kr[SymSize:], sk.z) // Overwrite pre-k with z on re-encryption failure
h := sha3.New256()
sharedSecret = h.Sum(nil)
// kex.go - Kyber key exchange.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to the software, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package kyber
import (
var (
// ErrInvalidMessageSize is the error thrown via a panic when an initiator
// or responder message is an invalid size.
ErrInvalidMessageSize = errors.New("kyber: invalid message size")
// ErrParameterSetMismatch is the error thrown via a panic when there
// is a mismatch between parameter sets.
ErrParameterSetMismatch = errors.New("kyber: parameter set mismatch")
// UAKEInitiatorMessageSize returns the size of the initiator UAKE message
// in bytes: one public key followed by one cipher text.
func (p *ParameterSet) UAKEInitiatorMessageSize() int {
return p.PublicKeySize() + p.CipherTextSize()
// UAKEResponderMessageSize returns the size of the responder UAKE message
// in bytes: a single cipher text.
func (p *ParameterSet) UAKEResponderMessageSize() int {
return p.CipherTextSize()
// UAKEInitiatorState is an initiator UAKE instance. Each instance MUST only
// be used for one key exchange and never reused.
type UAKEInitiatorState struct {
// Message is the UAKE message to send to the responder.
Message []byte
eSk *PrivateKey // ephemeral private key generated by NewUAKEInitiatorState
tk []byte // shared secret from the KEMEncrypt call in NewUAKEInitiatorState
// Shared generates a shared secret for the given UAKE instance and responder
// message.
// On failures, sharedSecret will contain a randomized value. Providing a
// cipher text that is obviously malformed (too large/small) will result in a
// panic.
//
// recv is decapsulated with the ephemeral private key s.eSk.
// NOTE(review): in the visible code nothing is written to or read from xof,
// so sharedSecret is allocated but never filled or returned. Those statements
// appear to be missing from this copy of the file — confirm against upstream.
func (s *UAKEInitiatorState) Shared(recv []byte) (sharedSecret []byte) {
xof := sha3.NewShake256()
var tk []byte
tk = s.eSk.KEMDecrypt(recv)
sharedSecret = make([]byte, SymSize)
// NewUAKEInitiatorState creates a new initiator UAKE instance.
//
// It generates an ephemeral key pair, encapsulates to pk, and builds Message
// as the ephemeral public key followed by the cipher text. The encapsulated
// secret is kept in the state. Errors from key generation or encapsulation
// are returned with a nil state.
func (pk *PublicKey) NewUAKEInitiatorState(rng io.Reader) (*UAKEInitiatorState, error) {
s := new(UAKEInitiatorState)
s.Message = make([]byte, 0, pk.p.UAKEInitiatorMessageSize())
var err error
_, s.eSk, err = pk.p.GenerateKeyPair(rng)
if err != nil {
return nil, err
s.Message = append(s.Message, s.eSk.PublicKey.Bytes()...)
var ct []byte
ct, s.tk, err = pk.KEMEncrypt(rng)
if err != nil {
return nil, err
s.Message = append(s.Message, ct...)
return s, nil
// UAKEResponderShared generates a responder message and shared secret given
// an initiator UAKE message.
// On failures, sharedSecret will contain a randomized value. Providing a
// cipher text that is obviously malformed (too large/small) will result in a
// panic.
//
// recv is split into the peer's ephemeral public key and a cipher text. The
// responder encapsulates to the ephemeral key (producing message) and
// decapsulates the cipher text with its own private key.
// NOTE(review): the bodies of the three error checks and the xof
// writes/reads that would fill sharedSecret are not visible in this copy of
// the file — confirm against upstream.
func (sk *PrivateKey) UAKEResponderShared(rng io.Reader, recv []byte) (message, sharedSecret []byte) {
p := sk.PublicKey.p
pkLen := p.PublicKeySize()
// Deserialize the peer's ephemeral public key.
if len(recv) != p.UAKEInitiatorMessageSize() {
rawPk, ct := recv[:pkLen], recv[pkLen:]
pk, err := p.PublicKeyFromBytes(rawPk)
if err != nil {
xof := sha3.NewShake256()
var tk []byte
message, tk, err = pk.KEMEncrypt(rng)
if err != nil {
tk = sk.KEMDecrypt(ct)
sharedSecret = make([]byte, SymSize)
// AKEInitiatorMessageSize returns the size of the initiator AKE message
// in bytes: one public key followed by one cipher text.
func (p *ParameterSet) AKEInitiatorMessageSize() int {
return p.PublicKeySize() + p.CipherTextSize()
// AKEResponderMessageSize returns the size of the responder AKE message
// in bytes: two cipher texts.
func (p *ParameterSet) AKEResponderMessageSize() int {
return 2 * p.CipherTextSize()
// AKEInitiatorState is an initiator AKE instance. Each instance MUST only be
// used for one key exchange and never reused.
type AKEInitiatorState struct {
// Message is the AKE message to send to the responder.
Message []byte
eSk *PrivateKey // ephemeral private key, copied from the UAKE state
tk []byte // shared secret from the initial encapsulation, copied from the UAKE state
// Shared generates a shared secret for the given AKE instance, responder
// message, and long term initiator private key.
// On failures sharedSecret will contain a randomized value. Providing a
// malformed responder message, or a private key that uses a different
// ParameterSet than the AKEInitiatorState will result in a panic.
//
// recv holds two cipher texts: the first is decapsulated with the ephemeral
// key s.eSk, the second with initiatorPrivateKey.
// NOTE(review): the bodies of the two checks and the xof writes/reads that
// would consume each tk and fill sharedSecret are not visible in this copy of
// the file — confirm against upstream.
func (s *AKEInitiatorState) Shared(recv []byte, initiatorPrivateKey *PrivateKey) (sharedSecret []byte) {
p := s.eSk.PublicKey.p
if initiatorPrivateKey.PublicKey.p != p {
if len(recv) != p.AKEResponderMessageSize() {
ctLen := p.CipherTextSize()
xof := sha3.NewShake256()
var tk []byte
tk = s.eSk.KEMDecrypt(recv[:ctLen])
tk = initiatorPrivateKey.KEMDecrypt(recv[ctLen:])
sharedSecret = make([]byte, SymSize)
// NewAKEInitiatorState creates a new initiator AKE instance.
// The state is built by NewUAKEInitiatorState and its Message, eSk and tk
// fields are carried over unchanged; any error from that call is returned
// with a nil state.
func (pk *PublicKey) NewAKEInitiatorState(rng io.Reader) (*AKEInitiatorState, error) {
s := new(AKEInitiatorState)
// This is identical to the UAKE case, so just reuse the code.
us, err := pk.NewUAKEInitiatorState(rng)
if err != nil {
return nil, err
s.Message = us.Message
s.eSk = us.eSk
s.tk = us.tk
return s, nil
// AKEResponderShared generates a responder message and shared secret given
// an initiator AKE message and long term initiator public key.
// On failures sharedSecret will contain a randomized value. Providing a
// malformed initiator message, or a peer public key that uses a different
// ParameterSet than the responder's private key will result in a panic.
//
// recv is split into the peer's ephemeral public key and a cipher text. The
// responder encapsulates to the ephemeral key and then to peerPublicKey,
// appending both cipher texts to message, and decapsulates the received
// cipher text with its own private key.
// NOTE(review): the bodies of the checks and the xof writes/reads that would
// consume each tk and fill sharedSecret are not visible in this copy of the
// file — confirm against upstream.
func (sk *PrivateKey) AKEResponderShared(rng io.Reader, recv []byte, peerPublicKey *PublicKey) (message, sharedSecret []byte) {
p := sk.PublicKey.p
pkLen := p.PublicKeySize()
if peerPublicKey.p != p {
// Deserialize the peer's ephemeral public key.
if len(recv) != p.AKEInitiatorMessageSize() {
rawPk, ct := recv[:pkLen], recv[pkLen:]
pk, err := p.PublicKeyFromBytes(rawPk)
if err != nil {
message = make([]byte, 0, p.AKEResponderMessageSize())
xof := sha3.NewShake256()
var tk, tmp []byte
tmp, tk, err = pk.KEMEncrypt(rng)
if err != nil {
message = append(message, tmp...)
tmp, tk, err = peerPublicKey.KEMEncrypt(rng)
if err != nil {
message = append(message, tmp...)
tk = sk.KEMDecrypt(ct)
sharedSecret = make([]byte, SymSize)
// ntt.go - Number-Theoretic Transform.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to the software, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package kyber
// Computes negacyclic number-theoretic transform (NTT) of a polynomial (vector
// of 256 coefficients) in place; inputs assumed to be in normal order, output
// in bitreversed order.
//
// Eight levels are processed with the butterfly distance halving from 128 to
// 1. On odd levels the sum branch skips the Barrett reduction; on even levels
// both outputs are reduced.
// NOTE(review): k is initialised to 1 and used to index zetas but is never
// advanced in the visible code; the increment appears to be missing from this
// copy of the file — confirm against upstream.
func nttRef(p *[kyberN]uint16) {
var j int
k := 1
for level := 7; level >= 0; level-- {
distance := 1 << uint(level)
// j is left at start+distance by the inner loop, so this steps 2*distance.
for start := 0; start < kyberN; start = j + distance {
zeta := zetas[k]
for j = start; j < start+distance; j++ {
t := montgomeryReduce(uint32(zeta) * uint32(p[j+distance]))
// 4*kyberQ is added before subtracting t.
p[j+distance] = barrettReduce(p[j] + 4*kyberQ - t)
if level&1 == 1 { // odd level
p[j] = p[j] + t // Omit reduction (be lazy)
} else {
p[j] = barrettReduce(p[j] + t)
// Computes inverse of negacyclic number-theoretic transform (NTT) of a
// polynomial (vector of 256 coefficients) in place; inputs assumed to be in
// bitreversed order, output in normal order.
//
// Eight levels are processed with the butterfly distance doubling from 1 to
// 128. On even levels the sum is left unreduced; on odd levels it is Barrett
// reduced. Finally every coefficient is multiplied by the matching entry of
// psisInvMontgomery.
// NOTE(review): jTwiddle is declared per start and used to index
// omegasInvBitrevMontgomery but is never advanced in the visible code; the
// increment appears to be missing from this copy of the file — confirm
// against upstream.
func invnttRef(a *[kyberN]uint16) {
for level := 0; level < 8; level++ {
distance := 1 << uint(level)
for start := 0; start < distance; start++ {
var jTwiddle int
for j := start; j < kyberN-1; j += 2 * distance {
w := uint32(omegasInvBitrevMontgomery[jTwiddle])
temp := a[j]
if level&1 == 1 { // odd level
a[j] = barrettReduce(temp + a[j+distance])
} else {
a[j] = temp + a[j+distance] // Omit reduction (be lazy)
// 4*kyberQ is added before subtracting a[j+distance].
t := w * (uint32(temp) + 4*kyberQ - uint32(a[j+distance]))
a[j+distance] = montgomeryReduce(t)
for i, v := range psisInvMontgomery {
a[i] = montgomeryReduce(uint32(a[i]) * uint32(v))
// params.go - Kyber parameterization.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to the software, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package kyber
const (
	// SymSize is the size of the shared key (and certain internal parameters
	// such as hashes and seeds) in bytes.
	SymSize = 32

	kyberN = 256  // number of coefficients per polynomial
	kyberQ = 7681 // coefficient modulus

	polySize            = 416 // bytes per serialized polynomial (13 bits per coefficient)
	polyCompressedSize  = 96  // bytes per compressed polynomial (3 bits per coefficient)
	compressedCoeffSize = 352 // bytes per compressed polyVec element (11 bits per coefficient)
)

var (
	// Kyber512 is the Kyber-512 parameter set, which aims to provide security
	// equivalent to AES-128.
	//
	// This parameter set has a 1632 byte private key, 736 byte public key,
	// and a 800 byte cipher text.
	Kyber512 = newParameterSet("Kyber-512", 2)

	// Kyber768 is the Kyber-768 parameter set, which aims to provide security
	// equivalent to AES-192.
	//
	// This parameter set has a 2400 byte private key, 1088 byte public key,
	// and a 1152 byte cipher text.
	Kyber768 = newParameterSet("Kyber-768", 3)

	// Kyber1024 is the Kyber-1024 parameter set, which aims to provide
	// security equivalent to AES-256.
	//
	// This parameter set has a 3168 byte private key, 1440 byte public key,
	// and a 1504 byte cipher text.
	Kyber1024 = newParameterSet("Kyber-1024", 4)
)

// ParameterSet is a Kyber parameter set.
type ParameterSet struct {
	name string

	k   int // number of polynomials per vector
	eta int // noise sampling parameter

	polyVecSize           int
	polyVecCompressedSize int

	indcpaMsgSize       int
	indcpaPublicKeySize int
	indcpaSecretKeySize int
	indcpaSize          int

	publicKeySize  int
	secretKeySize  int
	cipherTextSize int
}

// Name returns the name of a given ParameterSet.
func (p *ParameterSet) Name() string {
	return p.name
}

// PublicKeySize returns the size of a public key in bytes.
func (p *ParameterSet) PublicKeySize() int {
	return p.publicKeySize
}

// PrivateKeySize returns the size of a private key in bytes.
func (p *ParameterSet) PrivateKeySize() int {
	return p.secretKeySize
}

// CipherTextSize returns the size of a cipher text in bytes.
func (p *ParameterSet) CipherTextSize() int {
	return p.cipherTextSize
}

// newParameterSet builds the ParameterSet called name for vectors of k
// polynomials, deriving eta and every serialized size from k. It panics when
// k is not 2, 3 or 4.
func newParameterSet(name string, k int) *ParameterSet {
	var p ParameterSet

	p.name = name
	p.k = k
	switch k {
	case 2:
		p.eta = 5
	case 3:
		p.eta = 4
	case 4:
		p.eta = 3
	default:
		// Must sit under default: attached to case 4 it would reject a
		// supported parameter set.
		panic("kyber: k must be in {2,3,4}")
	}

	p.polyVecSize = k * polySize
	p.polyVecCompressedSize = k * compressedCoeffSize

	p.indcpaMsgSize = SymSize
	p.indcpaPublicKeySize = p.polyVecCompressedSize + SymSize
	p.indcpaSecretKeySize = p.polyVecSize
	p.indcpaSize = p.polyVecCompressedSize + polyCompressedSize

	p.publicKeySize = p.indcpaPublicKeySize
	// 2*SymSize extra bytes: SymSize for H(pk) and SymSize for z.
	p.secretKeySize = p.indcpaSecretKeySize + p.indcpaPublicKeySize + 2*SymSize
	p.cipherTextSize = p.indcpaSize

	return &p
}
// poly.go - Kyber polynomial.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to the software, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package kyber
import "golang.org/x/crypto/sha3"
// Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial coeffs[0] +
// X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1].
type poly struct {
coeffs [kyberN]uint16
// Compression and subsequent serialization of a polynomial.
//
// Each coefficient is frozen and mapped to 3 bits as
// ((c << 3) + kyberQ/2) / kyberQ, masked with 7. Eight coefficients are
// packed into three bytes, so r must hold at least 3*kyberN/8 bytes.
func (p *poly) compress(r []byte) {
var t [8]uint32
for i, k := 0, 0; i < kyberN; i, k = i+8, k+3 {
for j := 0; j < 8; j++ {
t[j] = uint32((((freeze(p.coeffs[i+j]) << 3) + kyberQ/2) / kyberQ) & 7)
// Pack eight 3-bit values little-endian into three bytes.
r[k] = byte(t[0] | (t[1] << 3) | (t[2] << 6))
r[k+1] = byte((t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7))
r[k+2] = byte((t[5] >> 1) | (t[6] << 2) | (t[7] << 5))
// De-serialization and subsequent decompression of a polynomial; approximate
// inverse of poly.compress().
//
// Every three input bytes yield eight 3-bit values; each value x becomes the
// coefficient (x*kyberQ + 4) >> 3. a must hold at least 3*kyberN/8 bytes.
func (p *poly) decompress(a []byte) {
for i, off := 0, 0; i < kyberN; i, off = i+8, off+3 {
p.coeffs[i+0] = ((uint16(a[off]&7) * kyberQ) + 4) >> 3
p.coeffs[i+1] = (((uint16(a[off]>>3) & 7) * kyberQ) + 4) >> 3
p.coeffs[i+2] = (((uint16(a[off]>>6) | (uint16(a[off+1]<<2) & 4)) * kyberQ) + 4) >> 3
p.coeffs[i+3] = (((uint16(a[off+1]>>1) & 7) * kyberQ) + 4) >> 3
p.coeffs[i+4] = (((uint16(a[off+1]>>4) & 7) * kyberQ) + 4) >> 3
p.coeffs[i+5] = (((uint16(a[off+1]>>7) | (uint16(a[off+2]<<1) & 6)) * kyberQ) + 4) >> 3
p.coeffs[i+6] = (((uint16(a[off+2]>>2) & 7) * kyberQ) + 4) >> 3
p.coeffs[i+7] = (((uint16(a[off+2] >> 5)) * kyberQ) + 4) >> 3
// Serialization of a polynomial.
//
// Coefficients are frozen and packed little-endian at 13 bits each: every
// group of eight coefficients fills 13 bytes, so r must hold at least
// 13*kyberN/8 bytes.
func (p *poly) toBytes(r []byte) {
var t [8]uint16
for i := 0; i < kyberN/8; i++ {
for j := 0; j < 8; j++ {
t[j] = freeze(p.coeffs[8*i+j])
r[13*i+0] = byte(t[0] & 0xff)
r[13*i+1] = byte((t[0] >> 8) | ((t[1] & 0x07) << 5))
r[13*i+2] = byte((t[1] >> 3) & 0xff)
r[13*i+3] = byte((t[1] >> 11) | ((t[2] & 0x3f) << 2))
r[13*i+4] = byte((t[2] >> 6) | ((t[3] & 0x01) << 7))
r[13*i+5] = byte((t[3] >> 1) & 0xff)
r[13*i+6] = byte((t[3] >> 9) | ((t[4] & 0x0f) << 4))
r[13*i+7] = byte((t[4] >> 4) & 0xff)
r[13*i+8] = byte((t[4] >> 12) | ((t[5] & 0x7f) << 1))
r[13*i+9] = byte((t[5] >> 7) | ((t[6] & 0x03) << 6))
r[13*i+10] = byte((t[6] >> 2) & 0xff)
r[13*i+11] = byte((t[6] >> 10) | ((t[7] & 0x1f) << 3))
r[13*i+12] = byte(t[7] >> 5)
// De-serialization of a polynomial; inverse of poly.toBytes().
//
// Every 13 input bytes are unpacked into eight 13-bit coefficients. The
// values are stored as read, without reduction modulo kyberQ. a must hold at
// least 13*kyberN/8 bytes.
func (p *poly) fromBytes(a []byte) {
for i := 0; i < kyberN/8; i++ {
p.coeffs[8*i+0] = uint16(a[13*i+0]) | ((uint16(a[13*i+1]) & 0x1f) << 8)
p.coeffs[8*i+1] = (uint16(a[13*i+1]) >> 5) | (uint16(a[13*i+2]) << 3) | ((uint16(a[13*i+3]) & 0x03) << 11)
p.coeffs[8*i+2] = (uint16(a[13*i+3]) >> 2) | ((uint16(a[13*i+4]) & 0x7f) << 6)
p.coeffs[8*i+3] = (uint16(a[13*i+4]) >> 7) | (uint16(a[13*i+5]) << 1) | ((uint16(a[13*i+6]) & 0x0f) << 9)
p.coeffs[8*i+4] = (uint16(a[13*i+6]) >> 4) | (uint16(a[13*i+7]) << 4) | ((uint16(a[13*i+8]) & 0x01) << 12)
p.coeffs[8*i+5] = (uint16(a[13*i+8]) >> 1) | ((uint16(a[13*i+9]) & 0x3f) << 7)
p.coeffs[8*i+6] = (uint16(a[13*i+9]) >> 6) | (uint16(a[13*i+10]) << 2) | ((uint16(a[13*i+11]) & 0x07) << 10)
p.coeffs[8*i+7] = (uint16(a[13*i+11]) >> 3) | (uint16(a[13*i+12]) << 5)
// Convert 32-byte message to polynomial.
//
// Bit j of byte i selects coefficient 8*i+j: a set bit yields (kyberQ+1)/2
// and a clear bit yields 0. The selection uses an all-ones/all-zeros mask
// rather than a branch. msg must hold at least SymSize bytes.
func (p *poly) fromMsg(msg []byte) {
for i, v := range msg[:SymSize] {
for j := 0; j < 8; j++ {
// mask is 0xffff when the bit is set, 0 otherwise.
mask := -((uint16(v) >> uint(j)) & 1)
p.coeffs[8*i+j] = mask & ((kyberQ + 1) / 2)
// Convert polynomial to 32-byte message.
//
// Bit j of msg[i] is derived from coefficient 8*i+j as
// (((c << 1) + kyberQ/2) / kyberQ) & 1, where c is the frozen coefficient.
// msg must hold at least SymSize bytes; each byte is cleared before its bits
// are set.
func (p *poly) toMsg(msg []byte) {
for i := 0; i < SymSize; i++ {
msg[i] = 0
for j := 0; j < 8; j++ {
t := (((freeze(p.coeffs[8*i+j]) << 1) + kyberQ/2) / kyberQ) & 1
msg[i] |= byte(t << uint(j))
// Sample a polynomial deterministically from a seed and a nonce, with output
// polynomial close to centered binomial distribution with parameter eta.
//
// seed followed by the nonce byte is expanded with SHAKE-256 into
// eta*kyberN/4 bytes, which are handed to p.cbd together with eta.
func (p *poly) getNoise(seed []byte, nonce byte, eta int) {
// Capacity is sized for a SymSize-byte seed; a longer seed reallocates.
extSeed := make([]byte, 0, SymSize+1)
extSeed = append(extSeed, seed...)
extSeed = append(extSeed, nonce)
buf := make([]byte, eta*kyberN/4)
sha3.ShakeSum256(buf, extSeed)
p.cbd(buf, eta)
// Computes negacyclic number-theoretic transform (NTT) of a polynomial in
// place; inputs assumed to be in normal order, output in bitreversed order.
func (p *poly) ntt() {
// Computes inverse of negacyclic number-theoretic transform (NTT) of a
// polynomial in place; inputs assumed to be in bitreversed order, output in
// normal order.
func (p *poly) invntt() {
// Add two polynomials.
// Stores barrettReduce(a[i] + b[i]) into p for every coefficient; p may alias
// a or b because each index is read before it is written.
func (p *poly) add(a, b *poly) {
for i := range p.coeffs {
p.coeffs[i] = barrettReduce(a.coeffs[i] + b.coeffs[i])
// Subtract two polynomials.
// Stores barrettReduce(a[i] + 3*kyberQ - b[i]) into p for every coefficient;
// 3*kyberQ is added before the subtraction.
// NOTE(review): presumably this offset keeps the uint16 value from wrapping
// below zero for the ranges b can take here — confirm.
func (p *poly) sub(a, b *poly) {
for i := range p.coeffs {
p.coeffs[i] = barrettReduce(a.coeffs[i] + 3*kyberQ - b.coeffs[i])
// polyvec.go - Vector of Kyber polynomials.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to the software, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package kyber
// polyVec is a vector of polynomials, held as a slice of pointers so the
// individual polynomials need not be contiguous in memory.
type polyVec struct {
vec []*poly
// Compress and serialize vector of polynomials.
//
// Each coefficient is frozen and mapped to 11 bits as
// ((c << 11) + kyberQ/2) / kyberQ, masked with 0x7ff; the shift is done in
// uint32. Eight coefficients fill 11 bytes and each polynomial occupies
// compressedCoeffSize bytes of r.
func (v *polyVec) compress(r []byte) {
var off int
for _, vec := range v.vec {
for j := 0; j < kyberN/8; j++ {
var t [8]uint16
for k := 0; k < 8; k++ {
t[k] = uint16((((uint32(freeze(vec.coeffs[8*j+k])) << 11) + kyberQ/2) / kyberQ) & 0x7ff)
r[off+11*j+0] = byte(t[0] & 0xff)
r[off+11*j+1] = byte((t[0] >> 8) | ((t[1] & 0x1f) << 3))
r[off+11*j+2] = byte((t[1] >> 5) | ((t[2] & 0x03) << 6))
r[off+11*j+3] = byte((t[2] >> 2) & 0xff)
r[off+11*j+4] = byte((t[2] >> 10) | ((t[3] & 0x7f) << 1))
r[off+11*j+5] = byte((t[3] >> 7) | ((t[4] & 0x0f) << 4))
r[off+11*j+6] = byte((t[4] >> 4) | ((t[5] & 0x01) << 7))
r[off+11*j+7] = byte((t[5] >> 1) & 0xff)
r[off+11*j+8] = byte((t[5] >> 9) | ((t[6] & 0x3f) << 2))
r[off+11*j+9] = byte((t[6] >> 6) | ((t[7] & 0x07) << 5))
r[off+11*j+10] = byte((t[7] >> 3))
// Advance to the next polynomial's slot in r.
off += compressedCoeffSize
// De-serialize and decompress vector of polynomials; approximate inverse of
// polyVec.compress().
//
// Every 11 input bytes yield eight 11-bit values; each value x becomes the
// coefficient (x*kyberQ + 1024) >> 11, computed in uint32. Each polynomial
// consumes compressedCoeffSize bytes of a.
func (v *polyVec) decompress(a []byte) {
var off int
for _, vec := range v.vec {
for j := 0; j < kyberN/8; j++ {
vec.coeffs[8*j+0] = uint16((((uint32(a[off+11*j+0]) | ((uint32(a[off+11*j+1]) & 0x07) << 8)) * kyberQ) + 1024) >> 11)
vec.coeffs[8*j+1] = uint16(((((uint32(a[off+11*j+1]) >> 3) | ((uint32(a[off+11*j+2]) & 0x3f) << 5)) * kyberQ) + 1024) >> 11)
vec.coeffs[8*j+2] = uint16(((((uint32(a[off+11*j+2]) >> 6) | ((uint32(a[off+11*j+3]) & 0xff) << 2) | ((uint32(a[off+11*j+4]) & 0x01) << 10)) * kyberQ) + 1024) >> 11)
vec.coeffs[8*j+3] = uint16(((((uint32(a[off+11*j+4]) >> 1) | ((uint32(a[off+11*j+5]) & 0x0f) << 7)) * kyberQ) + 1024) >> 11)
vec.coeffs[8*j+4] = uint16(((((uint32(a[off+11*j+5]) >> 4) | ((uint32(a[off+11*j+6]) & 0x7f) << 4)) * kyberQ) + 1024) >> 11)
vec.coeffs[8*j+5] = uint16(((((uint32(a[off+11*j+6]) >> 7) | ((uint32(a[off+11*j+7]) & 0xff) << 1) | ((uint32(a[off+11*j+8]) & 0x03) << 9)) * kyberQ) + 1024) >> 11)
vec.coeffs[8*j+6] = uint16(((((uint32(a[off+11*j+8]) >> 2) | ((uint32(a[off+11*j+9]) & 0x1f) << 6)) * kyberQ) + 1024) >> 11)
vec.coeffs[8*j+7] = uint16(((((uint32(a[off+11*j+9]) >> 5) | ((uint32(a[off+11*j+10]) & 0xff) << 3)) * kyberQ) + 1024) >> 11)
// Advance to the next polynomial's slot in a.
off += compressedCoeffSize
// Serialize vector of polynomials.
func (v *polyVec) toBytes(r []byte) {
for i, p := range v.vec {
// De-serialize vector of polynomials; inverse of polyVec.toBytes().
func (v *polyVec) fromBytes(a []byte) {
for i, p := range v.vec {
// Apply forward NTT to all elements of a vector of polynomials.
func (v *polyVec) ntt() {
for _, p := range v.vec {
// Apply inverse NTT to all elements of a vector of polynomials.
func (v *polyVec) invntt() {
for _, p := range v.vec {
// Pointwise multiply elements of a and b and accumulate into p.
// The work is dispatched through hardwareAccelImpl.pointwiseAccFn.
func (p *poly) pointwiseAcc(a, b *polyVec) {
hardwareAccelImpl.pointwiseAccFn(p, a, b)
// Add vectors of polynomials.
// Element i of v receives a.vec[i] + b.vec[i] via poly.add; a and b must have
// at least len(v.vec) elements.
func (v *polyVec) add(a, b *polyVec) {
for i, p := range v.vec {
p.add(a.vec[i], b.vec[i])
// Get compressed and serialized size in bytes.
// This is compressedCoeffSize bytes for each polynomial in the vector.
func (v *polyVec) compressedSize() int {
return len(v.vec) * compressedCoeffSize
// pointwiseAccRef is the portable pointwise multiply-accumulate: for every
// coefficient index j it sums, over all vector elements i, the Montgomery
// product of a.vec[i].coeffs[j] and b.vec[i].coeffs[j], then Barrett-reduces
// the sum into p.coeffs[j]. Each b coefficient is first multiplied by 4613
// and Montgomery-reduced. a and b must be non-empty and of equal length.
func pointwiseAccRef(p *poly, a, b *polyVec) {
for j := 0; j < kyberN; j++ {
t := montgomeryReduce(4613 * uint32(b.vec[0].coeffs[j])) // 4613 = 2^{2*18} % q
p.coeffs[j] = montgomeryReduce(uint32(a.vec[0].coeffs[j]) * uint32(t))
for i := 1; i < len(a.vec); i++ { // len(a.vec) == kyberK
t = montgomeryReduce(4613 * uint32(b.vec[i].coeffs[j]))
p.coeffs[j] += montgomeryReduce(uint32(a.vec[i].coeffs[j]) * uint32(t))
p.coeffs[j] = barrettReduce(p.coeffs[j])
// precomp.go - Precomputed NTT constants.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to the software, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package kyber
// Precomputed constants for the forward NTT and inverse NTT.
// Computed using Pari/GP as follows:
// brv=[0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240, \
// 8,136,72,200,40,168,104,232,24,152,88,216,56,184,120,248, \
// 4,132,68,196,36,164,100,228,20,148,84,212,52,180,116,244, \
// 12,140,76,204,44,172,108,236,28,156,92,220,60,188,124,252, \
// 2,130,66,194,34,162,98,226,18,146,82,210,50,178,114,242, \
// 10,138,74,202,42,170,106,234,26,154,90,218,58,186,122,250, \
// 6,134,70,198,38,166,102,230,22,150,86,214,54,182,118,246, \
// 14,142,78,206,46,174,110,238,30,158,94,222,62,190,126,254, \
// 1,129,65,193,33,161,97,225,17,145,81,209,49,177,113,241, \
// 9,137,73,201,41,169,105,233,25,153,89,217,57,185,121,249, \
// 5,133,69,197,37,165,101,229,21,149,85,213,53,181,117,245, \
// 13,141,77,205,45,173,109,237,29,157,93,221,61,189,125,253, \
// 3,131,67,195,35,163,99,227,19,147,83,211,51,179,115,243, \
// 11,139,75,203,43,171,107,235,27,155,91,219,59,187,123,251, \
// 7,135,71,199,39,167,103,231,23,151,87,215,55,183,119,247, \
// 15,143,79,207,47,175,111,239,31,159,95,223,63,191,127,255];
// q = 7681;
// n = 256;
// mont = Mod(2^18,q);
// g=0; for(i=2,q-1,if(znorder(Mod(i,q)) == 2*n, g=Mod(i,q); break))
// zetas = lift(vector(n, i, g^(brv[i])*mont))
// omegas_inv_bitrev_montgomery = lift(vector(n/2, i, (g^2)^(-brv[2*(i-1)+1])*mont))
// psis_inv_montgomery = lift(vector(n, i, g^(-(i-1))/n*mont))
// zetas holds the kyberN constants used by the forward NTT: g^brv[i] scaled
// by the Montgomery factor 2^18 mod q, exactly as produced by the `zetas`
// line of the Pari/GP script above.
var zetas = [kyberN]uint16{
990, 7427, 2634, 6819, 578, 3281, 2143, 1095, 484, 6362, 3336, 5382, 6086, 3823, 877, 5656,
3583, 7010, 6414, 263, 1285, 291, 7143, 7338, 1581, 5134, 5184, 5932, 4042, 5775, 2468, 3,
606, 729, 5383, 962, 3240, 7548, 5129, 7653, 5929, 4965, 2461, 641, 1584, 2666, 1142, 157,
7407, 5222, 5602, 5142, 6140, 5485, 4931, 1559, 2085, 5284, 2056, 3538, 7269, 3535, 7190, 1957,
3465, 6792, 1538, 4664, 2023, 7643, 3660, 7673, 1694, 6905, 3995, 3475, 5939, 1859, 6910, 4434,
1019, 1492, 7087, 4761, 657, 4859, 5798, 2640, 1693, 2607, 2782, 5400, 6466, 1010, 957, 3851,
2121, 6392, 7319, 3367, 3659, 3375, 6430, 7583, 1549, 5856, 4773, 6084, 5544, 1650, 3997, 4390,
6722, 2915, 4245, 2635, 6128, 7676, 5737, 1616, 3457, 3132, 7196, 4702, 6239, 851, 2122, 3009,
7613, 7295, 2007, 323, 5112, 3716, 2289, 6442, 6965, 2713, 7126, 3401, 963, 6596, 607, 5027,
7078, 4484, 5937, 944, 2860, 2680, 5049, 1777, 5850, 3387, 6487, 6777, 4812, 4724, 7077, 186,
6848, 6793, 3463, 5877, 1174, 7116, 3077, 5945, 6591, 590, 6643, 1337, 6036, 3991, 1675, 2053,
6055, 1162, 1679, 3883, 4311, 2106, 6163, 4486, 6374, 5006, 4576, 4288, 5180, 4102, 282, 6119,
7443, 6330, 3184, 4971, 2530, 5325, 4171, 7185, 5175, 5655, 1898, 382, 7211, 43, 5965, 6073,
1730, 332, 1577, 3304, 2329, 1699, 6150, 2379, 5113, 333, 3502, 4517, 1480, 1172, 5567, 651,
925, 4573, 599, 1367, 4109, 1863, 6929, 1605, 3866, 2065, 4048, 839, 5764, 2447, 2022, 3345,
1990, 4067, 2036, 2069, 3567, 7371, 2368, 339, 6947, 2159, 654, 7327, 2768, 6676, 987, 2214,
// omegasInvBitrevMontgomery holds the kyberN/2 constants used by the inverse
// NTT: (g^2)^(-brv[2*(i-1)+1]) scaled by the Montgomery factor 2^18 mod q,
// exactly as produced by the `omegas_inv_bitrev_montgomery` line of the
// Pari/GP script above.
var omegasInvBitrevMontgomery = [kyberN / 2]uint16{
990, 254, 862, 5047, 6586, 5538, 4400, 7103, 2025, 6804, 3858, 1595, 2299, 4345, 1319, 7197,
7678, 5213, 1906, 3639, 1749, 2497, 2547, 6100, 343, 538, 7390, 6396, 7418, 1267, 671, 4098,
5724, 491, 4146, 412, 4143, 5625, 2397, 5596, 6122, 2750, 2196, 1541, 2539, 2079, 2459, 274,
7524, 6539, 5015, 6097, 7040, 5220, 2716, 1752, 28, 2552, 133, 4441, 6719, 2298, 6952, 7075,
4672, 5559, 6830, 1442, 2979, 485, 4549, 4224, 6065, 1944, 5, 1553, 5046, 3436, 4766, 959,
3291, 3684, 6031, 2137, 1597, 2908, 1825, 6132, 98, 1251, 4306, 4022, 4314, 362, 1289, 5560,
3830, 6724, 6671, 1215, 2281, 4899, 5074, 5988, 5041, 1883, 2822, 7024, 2920, 594, 6189, 6662,
3247, 771, 5822, 1742, 4206, 3686, 776, 5987, 8, 4021, 38, 5658, 3017, 6143, 889, 4216,
// psisInvMontgomery holds the kyberN constants g^(-(i-1))/n scaled by the
// Montgomery factor 2^18 mod q, exactly as produced by the
// `psis_inv_montgomery` line of the Pari/GP script above.
var psisInvMontgomery = [kyberN]uint16{
1024, 4972, 5779, 6907, 4943, 4168, 315, 5580, 90, 497, 1123, 142, 4710, 5527, 2443, 4871,
698, 2489, 2394, 4003, 684, 2241, 2390, 7224, 5072, 2064, 4741, 1687, 6841, 482, 7441, 1235,
2126, 4742, 2802, 5744, 6287, 4933, 699, 3604, 1297, 2127, 5857, 1705, 3868, 3779, 4397, 2177,
159, 622, 2240, 1275, 640, 6948, 4572, 5277, 209, 2605, 1157, 7328, 5817, 3191, 1662, 2009,
4864, 574, 2487, 164, 6197, 4436, 7257, 3462, 4268, 4281, 3414, 4515, 3170, 1290, 2003, 5855,
7156, 6062, 7531, 1732, 3249, 4884, 7512, 3590, 1049, 2123, 1397, 6093, 3691, 6130, 6541, 3946,
6258, 3322, 1788, 4241, 4900, 2309, 1400, 1757, 400, 502, 6698, 2338, 3011, 668, 7444, 4580,
6516, 6795, 2959, 4136, 3040, 2279, 6355, 3943, 2913, 6613, 7416, 4084, 6508, 5556, 4054, 3782,
61, 6567, 2212, 779, 632, 5709, 5667, 4923, 4911, 6893, 4695, 4164, 3536, 2287, 7594, 2848,
3267, 1911, 3128, 546, 1991, 156, 4958, 5531, 6903, 483, 875, 138, 250, 2234, 2266, 7222,
2842, 4258, 812, 6703, 232, 5207, 6650, 2585, 1900, 6225, 4932, 7265, 4701, 3173, 4635, 6393,
227, 7313, 4454, 4284, 6759, 1224, 5223, 1447, 395, 2608, 4502, 4037, 189, 3348, 54, 6443,
2210, 6230, 2826, 1780, 3002, 5995, 1955, 6102, 6045, 3938, 5019, 4417, 1434, 1262, 1507, 5847,
5917, 7157, 7177, 6434, 7537, 741, 4348, 1309, 145, 374, 2236, 4496, 5028, 6771, 6923, 7421,
1978, 1023, 3857, 6876, 1102, 7451, 4704, 6518, 1344, 765, 384, 5705, 1207, 1630, 4734, 1563,
6839, 5933, 1954, 4987, 7142, 5814, 7527, 4953, 7637, 4707, 2182, 5734, 2818, 541, 4097, 5641,
// reduce.go - Montgomery, Barrett, and Full reduction.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to the software, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package kyber
// Parameters of the Montgomery reduction performed by montgomeryReduce.
const (
// qinv is the negated inverse of q modulo 2^18, i.e. q*qinv = -1 (mod 2^18).
qinv = 7679 // -inverse_mod(q,2^18)
// rlog is the base-2 logarithm of the Montgomery radix R (R = 2^rlog = 2^18).
rlog = 18
// Montgomery reduction; given a 32-bit integer a, computes 16-bit integer
// congruent to a * R^-1 mod q, where R=2^18 (see value of rlog).
func montgomeryReduce(a uint32) uint16 {
u := a * qinv
u &= (1 << rlog) - 1
u *= kyberQ
a += u
return uint16(a >> rlog)
// Barrett reduction; given a 16-bit integer a, computes 16-bit integer
// congruent to a mod q in {0,...,11768}.
func barrettReduce(a uint16) uint16 {
u := uint32(a >> 13) // ((uint32_t) a * sinv) >> 16
u *= kyberQ
a -= uint16(u)
return a
// Full reduction; given a 16-bit integer a, computes unsigned integer a mod q.
func freeze(x uint16) uint16 {
r := barrettReduce(x)
m := r - kyberQ
c := int16(m)
c >>= 15
r = m ^ ((r ^ m) & uint16(c))
return r
// MersenneTwister
// From https://gist.github.com/cuixin/1b8b6bd7bfbde8fe76e8
package MersenneTwister
import (
_ "crypto/sha512"
// N is the number of 64-bit words in the generator's state vector.
const N = 312
// M is the offset of the state word that Int63 XORs into word i (array[i+M],
// wrapping around the end of the vector) when it regenerates the state.
const M = 156
// MATRIX_A is the constant XORed into a regenerated state word when the low
// bit of the combined word x is 1 (see mag01 in Int63).
const MATRIX_A = 0xB5026F5AA96619E9
// UPPER_MASK selects the most significant 33 bits of a state word.
const UPPER_MASK = 0xFFFFFFFF80000000
// MT19937_64 holds the state of a 64-bit Mersenne Twister pseudo-random
// number generator.
type MT19937_64 struct {
array [N]uint64 // state vector
index uint64 // array index of the next state word to output; values >= N make Int63 regenerate the state
func New() *MT19937_64 {
return &MT19937_64{
index: N + 1,
func (m *MT19937_64) _initstate() {
// Recommendations abound that mtwist should throw away 1st 10000 or so
// of initial state
for i := 0; i < 10000; i++ {
_ = m.Int63()
// Seed initialises the state vector from a single 64-bit seed. array[0] is
// set to the seed itself and every following word is derived from its
// predecessor by the recurrence in the loop below.
func (m *MT19937_64) Seed(seed int64) {
m.array[0] = uint64(seed)
// m.index doubles as the loop counter, so it equals N when the loop ends.
for m.index = 1; m.index < N; m.index++ {
m.array[m.index] = (6364136223846793005*(m.array[m.index-1]^(m.array[m.index-1]>>62)) + m.index)
//fmt.Printf("final array(s):%v\n", m.array)
// _bytesToUint64 decodes the first eight bytes of b as a big-endian unsigned
// 64-bit integer: b[0] becomes the most significant byte of the result and
// b[7] the least significant. Any bytes beyond the eighth are ignored.
//
// It panics with an index-out-of-range error if len(b) < 8.
func _bytesToUint64(b []byte) (r uint64) {
	_ = b[7] // single up-front bounds check; panics on short input
	for _, octet := range b[:8] {
		r = r<<8 | uint64(octet)
	}
	return r
}
// SeedFullState loads the whole state vector from the byte string s and
// resets the output index to 0.
//
// The state needs N*8 bytes. When s is shorter it is first lengthened by
// repeatedly feeding it to a SHA-512 hash and appending the running digest
// to s. Each consecutive 8-byte group of s is then decoded by _bytesToUint64
// into one state word; bytes of s beyond the first N*8 are not used.
func (m *MT19937_64) SeedFullState(s []byte) {
//fmt.Printf("s:%v\n", s)
if len(s) < N*8 {
// Expand s if shorter than mtwist array state
// crypto.SHA512.New() relies on crypto/sha512 being linked into the binary
// (this file imports it for that side effect).
ha := crypto.SHA512
h := ha.New()
// Number of whole digest-sized chunks that fit into the shortfall.
shortfallChunks := ((N * 8) - len(s)) / h.Size()
//shortfallRem := ((N * 8) - len(s)) % h.Size()
//fmt.Printf("chunks, rem:%d,%d\n", shortfallChunks, shortfallRem)
idx := 0
for idx < shortfallChunks {
// Absorb the current s, then append the digest so far to s (Sum appends).
_, _ = h.Write(s)
s = h.Sum(s)
idx += 1
// NOTE(review): this further Write/Sum pair appears to run once after the
// loop, covering the remainder dropped by the integer division above -
// confirm against the full file.
_, _ = h.Write(s)
s = h.Sum(s)
//fmt.Printf("exp s:%v\n", s)
// Fill the state vector, eight bytes of s per word.
for idx := 0; idx < N; {
m.array[idx] = _bytesToUint64(s[idx*8 : (idx*8)+8])
idx += 1
//fmt.Printf("final array(xs):%v\n", m.array)
// Start handing out values from the first state word.
m.index = 0
// Int63 returns the next pseudo-random value of the generator: the state
// word at m.index after tempering. When m.index has reached N the state
// vector is regenerated first and m.index is reset to 0.
//
// NOTE(review): despite the name, the tempered value is returned as a full
// uint64; no masking down to 63 bits happens in this function - confirm the
// intended range.
func (m *MT19937_64) Int63() uint64 {
var i int
var x uint64
// mag01[b] is 0 for b == 0 and MATRIX_A for b == 1; indexed by the low bit of x.
mag01 := []uint64{0, MATRIX_A}
// All N state words have been consumed (or the generator was never seeded).
if m.index >= N {
// N+1 is the index value installed by New.
if m.index == N+1 {
// Words whose partner array[i+M] lies inside the vector.
for i = 0; i < N-M; i++ {
x = (m.array[i] & UPPER_MASK) | (m.array[i+1] & LOWER_MASK)
m.array[i] = m.array[i+(M)] ^ (x >> 1) ^ mag01[int(x&uint64(1))]
// Remaining words: the partner index wraps around to i+M-N.
for ; i < N-1; i++ {
x = (m.array[i] & UPPER_MASK) | (m.array[i+1] & LOWER_MASK)
m.array[i] = m.array[i+(M-N)] ^ (x >> 1) ^ mag01[int(x&uint64(1))]
// Last word combines with array[0] instead of array[N].
x = (m.array[N-1] & UPPER_MASK) | (m.array[0] & LOWER_MASK)
m.array[N-1] = m.array[M-1] ^ (x >> 1) ^ mag01[int(x&uint64(1))]
m.index = 0
x = m.array[m.index]
// Tempering: scramble the raw state word with shift-and-mask XORs.
x ^= (x >> 29) & 0x5555555555555555
x ^= (x << 17) & 0x71D67FFFEDA60000
x ^= (x << 37) & 0xFFF7EEE000000000
x ^= (x >> 43)
return x
// IntN returns the next generator output reduced modulo value, i.e. a number
// in [0, value). It panics with an integer-divide-by-zero runtime error when
// value is 0. Because a plain modulo is used, the result is slightly biased
// towards small numbers unless value evenly divides the size of the
// generator's output range.
func (m *MT19937_64) IntN(value uint64) uint64 {
return m.Int63() % value
func (m *MT19937_64) Read(p []byte) (n int, err error) {
for idx := 0; idx < len(p); idx++ {
p[idx] = byte( (m.Int63()>>47) % 256)
return n, nil
Creative Commons Legal Code
CC0 1.0 Universal
Statement of Purpose
The laws of most jurisdictions throughout the world automatically confer
exclusive Copyright and Related Rights (defined below) upon the creator
and subsequent owner(s) (each and all, an "owner") of an original work of
authorship and/or a database (each, a "Work").
Certain owners wish to permanently relinquish those rights to a Work for
the purpose of contributing to a commons of creative, cultural and
scientific works ("Commons") that the public can reliably and without fear
of later claims of infringement build upon, modify, incorporate in other
works, reuse and redistribute as freely as possible in any form whatsoever
and for any purposes, including without limitation commercial purposes.
These owners may contribute to the Commons to promote the ideal of a free
culture and the further production of creative, cultural and scientific
works, or to gain reputation or greater distribution for their Work in
part through the use and efforts of others.
For these and/or other purposes and motivations, and without any
expectation of additional consideration or compensation, the person
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
is an owner of Copyright and Related Rights in the Work, voluntarily
elects to apply CC0 to the Work and publicly distribute the Work under its
terms, with knowledge of his or her Copyright and Related Rights in the
Work and the meaning and intended legal effect of CC0 on those rights.
1. Copyright and Related Rights. A Work made available under CC0 may be
protected by copyright and related or neighboring rights ("Copyright and
Related Rights"). Copyright and Related Rights include, but are not
limited to, the following:
i. the right to reproduce, adapt, distribute, perform, display,
communicate, and translate a Work;
ii. moral rights retained by the original author(s) and/or performer(s);
iii. publicity and privacy rights pertaining to a person's image or
likeness depicted in a Work;
iv. rights protecting against unfair competition in regards to a Work,
subject to the limitations in paragraph 4(a), below;
v. rights protecting the extraction, dissemination, use and reuse of data
in a Work;
vi. database rights (such as those arising under Directive 96/9/EC of the
European Parliament and of the Council of 11 March 1996 on the legal
protection of databases, and under any national implementation
thereof, including any amended or successor version of such
directive); and
vii. other similar, equivalent or corresponding rights throughout the
world based on applicable law or treaty, and any national
implementations thereof.
2. Waiver. To the greatest extent permitted by, but not in contravention
of, applicable law, Affirmer hereby overtly, fully, permanently,
irrevocably and unconditionally waives, abandons, and surrenders all of
Affirmer's Copyright and Related Rights and associated claims and causes
of action, whether now known or unknown (including existing as well as
future claims and causes of action), in the Work (i) in all territories
worldwide, (ii) for the maximum duration provided by applicable law or
treaty (including future time extensions), (iii) in any current or future
medium and for any number of copies, and (iv) for any purpose whatsoever,
including without limitation commercial, advertising or promotional
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
member of the public at large and to the detriment of Affirmer's heirs and
successors, fully intending that such Waiver shall not be subject to
revocation, rescission, cancellation, termination, or any other legal or
equitable action to disrupt the quiet enjoyment of the Work by the public
as contemplated by Affirmer's express Statement of Purpose.
3. Public License Fallback. Should any part of the Waiver for any reason
be judged legally invalid or ineffective under applicable law, then the
Waiver shall be preserved to the maximum extent permitted taking into
account Affirmer's express Statement of Purpose. In addition, to the
extent the Waiver is so judged Affirmer hereby grants to each affected
person a royalty-free, non transferable, non sublicensable, non exclusive,
irrevocable and unconditional license to exercise Affirmer's Copyright and
Related Rights in the Work (i) in all territories worldwide, (ii) for the
maximum duration provided by applicable law or treaty (including future
time extensions), (iii) in any current or future medium and for any number
of copies, and (iv) for any purpose whatsoever, including without
limitation commercial, advertising or promotional purposes (the
"License"). The License shall be deemed effective as of the date CC0 was
applied by Affirmer to the Work. Should any part of the License for any
reason be judged legally invalid or ineffective under applicable law, such
partial invalidity or ineffectiveness shall not invalidate the remainder
of the License, and in such case Affirmer hereby affirms that he or she
will not (i) exercise any of his or her remaining Copyright and Related
Rights in the Work or (ii) assert any associated claims and causes of
action with respect to the Work, in either case contrary to Affirmer's
express Statement of Purpose.
4. Limitations and Disclaimers.
a. No trademark or patent rights held by Affirmer are waived, abandoned,
surrendered, licensed or otherwise affected by this document.
b. Affirmer offers the Work as-is and makes no representations or
warranties of any kind concerning the Work, express, implied,
statutory or otherwise, including without limitation warranties of
title, merchantability, fitness for a particular purpose, non
infringement, or the absence of latent or other defects, accuracy, or
the present or absence of errors, whether or not discoverable, all to
the greatest extent permissible under applicable law.
c. Affirmer disclaims responsibility for clearing rights of other persons
that may apply to the Work or any use thereof, including without
limitation any person's Copyright and Related Rights in the Work.
Further, Affirmer disclaims responsibility for obtaining any necessary
consents, permissions or other rights required for any use of the Work.
d. Affirmer understands and acknowledges that Creative Commons is not a
party to this document and has no duty or obligation with respect to
this CC0 or use of the Work.
// batcher.go - Constant time polynomial sampler.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to newhope, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package newhope
func batcher84(x []uint16) {
// In theory this should probably be inlined.
compareAndSwap := func(x []uint16, i, j int) {
const _5q = 5 * paramQ
var c int32
var t uint16
c = _5q - 1 - int32(x[16*i])
c >>= 31
t = x[16*i] ^ x[16*j]
t &= uint16(c)
x[16*i] ^= t
x[16*j] ^= t
compareAndSwap(x, 0, 1)
compareAndSwap(x, 2, 3)
compareAndSwap(x, 0, 2)
compareAndSwap(x, 1, 3)
compareAndSwap(x, 1, 2)
compareAndSwap(x, 4, 5)
compareAndSwap(x, 6, 7)
compareAndSwap(x, 4, 6)
compareAndSwap(x, 5, 7)
compareAndSwap(x, 5, 6)
compareAndSwap(x, 0, 4)
compareAndSwap(x, 2, 6)
compareAndSwap(x, 2, 4)
compareAndSwap(x, 1, 5)
compareAndSwap(x, 3, 7)
compareAndSwap(x, 3, 5)
compareAndSwap(x, 1, 2)
compareAndSwap(x, 3, 4)
compareAndSwap(x, 5, 6)
compareAndSwap(x, 8, 9)
compareAndSwap(x, 10, 11)
compareAndSwap(x, 8, 10)
compareAndSwap(x, 9, 11)
compareAndSwap(x, 9, 10)
compareAndSwap(x, 12, 13)
compareAndSwap(x, 14, 15)
compareAndSwap(x, 12, 14)
compareAndSwap(x, 13, 15)
compareAndSwap(x, 13, 14)
compareAndSwap(x, 8, 12)
compareAndSwap(x, 10, 14)
compareAndSwap(x, 10, 12)
compareAndSwap(x, 9, 13)
compareAndSwap(x, 11, 15)
compareAndSwap(x, 11, 13)
compareAndSwap(x, 9, 10)
compareAndSwap(x, 11, 12)
compareAndSwap(x, 13, 14)
compareAndSwap(x, 0, 8)
compareAndSwap(x, 4, 12)
compareAndSwap(x, 4, 8)
compareAndSwap(x, 2, 10)
compareAndSwap(x, 6, 14)
compareAndSwap(x, 6, 10)
compareAndSwap(x, 2, 4)
compareAndSwap(x, 6, 8)
compareAndSwap(x, 10, 12)
compareAndSwap(x, 1, 9)
compareAndSwap(x, 5, 13)
compareAndSwap(x, 5, 9)
compareAndSwap(x, 3, 11)
compareAndSwap(x, 7, 15)
compareAndSwap(x, 7, 11)
compareAndSwap(x, 3, 5)
compareAndSwap(x, 7, 9)
compareAndSwap(x, 11, 13)
compareAndSwap(x, 1, 2)
compareAndSwap(x, 3, 4)
compareAndSwap(x, 5, 6)
compareAndSwap(x, 7, 8)
compareAndSwap(x, 9, 10)
compareAndSwap(x, 11, 12)
compareAndSwap(x, 13, 14)
compareAndSwap(x, 16, 17)
compareAndSwap(x, 18, 19)
compareAndSwap(x, 16, 18)
compareAndSwap(x, 17, 19)
compareAndSwap(x, 17, 18)
compareAndSwap(x, 20, 21)
compareAndSwap(x, 22, 23)
compareAndSwap(x, 20, 22)
compareAndSwap(x, 21, 23)
compareAndSwap(x, 21, 22)
compareAndSwap(x, 16, 20)
compareAndSwap(x, 18, 22)
compareAndSwap(x, 18, 20)
compareAndSwap(x, 17, 21)
compareAndSwap(x, 19, 23)
compareAndSwap(x, 19, 21)
compareAndSwap(x, 17, 18)
compareAndSwap(x, 19, 20)
compareAndSwap(x, 21, 22)
compareAndSwap(x, 24, 25)
compareAndSwap(x, 26, 27)
compareAndSwap(x, 24, 26)
compareAndSwap(x, 25, 27)
compareAndSwap(x, 25, 26)
compareAndSwap(x, 28, 29)
compareAndSwap(x, 30, 31)
compareAndSwap(x, 28, 30)
compareAndSwap(x, 29, 31)
compareAndSwap(x, 29, 30)
compareAndSwap(x, 24, 28)
compareAndSwap(x, 26, 30)
compareAndSwap(x, 26, 28)
compareAndSwap(x, 25, 29)
compareAndSwap(x, 27, 31)
compareAndSwap(x, 27, 29)
compareAndSwap(x, 25, 26)
compareAndSwap(x, 27, 28)
compareAndSwap(x, 29, 30)
compareAndSwap(x, 16, 24)
compareAndSwap(x, 20, 28)
compareAndSwap(x, 20, 24)
compareAndSwap(x, 18, 26)
compareAndSwap(x, 22, 30)
compareAndSwap(x, 22, 26)
compareAndSwap(x, 18, 20)
compareAndSwap(x, 22, 24)
compareAndSwap(x, 26, 28)
compareAndSwap(x, 17, 25)
compareAndSwap(x, 21, 29)
compareAndSwap(x, 21, 25)
compareAndSwap(x, 19, 27)
compareAndSwap(x, 23, 31)
compareAndSwap(x, 23, 27)
compareAndSwap(x, 19, 21)
compareAndSwap(x, 23, 25)
compareAndSwap(x, 27, 29)
compareAndSwap(x, 17, 18)
compareAndSwap(x, 19, 20)
compareAndSwap(x, 21, 22)
compareAndSwap(x, 23, 24)
compareAndSwap(x, 25, 26)
compareAndSwap(x, 27, 28)
compareAndSwap(x, 29, 30)
compareAndSwap(x, 0, 16)
compareAndSwap(x, 8, 24)
compareAndSwap(x, 8, 16)
compareAndSwap(x, 4, 20)
compareAndSwap(x, 12, 28)
compareAndSwap(x, 12, 20)
compareAndSwap(x, 4, 8)
compareAndSwap(x, 12, 16)
compareAndSwap(x, 20, 24)
compareAndSwap(x, 2, 18)
compareAndSwap(x, 10, 26)
compareAndSwap(x, 10, 18)
compareAndSwap(x, 6, 22)
compareAndSwap(x, 14, 30)
compareAndSwap(x, 14, 22)
compareAndSwap(x, 6, 10)
compareAndSwap(x, 14, 18)
compareAndSwap(x, 22, 26)
compareAndSwap(x, 2, 4)
compareAndSwap(x, 6, 8)
compareAndSwap(x, 10, 12)
compareAndSwap(x, 14, 16)
compareAndSwap(x, 18, 20)
compareAndSwap(x, 22, 24)
compareAndSwap(x, 26, 28)
compareAndSwap(x, 1, 17)
compareAndSwap(x, 9, 25)
compareAndSwap(x, 9, 17)
compareAndSwap(x, 5, 21)
compareAndSwap(x, 13, 29)
compareAndSwap(x, 13, 21)
compareAndSwap(x, 5, 9)
compareAndSwap(x, 13, 17)
compareAndSwap(x, 21, 25)
compareAndSwap(x, 3, 19)
compareAndSwap(x, 11, 27)
compareAndSwap(x, 11, 19)
compareAndSwap(x, 7, 23)
compareAndSwap(x, 15, 31)
compareAndSwap(x, 15, 23)
compareAndSwap(x, 7, 11)
compareAndSwap(x, 15, 19)
compareAndSwap(x, 23, 27)
compareAndSwap(x, 3, 5)
compareAndSwap(x, 7, 9)
compareAndSwap(x, 11, 13)
compareAndSwap(x, 15, 17)
compareAndSwap(x, 19, 21)
compareAndSwap(x, 23, 25)
compareAndSwap(x, 27, 29)
compareAndSwap(x, 1, 2)
compareAndSwap(x, 3, 4)
compareAndSwap(x, 5, 6)
compareAndSwap(x, 7, 8)
compareAndSwap(x, 9, 10)
compareAndSwap(x, 11, 12)
compareAndSwap(x, 13, 14)
compareAndSwap(x, 15, 16)
compareAndSwap(x, 17, 18)
compareAndSwap(x, 19, 20)
compareAndSwap(x, 21, 22)
compareAndSwap(x, 23, 24)
compareAndSwap(x, 25, 26)
compareAndSwap(x, 27, 28)
compareAndSwap(x, 29, 30)
compareAndSwap(x, 32, 33)
compareAndSwap(x, 34, 35)
compareAndSwap(x, 32, 34)
compareAndSwap(x, 33, 35)
compareAndSwap(x, 33, 34)
compareAndSwap(x, 36, 37)
compareAndSwap(x, 38, 39)
compareAndSwap(x, 36, 38)
compareAndSwap(x, 37, 39)
compareAndSwap(x, 37, 38)
compareAndSwap(x, 32, 36)
compareAndSwap(x, 34, 38)
compareAndSwap(x, 34, 36)
compareAndSwap(x, 33, 37)
compareAndSwap(x, 35, 39)
compareAndSwap(x, 35, 37)
compareAndSwap(x, 33, 34)
compareAndSwap(x, 35, 36)
compareAndSwap(x, 37, 38)
compareAndSwap(x, 40, 41)
compareAndSwap(x, 42, 43)
compareAndSwap(x, 40, 42)
compareAndSwap(x, 41, 43)
compareAndSwap(x, 41, 42)
compareAndSwap(x, 44, 45)
compareAndSwap(x, 46, 47)
compareAndSwap(x, 44, 46)
compareAndSwap(x, 45, 47)
compareAndSwap(x, 45, 46)
compareAndSwap(x, 40, 44)
compareAndSwap(x, 42, 46)
compareAndSwap(x, 42, 44)
compareAndSwap(x, 41, 45)
compareAndSwap(x, 43, 47)
compareAndSwap(x, 43, 45)
compareAndSwap(x, 41, 42)
compareAndSwap(x, 43, 44)
compareAndSwap(x, 45, 46)
compareAndSwap(x, 32, 40)
compareAndSwap(x, 36, 44)
compareAndSwap(x, 36, 40)
compareAndSwap(x, 34, 42)
compareAndSwap(x, 38, 46)
compareAndSwap(x, 38, 42)
compareAndSwap(x, 34, 36)
compareAndSwap(x, 38, 40)
compareAndSwap(x, 42, 44)
compareAndSwap(x, 33, 41)
compareAndSwap(x, 37, 45)
compareAndSwap(x, 37, 41)
compareAndSwap(x, 35, 43)
compareAndSwap(x, 39, 47)
compareAndSwap(x, 39, 43)
compareAndSwap(x, 35, 37)
compareAndSwap(x, 39, 41)
compareAndSwap(x, 43, 45)
compareAndSwap(x, 33, 34)
compareAndSwap(x, 35, 36)
compareAndSwap(x, 37, 38)
compareAndSwap(x, 39, 40)
compareAndSwap(x, 41, 42)
compareAndSwap(x, 43, 44)
compareAndSwap(x, 45, 46)
compareAndSwap(x, 48, 49)
compareAndSwap(x, 50, 51)
compareAndSwap(x, 48, 50)
compareAndSwap(x, 49, 51)
compareAndSwap(x, 49, 50)
compareAndSwap(x, 52, 53)
compareAndSwap(x, 54, 55)
compareAndSwap(x, 52, 54)
compareAndSwap(x, 53, 55)
compareAndSwap(x, 53, 54)
compareAndSwap(x, 48, 52)
compareAndSwap(x, 50, 54)
compareAndSwap(x, 50, 52)
compareAndSwap(x, 49, 53)
compareAndSwap(x, 51, 55)
compareAndSwap(x, 51, 53)
compareAndSwap(x, 49, 50)
compareAndSwap(x, 51, 52)
compareAndSwap(x, 53, 54)
compareAndSwap(x, 56, 57)
compareAndSwap(x, 58, 59)
compareAndSwap(x, 56, 58)
compareAndSwap(x, 57, 59)
compareAndSwap(x, 57, 58)
compareAndSwap(x, 60, 61)
compareAndSwap(x, 62, 63)
compareAndSwap(x, 60, 62)
compareAndSwap(x, 61, 63)
compareAndSwap(x, 61, 62)
compareAndSwap(x, 56, 60)
compareAndSwap(x, 58, 62)
compareAndSwap(x, 58, 60)
compareAndSwap(x, 57, 61)
compareAndSwap(x, 59, 63)
compareAndSwap(x, 59, 61)
compareAndSwap(x, 57, 58)
compareAndSwap(x, 59, 60)
compareAndSwap(x, 61, 62)
compareAndSwap(x, 48, 56)
compareAndSwap(x, 52, 60)
compareAndSwap(x, 52, 56)
compareAndSwap(x, 50, 58)
compareAndSwap(x, 54, 62)
compareAndSwap(x, 54, 58)
compareAndSwap(x, 50, 52)
compareAndSwap(x, 54, 56)
compareAndSwap(x, 58, 60)
compareAndSwap(x, 49, 57)
compareAndSwap(x, 53, 61)
compareAndSwap(x, 53, 57)
compareAndSwap(x, 51, 59)
compareAndSwap(x, 55, 63)
compareAndSwap(x, 55, 59)
compareAndSwap(x, 51, 53)
compareAndSwap(x, 55, 57)
compareAndSwap(x, 59, 61)
compareAndSwap(x, 49, 50)
compareAndSwap(x, 51, 52)
compareAndSwap(x, 53, 54)
compareAndSwap(x, 55, 56)
compareAndSwap(x, 57, 58)
compareAndSwap(x, 59, 60)
compareAndSwap(x, 61, 62)
compareAndSwap(x, 32, 48)
compareAndSwap(x, 40, 56)
compareAndSwap(x, 40, 48)
compareAndSwap(x, 36, 52)
compareAndSwap(x, 44, 60)
compareAndSwap(x, 44, 52)
compareAndSwap(x, 36, 40)
compareAndSwap(x, 44, 48)
compareAndSwap(x, 52, 56)
compareAndSwap(x, 34, 50)
compareAndSwap(x, 42, 58)
compareAndSwap(x, 42, 50)
compareAndSwap(x, 38, 54)
compareAndSwap(x, 46, 62)
compareAndSwap(x, 46, 54)
compareAndSwap(x, 38, 42)
compareAndSwap(x, 46, 50)
compareAndSwap(x, 54, 58)
compareAndSwap(x, 34, 36)
compareAndSwap(x, 38, 40)
compareAndSwap(x, 42, 44)
compareAndSwap(x, 46, 48)
compareAndSwap(x, 50, 52)
compareAndSwap(x, 54, 56)
compareAndSwap(x, 58, 60)
compareAndSwap(x, 33, 49)
compareAndSwap(x, 41, 57)
compareAndSwap(x, 41, 49)
compareAndSwap(x, 37, 53)
compareAndSwap(x, 45, 61)
compareAndSwap(x, 45, 53)
compareAndSwap(x, 37, 41)
compareAndSwap(x, 45, 49)
compareAndSwap(x, 53, 57)
compareAndSwap(x, 35, 51)
compareAndSwap(x, 43, 59)
compareAndSwap(x, 43, 51)
compareAndSwap(x, 39, 55)
compareAndSwap(x, 47, 63)
compareAndSwap(x, 47, 55)
compareAndSwap(x, 39, 43)
compareAndSwap(x, 47, 51)
compareAndSwap(x, 55, 59)
compareAndSwap(x, 35, 37)
compareAndSwap(x, 39, 41)
compareAndSwap(x, 43, 45)
compareAndSwap(x, 47, 49)
compareAndSwap(x, 51, 53)
compareAndSwap(x, 55, 57)
compareAndSwap(x, 59, 61)
compareAndSwap(x, 33, 34)
compareAndSwap(x, 35, 36)
compareAndSwap(x, 37, 38)
compareAndSwap(x, 39, 40)
compareAndSwap(x, 41, 42)
compareAndSwap(x, 43, 44)
compareAndSwap(x, 45, 46)
compareAndSwap(x, 47, 48)
compareAndSwap(x, 49, 50)
compareAndSwap(x, 51, 52)
compareAndSwap(x, 53, 54)
compareAndSwap(x, 55, 56)
compareAndSwap(x, 57, 58)
compareAndSwap(x, 59, 60)
compareAndSwap(x, 61, 62)
compareAndSwap(x, 0, 32)
compareAndSwap(x, 16, 48)
compareAndSwap(x, 16, 32)
compareAndSwap(x, 8, 40)
compareAndSwap(x, 24, 56)
compareAndSwap(x, 24, 40)
compareAndSwap(x, 8, 16)
compareAndSwap(x, 24, 32)
compareAndSwap(x, 40, 48)
compareAndSwap(x, 4, 36)
compareAndSwap(x, 20, 52)
compareAndSwap(x, 20, 36)
compareAndSwap(x, 12, 44)
compareAndSwap(x, 28, 60)
compareAndSwap(x, 28, 44)
compareAndSwap(x, 12, 20)
compareAndSwap(x, 28, 36)
compareAndSwap(x, 44, 52)
compareAndSwap(x, 4, 8)
compareAndSwap(x, 12, 16)
compareAndSwap(x, 20, 24)
compareAndSwap(x, 28, 32)
compareAndSwap(x, 36, 40)
compareAndSwap(x, 44, 48)
compareAndSwap(x, 52, 56)
compareAndSwap(x, 2, 34)
compareAndSwap(x, 18, 50)
compareAndSwap(x, 18, 34)
compareAndSwap(x, 10, 42)
compareAndSwap(x, 26, 58)
compareAndSwap(x, 26, 42)
compareAndSwap(x, 10, 18)
compareAndSwap(x, 26, 34)
compareAndSwap(x, 42, 50)
compareAndSwap(x, 6, 38)
compareAndSwap(x, 22, 54)
compareAndSwap(x, 22, 38)
compareAndSwap(x, 14, 46)
compareAndSwap(x, 30, 62)
compareAndSwap(x, 30, 46)
compareAndSwap(x, 14, 22)
compareAndSwap(x, 30, 38)
compareAndSwap(x, 46, 54)
compareAndSwap(x, 6, 10)
compareAndSwap(x, 14, 18)
compareAndSwap(x, 22, 26)
compareAndSwap(x, 30, 34)
compareAndSwap(x, 38, 42)
compareAndSwap(x, 46, 50)
compareAndSwap(x, 54, 58)
compareAndSwap(x, 2, 4)
compareAndSwap(x, 6, 8)
compareAndSwap(x, 10, 12)
compareAndSwap(x, 14, 16)
compareAndSwap(x, 18, 20)
compareAndSwap(x, 22, 24)
compareAndSwap(x, 26, 28)
compareAndSwap(x, 30, 32)
compareAndSwap(x, 34, 36)
compareAndSwap(x, 38, 40)
compareAndSwap(x, 42, 44)
compareAndSwap(x, 46, 48)
compareAndSwap(x, 50, 52)
compareAndSwap(x, 54, 56)
compareAndSwap(x, 58, 60)
compareAndSwap(x, 1, 33)
compareAndSwap(x, 17, 49)
compareAndSwap(x, 17, 33)
compareAndSwap(x, 9, 41)
compareAndSwap(x, 25, 57)
compareAndSwap(x, 25, 41)
compareAndSwap(x, 9, 17)
compareAndSwap(x, 25, 33)
compareAndSwap(x, 41, 49)
compareAndSwap(x, 5, 37)
compareAndSwap(x, 21, 53)
compareAndSwap(x, 21, 37)
compareAndSwap(x, 13, 45)
compareAndSwap(x, 29, 61)
compareAndSwap(x, 29, 45)
compareAndSwap(x, 13, 21)
compareAndSwap(x, 29, 37)
compareAndSwap(x, 45, 53)
compareAndSwap(x, 5, 9)
compareAndSwap(x, 13, 17)
compareAndSwap(x, 21, 25)
compareAndSwap(x, 29, 33)
compareAndSwap(x, 37, 41)
compareAndSwap(x, 45, 49)
compareAndSwap(x, 53, 57)
compareAndSwap(x, 3, 35)
compareAndSwap(x, 19, 51)
compareAndSwap(x, 19, 35)
compareAndSwap(x, 11, 43)
compareAndSwap(x, 27, 59)
compareAndSwap(x, 27, 43)
compareAndSwap(x, 11, 19)
compareAndSwap(x, 27, 35)
compareAndSwap(x, 43, 51)
compareAndSwap(x, 7, 39)
compareAndSwap(x, 23, 55)
compareAndSwap(x, 23, 39)
compareAndSwap(x, 15, 47)
compareAndSwap(x, 31, 63)
compareAndSwap(x, 31, 47)
compareAndSwap(x, 15, 23)
compareAndSwap(x, 31, 39)
compareAndSwap(x, 47, 55)
compareAndSwap(x, 7, 11)
compareAndSwap(x, 15, 19)
compareAndSwap(x, 23, 27)
compareAndSwap(x, 31, 35)
compareAndSwap(x, 39, 43)
compareAndSwap(x, 47, 51)
compareAndSwap(x, 55, 59)
compareAndSwap(x, 3, 5)
compareAndSwap(x, 7, 9)
compareAndSwap(x, 11, 13)
compareAndSwap(x, 15, 17)
compareAndSwap(x, 19, 21)
compareAndSwap(x, 23, 25)
compareAndSwap(x, 27, 29)
compareAndSwap(x, 31, 33)
compareAndSwap(x, 35, 37)
compareAndSwap(x, 39, 41)
compareAndSwap(x, 43, 45)
compareAndSwap(x, 47, 49)
compareAndSwap(x, 51, 53)
compareAndSwap(x, 55, 57)
compareAndSwap(x, 59, 61)
compareAndSwap(x, 1, 2)
compareAndSwap(x, 3, 4)
compareAndSwap(x, 5, 6)
compareAndSwap(x, 7, 8)
compareAndSwap(x, 9, 10)
compareAndSwap(x, 11, 12)
compareAndSwap(x, 13, 14)
compareAndSwap(x, 15, 16)
compareAndSwap(x, 17, 18)
compareAndSwap(x, 19, 20)
compareAndSwap(x, 21, 22)
compareAndSwap(x, 23, 24)
compareAndSwap(x, 25, 26)
compareAndSwap(x, 27, 28)
compareAndSwap(x, 29, 30)
compareAndSwap(x, 31, 32)
compareAndSwap(x, 33, 34)
compareAndSwap(x, 35, 36)
compareAndSwap(x, 37, 38)
compareAndSwap(x, 39, 40)
compareAndSwap(x, 41, 42)
compareAndSwap(x, 43, 44)
compareAndSwap(x, 45, 46)
compareAndSwap(x, 47, 48)
compareAndSwap(x, 49, 50)
compareAndSwap(x, 51, 52)
compareAndSwap(x, 53, 54)
compareAndSwap(x, 55, 56)
compareAndSwap(x, 57, 58)
compareAndSwap(x, 59, 60)
compareAndSwap(x, 61, 62)
compareAndSwap(x, 64, 65)
compareAndSwap(x, 66, 67)
compareAndSwap(x, 64, 66)
compareAndSwap(x, 65, 67)
compareAndSwap(x, 65, 66)
compareAndSwap(x, 68, 69)
compareAndSwap(x, 70, 71)
compareAndSwap(x, 68, 70)
compareAndSwap(x, 69, 71)
compareAndSwap(x, 69, 70)
compareAndSwap(x, 64, 68)
compareAndSwap(x, 66, 70)
compareAndSwap(x, 66, 68)
compareAndSwap(x, 65, 69)
compareAndSwap(x, 67, 71)
compareAndSwap(x, 67, 69)
compareAndSwap(x, 65, 66)
compareAndSwap(x, 67, 68)
compareAndSwap(x, 69, 70)
compareAndSwap(x, 72, 73)
compareAndSwap(x, 74, 75)
compareAndSwap(x, 72, 74)
compareAndSwap(x, 73, 75)
compareAndSwap(x, 73, 74)
compareAndSwap(x, 76, 77)
compareAndSwap(x, 78, 79)
compareAndSwap(x, 76, 78)
compareAndSwap(x, 77, 79)
compareAndSwap(x, 77, 78)
compareAndSwap(x, 72, 76)
compareAndSwap(x, 74, 78)
compareAndSwap(x, 74, 76)
compareAndSwap(x, 73, 77)
compareAndSwap(x, 75, 79)
compareAndSwap(x, 75, 77)
compareAndSwap(x, 73, 74)
compareAndSwap(x, 75, 76)
compareAndSwap(x, 77, 78)
compareAndSwap(x, 64, 72)
compareAndSwap(x, 68, 76)
compareAndSwap(x, 68, 72)
compareAndSwap(x, 66, 74)
compareAndSwap(x, 70, 78)
compareAndSwap(x, 70, 74)
compareAndSwap(x, 66, 68)
compareAndSwap(x, 70, 72)
compareAndSwap(x, 74, 76)
compareAndSwap(x, 65, 73)
compareAndSwap(x, 69, 77)
compareAndSwap(x, 69, 73)
compareAndSwap(x, 67, 75)
compareAndSwap(x, 71, 79)
compareAndSwap(x, 71, 75)
compareAndSwap(x, 67, 69)
compareAndSwap(x, 71, 73)
compareAndSwap(x, 75, 77)
compareAndSwap(x, 65, 66)
compareAndSwap(x, 67, 68)
compareAndSwap(x, 69, 70)
compareAndSwap(x, 71, 72)
compareAndSwap(x, 73, 74)
compareAndSwap(x, 75, 76)
compareAndSwap(x, 77, 78)
compareAndSwap(x, 80, 81)
compareAndSwap(x, 82, 83)
compareAndSwap(x, 80, 82)
compareAndSwap(x, 81, 83)
compareAndSwap(x, 81, 82)
compareAndSwap(x, 81, 82)
compareAndSwap(x, 81, 82)
compareAndSwap(x, 64, 80)
compareAndSwap(x, 72, 80)
compareAndSwap(x, 68, 72)
compareAndSwap(x, 76, 80)
compareAndSwap(x, 66, 82)
compareAndSwap(x, 74, 82)
compareAndSwap(x, 70, 74)
compareAndSwap(x, 78, 82)
compareAndSwap(x, 66, 68)
compareAndSwap(x, 70, 72)
compareAndSwap(x, 74, 76)
compareAndSwap(x, 78, 80)
compareAndSwap(x, 65, 81)
compareAndSwap(x, 73, 81)
compareAndSwap(x, 69, 73)
compareAndSwap(x, 77, 81)
compareAndSwap(x, 67, 83)
compareAndSwap(x, 75, 83)
compareAndSwap(x, 71, 75)
compareAndSwap(x, 79, 83)
compareAndSwap(x, 67, 69)
compareAndSwap(x, 71, 73)
compareAndSwap(x, 75, 77)
compareAndSwap(x, 79, 81)
compareAndSwap(x, 65, 66)
compareAndSwap(x, 67, 68)
compareAndSwap(x, 69, 70)
compareAndSwap(x, 71, 72)
compareAndSwap(x, 73, 74)
compareAndSwap(x, 75, 76)
compareAndSwap(x, 77, 78)
compareAndSwap(x, 79, 80)
compareAndSwap(x, 81, 82)
compareAndSwap(x, 72, 80)
compareAndSwap(x, 68, 72)
compareAndSwap(x, 76, 80)
compareAndSwap(x, 74, 82)
compareAndSwap(x, 70, 74)
compareAndSwap(x, 78, 82)
compareAndSwap(x, 66, 68)
compareAndSwap(x, 70, 72)
compareAndSwap(x, 74, 76)
compareAndSwap(x, 78, 80)
compareAndSwap(x, 73, 81)
compareAndSwap(x, 69, 73)
compareAndSwap(x, 77, 81)
compareAndSwap(x, 75, 83)
compareAndSwap(x, 71, 75)
compareAndSwap(x, 79, 83)
compareAndSwap(x, 67, 69)
compareAndSwap(x, 71, 73)
compareAndSwap(x, 75, 77)
compareAndSwap(x, 79, 81)
compareAndSwap(x, 65, 66)
compareAndSwap(x, 67, 68)
compareAndSwap(x, 69, 70)
compareAndSwap(x, 71, 72)
compareAndSwap(x, 73, 74)
compareAndSwap(x, 75, 76)
compareAndSwap(x, 77, 78)
compareAndSwap(x, 79, 80)
compareAndSwap(x, 81, 82)
compareAndSwap(x, 0, 64)
compareAndSwap(x, 32, 64)
compareAndSwap(x, 16, 80)
compareAndSwap(x, 48, 80)
compareAndSwap(x, 16, 32)
compareAndSwap(x, 48, 64)
compareAndSwap(x, 8, 72)
compareAndSwap(x, 40, 72)
compareAndSwap(x, 24, 40)
compareAndSwap(x, 56, 72)
compareAndSwap(x, 8, 16)
compareAndSwap(x, 24, 32)
compareAndSwap(x, 40, 48)
compareAndSwap(x, 56, 64)
compareAndSwap(x, 72, 80)
compareAndSwap(x, 4, 68)
compareAndSwap(x, 36, 68)
compareAndSwap(x, 20, 36)
compareAndSwap(x, 52, 68)
compareAndSwap(x, 12, 76)
compareAndSwap(x, 44, 76)
compareAndSwap(x, 28, 44)
compareAndSwap(x, 60, 76)
compareAndSwap(x, 12, 20)
compareAndSwap(x, 28, 36)
compareAndSwap(x, 44, 52)
compareAndSwap(x, 60, 68)
compareAndSwap(x, 4, 8)
compareAndSwap(x, 12, 16)
compareAndSwap(x, 20, 24)
compareAndSwap(x, 28, 32)
compareAndSwap(x, 36, 40)
compareAndSwap(x, 44, 48)
compareAndSwap(x, 52, 56)
compareAndSwap(x, 60, 64)
compareAndSwap(x, 68, 72)
compareAndSwap(x, 76, 80)
compareAndSwap(x, 2, 66)
compareAndSwap(x, 34, 66)
compareAndSwap(x, 18, 82)
compareAndSwap(x, 50, 82)
compareAndSwap(x, 18, 34)
compareAndSwap(x, 50, 66)
compareAndSwap(x, 10, 74)
compareAndSwap(x, 42, 74)
compareAndSwap(x, 26, 42)
compareAndSwap(x, 58, 74)
compareAndSwap(x, 10, 18)
compareAndSwap(x, 26, 34)
compareAndSwap(x, 42, 50)
compareAndSwap(x, 58, 66)
compareAndSwap(x, 74, 82)
compareAndSwap(x, 6, 70)
compareAndSwap(x, 38, 70)
compareAndSwap(x, 22, 38)
compareAndSwap(x, 54, 70)
compareAndSwap(x, 14, 78)
compareAndSwap(x, 46, 78)
compareAndSwap(x, 30, 46)
compareAndSwap(x, 62, 78)
compareAndSwap(x, 14, 22)
compareAndSwap(x, 30, 38)
compareAndSwap(x, 46, 54)
compareAndSwap(x, 62, 70)
compareAndSwap(x, 6, 10)
compareAndSwap(x, 14, 18)
compareAndSwap(x, 22, 26)
compareAndSwap(x, 30, 34)
compareAndSwap(x, 38, 42)
compareAndSwap(x, 46, 50)
compareAndSwap(x, 54, 58)
compareAndSwap(x, 62, 66)
compareAndSwap(x, 70, 74)
compareAndSwap(x, 78, 82)
compareAndSwap(x, 2, 4)
compareAndSwap(x, 6, 8)
compareAndSwap(x, 10, 12)
compareAndSwap(x, 14, 16)
compareAndSwap(x, 18, 20)
compareAndSwap(x, 22, 24)
compareAndSwap(x, 26, 28)
compareAndSwap(x, 30, 32)
compareAndSwap(x, 34, 36)
compareAndSwap(x, 38, 40)
compareAndSwap(x, 42, 44)
compareAndSwap(x, 46, 48)
compareAndSwap(x, 50, 52)
compareAndSwap(x, 54, 56)
compareAndSwap(x, 58, 60)
compareAndSwap(x, 62, 64)
compareAndSwap(x, 66, 68)
compareAndSwap(x, 70, 72)
compareAndSwap(x, 74, 76)
compareAndSwap(x, 78, 80)
compareAndSwap(x, 1, 65)
compareAndSwap(x, 33, 65)
compareAndSwap(x, 17, 81)
compareAndSwap(x, 49, 81)
compareAndSwap(x, 17, 33)
compareAndSwap(x, 49, 65)
compareAndSwap(x, 9, 73)
compareAndSwap(x, 41, 73)
compareAndSwap(x, 25, 41)
compareAndSwap(x, 57, 73)
compareAndSwap(x, 9, 17)
compareAndSwap(x, 25, 33)
compareAndSwap(x, 41, 49)
compareAndSwap(x, 57, 65)
compareAndSwap(x, 73, 81)
compareAndSwap(x, 5, 69)
compareAndSwap(x, 37, 69)
compareAndSwap(x, 21, 37)
compareAndSwap(x, 53, 69)
compareAndSwap(x, 13, 77)
compareAndSwap(x, 45, 77)
compareAndSwap(x, 29, 45)
compareAndSwap(x, 61, 77)
compareAndSwap(x, 13, 21)
compareAndSwap(x, 29, 37)
compareAndSwap(x, 45, 53)
compareAndSwap(x, 61, 69)
compareAndSwap(x, 5, 9)
compareAndSwap(x, 13, 17)
compareAndSwap(x, 21, 25)
compareAndSwap(x, 29, 33)
compareAndSwap(x, 37, 41)
compareAndSwap(x, 45, 49)
compareAndSwap(x, 53, 57)
compareAndSwap(x, 61, 65)
compareAndSwap(x, 69, 73)
compareAndSwap(x, 77, 81)
compareAndSwap(x, 3, 67)
compareAndSwap(x, 35, 67)
compareAndSwap(x, 19, 83)
compareAndSwap(x, 51, 83)
compareAndSwap(x, 19, 35)
compareAndSwap(x, 51, 67)
compareAndSwap(x, 11, 75)
compareAndSwap(x, 43, 75)
compareAndSwap(x, 27, 43)
compareAndSwap(x, 59, 75)
compareAndSwap(x, 11, 19)
compareAndSwap(x, 27, 35)
compareAndSwap(x, 43, 51)
compareAndSwap(x, 59, 67)
compareAndSwap(x, 75, 83)
compareAndSwap(x, 7, 71)
compareAndSwap(x, 39, 71)
compareAndSwap(x, 23, 39)
compareAndSwap(x, 55, 71)
compareAndSwap(x, 15, 79)
compareAndSwap(x, 47, 79)
compareAndSwap(x, 31, 47)
compareAndSwap(x, 63, 79)
compareAndSwap(x, 15, 23)
compareAndSwap(x, 31, 39)
compareAndSwap(x, 47, 55)
compareAndSwap(x, 63, 71)
compareAndSwap(x, 7, 11)
compareAndSwap(x, 15, 19)
compareAndSwap(x, 23, 27)
compareAndSwap(x, 31, 35)
compareAndSwap(x, 39, 43)
compareAndSwap(x, 47, 51)
compareAndSwap(x, 55, 59)
compareAndSwap(x, 63, 67)
compareAndSwap(x, 71, 75)
compareAndSwap(x, 79, 83)
compareAndSwap(x, 3, 5)
compareAndSwap(x, 7, 9)
compareAndSwap(x, 11, 13)
compareAndSwap(x, 15, 17)
compareAndSwap(x, 19, 21)
compareAndSwap(x, 23, 25)
compareAndSwap(x, 27, 29)
compareAndSwap(x, 31, 33)
compareAndSwap(x, 35, 37)
compareAndSwap(x, 39, 41)
compareAndSwap(x, 43, 45)
compareAndSwap(x, 47, 49)
compareAndSwap(x, 51, 53)
compareAndSwap(x, 55, 57)
compareAndSwap(x, 59, 61)
compareAndSwap(x, 63, 65)
compareAndSwap(x, 67, 69)
compareAndSwap(x, 71, 73)
compareAndSwap(x, 75, 77)
compareAndSwap(x, 79, 81)
compareAndSwap(x, 1, 2)
compareAndSwap(x, 3, 4)
compareAndSwap(x, 5, 6)
compareAndSwap(x, 7, 8)
compareAndSwap(x, 9, 10)
compareAndSwap(x, 11, 12)
compareAndSwap(x, 13, 14)
compareAndSwap(x, 15, 16)
compareAndSwap(x, 17, 18)
compareAndSwap(x, 19, 20)
compareAndSwap(x, 21, 22)
compareAndSwap(x, 23, 24)
compareAndSwap(x, 25, 26)
compareAndSwap(x, 27, 28)
compareAndSwap(x, 29, 30)
compareAndSwap(x, 31, 32)
compareAndSwap(x, 33, 34)
compareAndSwap(x, 35, 36)
compareAndSwap(x, 37, 38)
compareAndSwap(x, 39, 40)
compareAndSwap(x, 41, 42)
compareAndSwap(x, 43, 44)
compareAndSwap(x, 45, 46)
compareAndSwap(x, 47, 48)
compareAndSwap(x, 49, 50)
compareAndSwap(x, 51, 52)
compareAndSwap(x, 53, 54)
compareAndSwap(x, 55, 56)
compareAndSwap(x, 57, 58)
compareAndSwap(x, 59, 60)
compareAndSwap(x, 61, 62)
compareAndSwap(x, 63, 64)
compareAndSwap(x, 65, 66)
compareAndSwap(x, 67, 68)
compareAndSwap(x, 69, 70)
compareAndSwap(x, 71, 72)
compareAndSwap(x, 73, 74)
compareAndSwap(x, 75, 76)
compareAndSwap(x, 77, 78)
compareAndSwap(x, 79, 80)
compareAndSwap(x, 81, 82)
// error_correction.go - NewHope key exchange error correction.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to newhope, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package newhope
import "blitter.com/go/chacha20"
// abs returns the absolute value of v without branching.
//
// sign is all ones when v is negative and zero otherwise, so adding it and
// then XOR-ing with it negates v exactly when v < 0. As with any two's
// complement negation, the most negative int32 maps to itself.
func abs(v int32) int32 {
	sign := v >> 31
	return (v + sign) ^ sign
}
func f(v0, v1 *int32, x int32) int32 {
// The`ref` code uses uint32 for x, but none of the values ever get large
// enough for that, and that would be cast-tastic due to Go being Go.
// Next 6 lines compute t = x/PARAM_Q
b := x * 2730
t := b >> 25
b = x - t*paramQ
b = (paramQ - 1) - b
b >>= 31
t -= b
r := t & 1
xit := t >> 1
*v0 = xit + r // v0 = round(x/(2*PARAM_Q))
r = t & 1
*v1 = (t >> 1) + r
return abs(x - ((*v0) * 2 * paramQ))
func g(x int32) int32 {
// Next 6 lines compute t = x/(4 *PARAMQ)
b := x * 2730
t := b >> 27
b = x - t*(paramQ*4)
b = (paramQ * 4) - b
b >>= 31
t -= b
c := t & 1
t = (t >> 1) + c // t = round(x/(8*PARAM_Q))
t *= 8 * paramQ
return abs(t - x)
func llDecode(xi0, xi1, xi2, xi3 int32) int16 {
t := g(xi0)
t += g(xi1)
t += g(xi2)
t += g(xi3)
t -= 8 * paramQ
t >>= 31
return int16(t & 1)
// helpRec computes reconciliation hints for v and stores them in c.
//
// For each i in [0, 256) the coefficients v[i], v[256+i], v[512+i] and
// v[768+i] are scaled by 8, offset by 4*rBit (one bit taken from rand) and
// passed through f. The results are combined into four values, each masked
// to two bits, which are written to the same four positions of c.
//
// seed and nonce parameterise a ChaCha20 stream; nonce is placed in the last
// byte of the 8 byte stream nonce.
// NOTE(review): the statements that fill rand from the stream, and the body of
// the error branch, are not visible in this excerpt - confirm against the
// full file.
func (c *poly) helpRec(v *poly, seed *[SeedBytes]byte, nonce byte) {
var v0, v1, vTmp [4]int32
var k int32
var rand [32]byte
var n [8]byte
n[7] = nonce
stream, err := chacha20.New(seed[:], n[:])
if err != nil {
// rand is wiped when helpRec returns.
defer memwipe(rand[:])
for i := uint(0); i < 256; i++ {
// rBit is bit i of rand (bit i&7 of byte i>>3).
rBit := int32((rand[i>>3] >> (i & 7)) & 1)
vTmp[0], vTmp[1], vTmp[2], vTmp[3] = int32(v.coeffs[i]), int32(v.coeffs[256+i]), int32(v.coeffs[512+i]), int32(v.coeffs[768+i])
// newhope-20151209 - New version of the reconciliation.
k = f(&v0[0], &v1[0], 8*vTmp[0]+4*rBit)
k += f(&v0[1], &v1[1], 8*vTmp[1]+4*rBit)
k += f(&v0[2], &v1[2], 8*vTmp[2]+4*rBit)
k += f(&v0[3], &v1[3], 8*vTmp[3]+4*rBit)
// k becomes all ones when the summed distances reach 2*paramQ and zero
// otherwise, so the next four lines pick v1 (k set) or v0 (k clear)
// without branching.
k = (2*paramQ - 1 - k) >> 31
vTmp[0] = ((^k) & v0[0]) ^ (k & v1[0])
vTmp[1] = ((^k) & v0[1]) ^ (k & v1[1])
vTmp[2] = ((^k) & v0[2]) ^ (k & v1[2])
vTmp[3] = ((^k) & v0[3]) ^ (k & v1[3])
// Only the low two bits of each hint are kept.
c.coeffs[0+i] = uint16((vTmp[0] - vTmp[3]) & 3)
c.coeffs[256+i] = uint16((vTmp[1] - vTmp[3]) & 3)
c.coeffs[512+i] = uint16((vTmp[2] - vTmp[3]) & 3)
c.coeffs[768+i] = uint16((-k + 2*vTmp[3]) & 3)
// Clear the scratch copy of the coefficients.
for i := range vTmp {
vTmp[i] = 0
func rec(key *[32]byte, v, c *poly) {
var tmp, vTmp, cTmp [4]int32
for i := range key {
key[i] = 0
for i := uint(0); i < 256; i++ {
vTmp[0], vTmp[1], vTmp[2], vTmp[3] = int32(v.coeffs[i]), int32(v.coeffs[256+i]), int32(v.coeffs[512+i]), int32(v.coeffs[768+i])
cTmp[0], cTmp[1], cTmp[2], cTmp[3] = int32(c.coeffs[i]), int32(c.coeffs[256+i]), int32(c.coeffs[512+i]), int32(c.coeffs[768+i])
tmp[0] = 16*paramQ + 8*vTmp[0] - paramQ*(2*cTmp[0]+cTmp[3])
tmp[1] = 16*paramQ + 8*vTmp[1] - paramQ*(2*cTmp[1]+cTmp[3])
tmp[2] = 16*paramQ + 8*vTmp[2] - paramQ*(2*cTmp[2]+cTmp[3])
tmp[3] = 16*paramQ + 8*vTmp[3] - paramQ*(cTmp[3])
key[i>>3] |= byte(llDecode(tmp[0], tmp[1], tmp[2], tmp[3]) << (i & 7))
for i := 0; i < 4; i++ {
tmp[i] = 0
vTmp[i] = 0
cTmp[i] = 0
// newhope.go - NewHope interface.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to newhope, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
// Package newhope implements a key exchange based on the Ring Learning with
// Errors Problem. It is a mechanical port of the Public Domain implementation
// by Erdem Alkim, Léo Ducas, Thomas Pöppelmann, and Peter Schwabe.
// For more information see:
// https://cryptojedi.org/papers/newhope-20161119.pdf
// https://cryptojedi.org/papers/newhopesimple-20161217.pdf
package newhope
import (
// Sizes and version information for the NewHope key exchange.
const (
// SharedSecretSize is the length of a Shared Secret in bytes.
SharedSecretSize = 32
// UpstreamVersion is the version of the upstream package this
// implementation is compatible with.
UpstreamVersion = "20160815"
// RecBytes is the length of the reconciliation data in bytes
// (encodeB packs four 2-bit hints into each byte).
RecBytes = 256
// SendASize is the length of Alice's public key in bytes.
SendASize = PolyBytes + SeedBytes
// SendBSize is the length of Bob's public key in bytes.
SendBSize = PolyBytes + RecBytes
// TorSampling enables the constant time generation of the `a` parameter,
// where every successful `a` generation will take the same amount of time.
// Most users will probably not want to enable this as it does come with a
// performance penalty. Alice and Bob *MUST* agree on the sampling method,
// or the key exchange will fail.
//
// The value is read each time a key pair is generated or Bob's side of the
// exchange runs, and is passed to poly.uniform.
// NOTE(review): this is plain package-level state with no visible
// synchronisation - presumably meant to be set once before use; confirm.
var TorSampling = false
// encodeA writes Alice's message into r. The lines visible here copy the
// SeedBytes long seed to r[PolyBytes : PolyBytes+SeedBytes].
// NOTE(review): pk is not referenced in this excerpt; presumably its packed
// form fills r[:PolyBytes] - confirm against the full file.
func encodeA(r []byte, pk *poly, seed *[SeedBytes]byte) {
for i := 0; i < SeedBytes; i++ {
r[PolyBytes+i] = seed[i]
// decodeA reads Alice's message from r. The lines visible here copy
// r[PolyBytes : PolyBytes+SeedBytes] into seed.
// NOTE(review): pk is not referenced in this excerpt; presumably it is
// unpacked from r[:PolyBytes] - confirm against the full file.
func decodeA(pk *poly, seed *[SeedBytes]byte, r []byte) {
for i := range seed {
seed[i] = r[PolyBytes+i]
// encodeB writes Bob's message into r. The lines visible here pack the
// reconciliation hints c, four per byte with the first hint in the low two
// bits, into r[PolyBytes : PolyBytes+paramN/4].
// Each hint must fit in two bits (helpRec masks them with 3); larger values
// would spill into the neighbouring hint.
// NOTE(review): b is not referenced in this excerpt; presumably its packed
// form fills r[:PolyBytes] - confirm against the full file.
func encodeB(r []byte, b *poly, c *poly) {
for i := 0; i < paramN/4; i++ {
r[PolyBytes+i] = byte(c.coeffs[4*i]) | byte(c.coeffs[4*i+1]<<2) | byte(c.coeffs[4*i+2]<<4) | byte(c.coeffs[4*i+3]<<6)
// decodeB reads Bob's message from r. The lines visible here unpack the
// reconciliation hints into c: every byte of r[PolyBytes:] yields four 2-bit
// values, lowest bits first.
// NOTE(review): b is not referenced in this excerpt; presumably it is
// unpacked from r[:PolyBytes] - confirm against the full file.
func decodeB(b *poly, c *poly, r []byte) {
for i := 0; i < paramN/4; i++ {
c.coeffs[4*i+0] = uint16(r[PolyBytes+i]) & 0x03
c.coeffs[4*i+1] = uint16(r[PolyBytes+i]>>2) & 0x03
c.coeffs[4*i+2] = uint16(r[PolyBytes+i]>>4) & 0x03
// The top two bits need no mask: a byte shifted right by 6 is at most 3.
c.coeffs[4*i+3] = uint16(r[PolyBytes+i] >> 6)
// memwipe overwrites every byte of b with zero. A nil or empty slice is left
// untouched.
func memwipe(b []byte) {
	for i := 0; i < len(b); i++ {
		b[i] = 0
	}
}
// PublicKeyAlice is Alice's NewHope public key.
type PublicKeyAlice struct {
// Send is the SendASize byte message for Bob, filled in by encodeA.
Send [SendASize]byte
// PrivateKeyAlice is Alice's NewHope private key.
type PrivateKeyAlice struct {
// sk is the secret polynomial; GenerateKeyPairAlice samples it with
// getNoise.
sk poly
// Reset clears all sensitive information such that it no longer appears in
// memory.
// NOTE(review): the body is not visible in this excerpt; presumably it
// resets k.sk - confirm against the full file.
func (k *PrivateKeyAlice) Reset() {
// GenerateKeyPairAlice returns a private/public key pair. The private key is
// generated using the given reader, which must return random data. The
// receiver side of the key exchange (aka "Bob") MUST use KeyExchangeBob()
// instead of this routine.
//
// Two SeedBytes long values are read from rand: one for the public seed and
// one for the noise. If either read fails, (nil, nil, err) is returned.
func GenerateKeyPairAlice(rand io.Reader) (*PrivateKeyAlice, *PublicKeyAlice, error) {
var a, e, pk, r poly
var seed, noiseSeed [SeedBytes]byte
// seed <- Sample({0, 1}^256)
if _, err := io.ReadFull(rand, seed[:]); err != nil {
return nil, nil, err
seed = sha3.Sum256(seed[:]) // Don't send output of system RNG.
// a <- Parse(SHAKE-128(seed))
a.uniform(&seed, TorSampling)
// s, e <- Sample(psi(n, 12))
if _, err := io.ReadFull(rand, noiseSeed[:]); err != nil {
return nil, nil, err
// The noise seed is wiped when this function returns.
defer memwipe(noiseSeed[:])
privKey := new(PrivateKeyAlice)
// The same noise seed is used with nonce 0 for s and nonce 1 for e.
privKey.sk.getNoise(&noiseSeed, 0)
e.getNoise(&noiseSeed, 1)
// NOTE(review): no forward transform of sk or e is visible before the
// pointwise product - confirm against the full file.
// b <- as + e
pubKey := new(PublicKeyAlice)
r.pointwise(&privKey.sk, &a)
pk.add(&e, &r)
encodeA(pubKey.Send[:], &pk, &seed)
return privKey, pubKey, nil
// PublicKeyBob is Bob's NewHope public key.
type PublicKeyBob struct {
// Send is the SendBSize byte message for Alice, filled in by encodeB.
Send [SendBSize]byte
// KeyExchangeBob is the Responder side of the NewHope key exchange. The
// shared secret and "public key" (key + reconciliation data) are generated
// using the given reader, which must return random data.
//
// It returns Bob's public key and the SHA3-256 hash of the reconciled key as
// the shared secret. If reading from rand fails, (nil, nil, err) is returned.
func KeyExchangeBob(rand io.Reader, alicePk *PublicKeyAlice) (*PublicKeyBob, []byte, error) {
var pka, a, sp, ep, u, v, epp, r poly
var seed, noiseSeed [SeedBytes]byte
if _, err := io.ReadFull(rand, noiseSeed[:]); err != nil {
return nil, nil, err
// The noise seed is wiped when this function returns.
defer memwipe(noiseSeed[:])
// a <- Parse(SHAKE-128(seed))
decodeA(&pka, &seed, alicePk.Send[:])
a.uniform(&seed, TorSampling)
// s', e', e'' <- Sample(psi(n, 12))
sp.getNoise(&noiseSeed, 0)
ep.getNoise(&noiseSeed, 1)
epp.getNoise(&noiseSeed, 2)
// u <- as' + e'
u.pointwise(&a, &sp)
u.add(&u, &ep)
// v <- bs' + e''
v.pointwise(&pka, &sp)
v.add(&v, &epp)
// r <- Sample(HelpRec(v))
// The noise seed is reused here with nonce 3.
r.helpRec(&v, &noiseSeed, 3)
pubKey := new(PublicKeyBob)
encodeB(pubKey.Send[:], &u, &r)
// nu <- Rec(v, r)
var nu [SharedSecretSize]byte
rec(&nu, &v, &r)
// mu <- SHA3-256(nu)
mu := sha3.Sum256(nu[:])
// Scrub the sensitive stuff...
// NOTE(review): no scrubbing statements are visible in this excerpt -
// confirm against the full file.
return pubKey, mu[:], nil
// KeyExchangeAlice is the Initiator side of the NewHope key exchange. The
// provided private key is obliterated prior to returning.
//
// It returns the SHA3-256 hash of the reconciled key as the shared secret.
// The error result is nil on every path visible here.
func KeyExchangeAlice(bobPk *PublicKeyBob, aliceSk *PrivateKeyAlice) ([]byte, error) {
var u, r, vp poly
decodeB(&u, &r, bobPk.Send[:])
// v' <- us
vp.pointwise(&aliceSk.sk, &u)
// nu <- Rec(v', r)
var nu [SharedSecretSize]byte
rec(&nu, &vp, &r)
// mu <- Sha3-256(nu)
mu := sha3.Sum256(nu[:])
// Scrub the sensitive stuff...
// NOTE(review): neither the scrubbing nor the reset of aliceSk promised
// above is visible in this excerpt - confirm against the full file.
return mu[:], nil
@ -0,0 +1,166 @@
// newhope_simple.go - NewHope-Simple interface.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to newhope, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package newhope
import (
// Message sizes for the NewHope-Simple variant.
const (
// HighBytes is the length of the encoded secret in bytes.
// This equals the output size of poly.compress (three bytes for every
// eight coefficients); presumably that is the encoding used - confirm.
HighBytes = 384
// SendASimpleSize is the length of Alice's NewHope-Simple public key in
// bytes.
SendASimpleSize = PolyBytes + SeedBytes
// SendBSimpleSize is the length of Bob's NewHope-Simple public key in
// bytes.
SendBSimpleSize = PolyBytes + HighBytes
// encodeBSimple writes Bob's NewHope-Simple message, built from b and v,
// into r.
// NOTE(review): the body is not visible in this excerpt; the layout
// (presumably b in r[:PolyBytes] followed by HighBytes for v) must be
// confirmed against the full file.
func encodeBSimple(r []byte, b *poly, v *poly) {
// decodeBSimple reads Bob's NewHope-Simple message from r into b and v.
// NOTE(review): the body is not visible in this excerpt; presumably it is the
// inverse of encodeBSimple - confirm against the full file.
func decodeBSimple(b *poly, v *poly, r []byte) {
// PublicKeySimpleAlice is Alice's NewHope-Simple public key.
type PublicKeySimpleAlice struct {
// Send is the SendASimpleSize byte message for Bob, filled in by encodeA.
Send [SendASimpleSize]byte
// PrivateKeySimpleAlice is Alice's NewHope-Simple private key.
type PrivateKeySimpleAlice struct {
// sk is the secret polynomial; GenerateKeyPairSimpleAlice samples it with
// getNoise.
sk poly
// Reset clears all sensitive information such that it no longer appears in
// memory.
// NOTE(review): the body is not visible in this excerpt; presumably it
// resets k.sk - confirm against the full file.
func (k *PrivateKeySimpleAlice) Reset() {
// GenerateKeyPairSimpleAlice returns a NewHope-Simple private/public key pair.
// The private key is generated using the given reader, which must return
// random data. The receiver side of the key exchange (aka "Bob") MUST use
// KeyExchangeSimpleBob() instead of this routine.
//
// Two SeedBytes long values are read from rand: one for the public seed and
// one for the noise. If either read fails, (nil, nil, err) is returned.
func GenerateKeyPairSimpleAlice(rand io.Reader) (*PrivateKeySimpleAlice, *PublicKeySimpleAlice, error) {
var a, e, pk, r poly
var seed, noiseSeed [SeedBytes]byte
if _, err := io.ReadFull(rand, seed[:]); err != nil {
return nil, nil, err
seed = sha3.Sum256(seed[:]) // Don't send output of system RNG.
a.uniform(&seed, TorSampling)
if _, err := io.ReadFull(rand, noiseSeed[:]); err != nil {
return nil, nil, err
// The noise seed is wiped when this function returns.
defer memwipe(noiseSeed[:])
privKey := new(PrivateKeySimpleAlice)
// The same noise seed is used with nonce 0 for the secret and nonce 1 for
// the error polynomial.
privKey.sk.getNoise(&noiseSeed, 0)
e.getNoise(&noiseSeed, 1)
// NOTE(review): no forward transform of sk or e is visible before the
// pointwise product - confirm against the full file.
pubKey := new(PublicKeySimpleAlice)
r.pointwise(&privKey.sk, &a)
pk.add(&e, &r)
encodeA(pubKey.Send[:], &pk, &seed)
return privKey, pubKey, nil
// PublicKeySimpleBob is Bob's NewHope-Simple public key.
type PublicKeySimpleBob struct {
// Send is the SendBSimpleSize byte message for Alice, filled in by
// encodeBSimple.
Send [SendBSimpleSize]byte
// KeyExchangeSimpleBob is the Responder side of the NewHope-Simple key
// exchange. The shared secret and "public key" are generated using the
// given reader, which must return random data.
//
// It returns Bob's public key and, as the shared secret, the SHA3-256 hash of
// the (already hashed) random key material. If reading from rand fails,
// (nil, nil, err) is returned.
func KeyExchangeSimpleBob(rand io.Reader, alicePk *PublicKeySimpleAlice) (*PublicKeySimpleBob, []byte, error) {
var pka, a, sp, ep, bp, v, epp, m poly
var seed, noiseSeed [SeedBytes]byte
if _, err := io.ReadFull(rand, noiseSeed[:]); err != nil {
return nil, nil, err
// The noise seed is wiped when this function returns.
defer memwipe(noiseSeed[:])
var sharedKey [SharedSecretSize]byte
if _, err := io.ReadFull(rand, sharedKey[:]); err != nil {
return nil, nil, err
// The key material is wiped when this function returns.
defer memwipe(sharedKey[:])
// Hash the RNG output before it is used as key material.
sharedKey = sha3.Sum256(sharedKey[:])
decodeA(&pka, &seed, alicePk.Send[:])
a.uniform(&seed, TorSampling)
sp.getNoise(&noiseSeed, 0)
ep.getNoise(&noiseSeed, 1)
bp.pointwise(&a, &sp)
bp.add(&bp, &ep)
v.pointwise(&pka, &sp)
epp.getNoise(&noiseSeed, 2)
v.add(&v, &epp)
// NOTE(review): nothing visible in this excerpt assigns m; presumably it is
// built from sharedKey before this point - confirm against the full file.
v.add(&v, &m) // add key
pubKey := new(PublicKeySimpleBob)
encodeBSimple(pubKey.Send[:], &bp, &v)
mu := sha3.Sum256(sharedKey[:])
// Scrub the sensitive stuff...
// NOTE(review): no scrubbing statements are visible in this excerpt.
return pubKey, mu[:], nil
// KeyExchangeSimpleAlice is the Initiator side of the NewHope-Simple key
// exchange. The provided private key is obliterated prior to returning.
//
// It returns the SHA3-256 hash of sharedKey as the shared secret. The error
// result is nil on every path visible here.
func KeyExchangeSimpleAlice(bobPk *PublicKeySimpleBob, aliceSk *PrivateKeySimpleAlice) ([]byte, error) {
var v, bp, k poly
decodeBSimple(&bp, &v, bobPk.Send[:])
k.pointwise(&aliceSk.sk, &bp)
k.sub(&k, &v)
var sharedKey [SharedSecretSize]byte
// NOTE(review): nothing visible in this excerpt writes to sharedKey before
// it is hashed; presumably it is decoded from k - confirm against the full
// file.
// mu <- Sha3-256(v')
mu := sha3.Sum256(sharedKey[:])
// Scrub the sensitive stuff...
// NOTE(review): neither the scrubbing nor the reset of aliceSk promised
// above is visible in this excerpt.
return mu[:], nil
@ -0,0 +1,131 @@
// ntt.go - NewHope Number Theoretic Transform.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to newhope, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package newhope
// bitrevTable maps each index in [0, paramN) to the index obtained by
// reversing its 10 bits (for example 1 -> 512, 2 -> 256, 3 -> 768). It is
// used by poly.bitrev to permute coefficients.
var bitrevTable = [paramN]uint16{
0, 512, 256, 768, 128, 640, 384, 896, 64, 576, 320, 832, 192, 704, 448, 960,
32, 544, 288, 800, 160, 672, 416, 928, 96, 608, 352, 864, 224, 736, 480,
992, 16, 528, 272, 784, 144, 656, 400, 912, 80, 592, 336, 848, 208, 720,
464, 976, 48, 560, 304, 816, 176, 688, 432, 944, 112, 624, 368, 880,
240, 752, 496, 1008, 8, 520, 264, 776, 136, 648, 392, 904, 72, 584, 328,
840, 200, 712, 456, 968, 40, 552, 296, 808, 168, 680, 424, 936, 104,
616, 360, 872, 232, 744, 488, 1000, 24, 536, 280, 792, 152, 664, 408,
920, 88, 600, 344, 856, 216, 728, 472, 984, 56, 568, 312, 824, 184, 696,
440, 952, 120, 632, 376, 888, 248, 760, 504, 1016, 4, 516, 260, 772,
132, 644, 388, 900, 68, 580, 324, 836, 196, 708, 452, 964, 36, 548, 292,
804, 164, 676, 420, 932, 100, 612, 356, 868, 228, 740, 484, 996, 20,
532, 276, 788, 148, 660, 404, 916, 84, 596, 340, 852, 212, 724, 468,
980, 52, 564, 308, 820, 180, 692, 436, 948, 116, 628, 372, 884, 244,
756, 500, 1012, 12, 524, 268, 780, 140, 652, 396, 908, 76, 588, 332,
844, 204, 716, 460, 972, 44, 556, 300, 812, 172, 684, 428, 940, 108,
620, 364, 876, 236, 748, 492, 1004, 28, 540, 284, 796, 156, 668, 412,
924, 92, 604, 348, 860, 220, 732, 476, 988, 60, 572, 316, 828, 188, 700,
444, 956, 124, 636, 380, 892, 252, 764, 508, 1020, 2, 514, 258, 770,
130, 642, 386, 898, 66, 578, 322, 834, 194, 706, 450, 962, 34, 546, 290,
802, 162, 674, 418, 930, 98, 610, 354, 866, 226, 738, 482, 994, 18, 530,
274, 786, 146, 658, 402, 914, 82, 594, 338, 850, 210, 722, 466, 978, 50,
562, 306, 818, 178, 690, 434, 946, 114, 626, 370, 882, 242, 754, 498,
1010, 10, 522, 266, 778, 138, 650, 394, 906, 74, 586, 330, 842, 202,
714, 458, 970, 42, 554, 298, 810, 170, 682, 426, 938, 106, 618, 362,
874, 234, 746, 490, 1002, 26, 538, 282, 794, 154, 666, 410, 922, 90,
602, 346, 858, 218, 730, 474, 986, 58, 570, 314, 826, 186, 698, 442,
954, 122, 634, 378, 890, 250, 762, 506, 1018, 6, 518, 262, 774, 134,
646, 390, 902, 70, 582, 326, 838, 198, 710, 454, 966, 38, 550, 294, 806,
166, 678, 422, 934, 102, 614, 358, 870, 230, 742, 486, 998, 22, 534,
278, 790, 150, 662, 406, 918, 86, 598, 342, 854, 214, 726, 470, 982, 54,
566, 310, 822, 182, 694, 438, 950, 118, 630, 374, 886, 246, 758, 502,
1014, 14, 526, 270, 782, 142, 654, 398, 910, 78, 590, 334, 846, 206,
718, 462, 974, 46, 558, 302, 814, 174, 686, 430, 942, 110, 622, 366,
878, 238, 750, 494, 1006, 30, 542, 286, 798, 158, 670, 414, 926, 94,
606, 350, 862, 222, 734, 478, 990, 62, 574, 318, 830, 190, 702, 446,
958, 126, 638, 382, 894, 254, 766, 510, 1022, 1, 513, 257, 769, 129,
641, 385, 897, 65, 577, 321, 833, 193, 705, 449, 961, 33, 545, 289, 801,
161, 673, 417, 929, 97, 609, 353, 865, 225, 737, 481, 993, 17, 529, 273,
785, 145, 657, 401, 913, 81, 593, 337, 849, 209, 721, 465, 977, 49, 561,
305, 817, 177, 689, 433, 945, 113, 625, 369, 881, 241, 753, 497, 1009,
9, 521, 265, 777, 137, 649, 393, 905, 73, 585, 329, 841, 201, 713, 457,
969, 41, 553, 297, 809, 169, 681, 425, 937, 105, 617, 361, 873, 233,
745, 489, 1001, 25, 537, 281, 793, 153, 665, 409, 921, 89, 601, 345,
857, 217, 729, 473, 985, 57, 569, 313, 825, 185, 697, 441, 953, 121,
633, 377, 889, 249, 761, 505, 1017, 5, 517, 261, 773, 133, 645, 389,
901, 69, 581, 325, 837, 197, 709, 453, 965, 37, 549, 293, 805, 165, 677,
421, 933, 101, 613, 357, 869, 229, 741, 485, 997, 21, 533, 277, 789,
149, 661, 405, 917, 85, 597, 341, 853, 213, 725, 469, 981, 53, 565, 309,
821, 181, 693, 437, 949, 117, 629, 373, 885, 245, 757, 501, 1013, 13,
525, 269, 781, 141, 653, 397, 909, 77, 589, 333, 845, 205, 717, 461,
973, 45, 557, 301, 813, 173, 685, 429, 941, 109, 621, 365, 877, 237,
749, 493, 1005, 29, 541, 285, 797, 157, 669, 413, 925, 93, 605, 349,
861, 221, 733, 477, 989, 61, 573, 317, 829, 189, 701, 445, 957, 125,
637, 381, 893, 253, 765, 509, 1021, 3, 515, 259, 771, 131, 643, 387,
899, 67, 579, 323, 835, 195, 707, 451, 963, 35, 547, 291, 803, 163, 675,
419, 931, 99, 611, 355, 867, 227, 739, 483, 995, 19, 531, 275, 787, 147,
659, 403, 915, 83, 595, 339, 851, 211, 723, 467, 979, 51, 563, 307, 819,
179, 691, 435, 947, 115, 627, 371, 883, 243, 755, 499, 1011, 11, 523,
267, 779, 139, 651, 395, 907, 75, 587, 331, 843, 203, 715, 459, 971, 43,
555, 299, 811, 171, 683, 427, 939, 107, 619, 363, 875, 235, 747, 491,
1003, 27, 539, 283, 795, 155, 667, 411, 923, 91, 603, 347, 859, 219,
731, 475, 987, 59, 571, 315, 827, 187, 699, 443, 955, 123, 635, 379,
891, 251, 763, 507, 1019, 7, 519, 263, 775, 135, 647, 391, 903, 71, 583,
327, 839, 199, 711, 455, 967, 39, 551, 295, 807, 167, 679, 423, 935,
103, 615, 359, 871, 231, 743, 487, 999, 23, 535, 279, 791, 151, 663,
407, 919, 87, 599, 343, 855, 215, 727, 471, 983, 55, 567, 311, 823, 183,
695, 439, 951, 119, 631, 375, 887, 247, 759, 503, 1015, 15, 527, 271,
783, 143, 655, 399, 911, 79, 591, 335, 847, 207, 719, 463, 975, 47, 559,
303, 815, 175, 687, 431, 943, 111, 623, 367, 879, 239, 751, 495, 1007,
31, 543, 287, 799, 159, 671, 415, 927, 95, 607, 351, 863, 223, 735, 479,
991, 63, 575, 319, 831, 191, 703, 447, 959, 127, 639, 383, 895, 255,
767, 511, 1023,
func (p *poly) bitrev() {
for i, v := range p.coeffs {
r := bitrevTable[i]
if uint16(i) < r {
p.coeffs[i] = p.coeffs[r]
p.coeffs[r] = v
func (p *poly) mulCoefficients(factors *[paramN]uint16) {
for i, v := range factors {
p.coeffs[i] = montgomeryReduce(uint32(p.coeffs[i]) * uint32(v))
// ntt transforms a in place using butterflies driven by the twiddle table
// omega.
//
// Each pass of the outer loop (i = 0, 2, 4, 6, 8) handles two levels: an
// "even" level with butterfly distance 1<<i and an "odd" level with twice
// that distance. The sum of a butterfly is stored unreduced on even levels
// and passed through barrettReduce on odd levels; the difference is always
// multiplied by the twiddle factor via montgomeryReduce.
// NOTE(review): jTwiddle is never advanced in the lines visible here;
// presumably an increment follows each butterfly - confirm against the full
// file.
func ntt(a *[paramN]uint16, omega *[paramN / 2]uint16) {
var distance uint
for i := uint(0); i < 10; i += 2 {
// Even level.
distance = (1 << i)
for start := uint(0); start < distance; start++ {
jTwiddle := 0
for j := start; j < paramN-1; j += 2 * distance {
w := uint32(omega[jTwiddle])
tmp := a[j]
a[j] = tmp + a[j+distance]
// 3*paramQ is added before subtracting, presumably so the uint32
// difference cannot go negative - confirm the input range.
a[j+distance] = montgomeryReduce(w * (uint32(tmp) + 3*paramQ - uint32(a[j+distance])))
// Odd level.
distance <<= 1
for start := uint(0); start < distance; start++ {
jTwiddle := 0
for j := start; j < paramN-1; j += 2 * distance {
w := uint32(omega[jTwiddle])
tmp := a[j]
a[j] = barrettReduce(tmp + a[j+distance])
a[j+distance] = montgomeryReduce(w * (uint32(tmp) + 3*paramQ - uint32(a[j+distance])))
@ -0,0 +1,17 @@
// params.go - NewHope parameters.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to newhope, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package newhope
// NewHope parameters shared by the whole package.
const (
// paramN is the number of coefficients in a poly.
paramN = 1024
paramK = 16 // used in sampler
// paramQ is the modulus q referred to by the coefficient arithmetic.
paramQ = 12289
// SeedBytes is the size of the seed in bytes.
SeedBytes = 32
@ -0,0 +1,212 @@
// poly.go - NewHope polynomial.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to newhope, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package newhope
import (
const (
// PolyBytes is the length of an encoded polynomial in bytes
// (toBytes packs four coefficients into every seven bytes).
PolyBytes = 1792
// shake128Rate is the number of SHAKE-128 output bytes treated as one block
// when sizing the sampling buffers.
shake128Rate = 168 // Stupid that this isn't exposed.
// poly is a polynomial with paramN coefficients, each held in a uint16.
type poly struct {
coeffs [paramN]uint16
func (p *poly) reset() {
for i := range p.coeffs {
p.coeffs[i] = 0
func (p *poly) fromBytes(a []byte) {
for i := 0; i < paramN/4; i++ {
p.coeffs[4*i+0] = uint16(a[7*i+0]) | ((uint16(a[7*i+1]) & 0x3f) << 8)
p.coeffs[4*i+1] = (uint16(a[7*i+1]) >> 6) | (uint16(a[7*i+2]) << 2) | ((uint16(a[7*i+3]) & 0x0f) << 10)
p.coeffs[4*i+2] = (uint16(a[7*i+3]) >> 4) | (uint16(a[7*i+4]) << 4) | ((uint16(a[7*i+5]) & 0x03) << 12)
p.coeffs[4*i+3] = (uint16(a[7*i+5]) >> 2) | (uint16(a[7*i+6]) << 6)
func (p *poly) toBytes(r []byte) {
for i := 0; i < paramN/4; i++ {
// Make sure that coefficients have only 14 bits.
t0 := barrettReduce(p.coeffs[4*i+0])
t1 := barrettReduce(p.coeffs[4*i+1])
t2 := barrettReduce(p.coeffs[4*i+2])
t3 := barrettReduce(p.coeffs[4*i+3])
// Make sure that coefficients are in [0,q]
m := t0 - paramQ
c := int16(m)
c >>= 15
t0 = m ^ ((t0 ^ m) & uint16(c))
m = t1 - paramQ
c = int16(m)
c >>= 15
t1 = m ^ ((t1 ^ m) & uint16(c))
m = t2 - paramQ
c = int16(m)
c >>= 15
t2 = m ^ ((t2 ^ m) & uint16(c))
m = t3 - paramQ
c = int16(m)
c >>= 15
t3 = m ^ ((t3 ^ m) & uint16(c))
r[7*i+0] = byte(t0 & 0xff)
r[7*i+1] = byte(t0>>8) | byte(t1<<6)
r[7*i+2] = byte(t1 >> 2)
r[7*i+3] = byte(t1>>10) | byte(t2<<4)
r[7*i+4] = byte(t2 >> 4)
r[7*i+5] = byte(t2>>12) | byte(t3<<2)
r[7*i+6] = byte(t3 >> 6)
// discardTo tries to fill p from the candidate output xbuf. It returns true
// when the candidate must be discarded (p is left untouched) and false when
// the first paramN words were copied into p.
//
// xbuf is decoded as little-endian uint16 words; it must hold at least
// shake128Rate*16 bytes.
func (p *poly) discardTo(xbuf []byte) bool {
var x [shake128Rate * 16 / 2]uint16
for i := range x {
x[i] = binary.LittleEndian.Uint16(xbuf[i*2:])
// NOTE(review): the body of this 16-iteration loop is not visible in this
// excerpt; presumably it reorders x so that oversized words end up at the
// high indices - confirm against the full file.
for i := 0; i < 16; i++ {
// Check whether we're safe:
// r turns negative as soon as one of the words 1000..1023 exceeds 61444
// (5*paramQ - 1).
r := int(0)
for i := 1000; i < 1024; i++ {
r |= 61444 - int(x[i])
if r>>31 != 0 {
return true
// If we are, copy coefficients to polynomial:
for i := range p.coeffs {
p.coeffs[i] = x[i]
return false
// uniform fills p with coefficients derived from SHAKE-128(seed).
//
// With torSampling false, 16 bit little-endian words are read from the SHAKE
// output and accepted when they are below 5*paramQ; more output is squeezed
// one block at a time when the buffer runs out. With torSampling true, whole
// 16 block buffers are squeezed and handed to discardTo until one is
// accepted.
// NOTE(review): the counter increment and the loop exits are not visible in
// this excerpt - confirm against the full file.
func (p *poly) uniform(seed *[SeedBytes]byte, torSampling bool) {
if !torSampling {
// Reference version, vartime.
nBlocks := 14
var buf [shake128Rate * 14]byte
// h and buf are left unscrubbed because the output is public.
h := sha3.NewShake128()
_, _ = h.Write(seed[:])
_, _ = h.Read(buf[:])
for ctr, pos := 0, 0; ctr < paramN; {
val := binary.LittleEndian.Uint16(buf[pos:])
if val < 5*paramQ {
p.coeffs[ctr] = val
pos += 2
// Buffer exhausted: from here on squeeze a single block at a time.
if pos > shake128Rate*nBlocks-2 {
nBlocks = 1
_, _ = h.Read(buf[:shake128Rate])
pos = 0
} else {
// `torref` version, every valid `a` is generated in constant time,
// though the number of attempts varies.
const nBlocks = 16
var buf [shake128Rate * nBlocks]byte
// h and buf are left unscrubbed because the output is public.
h := sha3.NewShake128()
_, _ = h.Write(seed[:])
for {
_, _ = h.Read(buf[:])
if !p.discardTo(buf[:]) {
// getNoise fills p with noise coefficients derived from seed and nonce.
//
// Each coefficient consumes four bytes of buf: the number of set bits in the
// upper two bytes is subtracted from the number in the lower two, and paramQ
// is added so the stored value stays non-negative.
//
// seed and nonce parameterise a ChaCha20 stream; nonce is placed in the
// first byte of the 8 byte stream nonce.
// NOTE(review): the statements that fill buf from the stream, the body of the
// error branch and the final scrubbing are not visible in this excerpt -
// confirm against the full file.
func (p *poly) getNoise(seed *[SeedBytes]byte, nonce byte) {
// The `ref` code uses a uint32 vector instead of a byte vector,
// but converting between the two in Go is cumbersome.
var buf [4 * paramN]byte
var n [8]byte
n[0] = nonce
stream, err := chacha20.New(seed[:], n[:])
if err != nil {
for i := 0; i < paramN; i++ {
t := binary.LittleEndian.Uint32(buf[4*i:])
// After the loop each byte of d holds the number of set bits in the
// corresponding byte of t.
d := uint32(0)
for j := uint(0); j < 8; j++ {
d += (t >> j) & 0x01010101
// a sums the counts of the two low bytes, b those of the two high bytes.
a := ((d >> 8) & 0xff) + (d & 0xff)
b := (d >> 24) + ((d >> 16) & 0xff)
p.coeffs[i] = uint16(a) + paramQ - uint16(b)
// Scrub the random bits...
func (p *poly) pointwise(a, b *poly) {
for i := range p.coeffs {
t := montgomeryReduce(3186 * uint32(b.coeffs[i])) // t is now in Montgomery domain
p.coeffs[i] = montgomeryReduce(uint32(a.coeffs[i]) * uint32(t)) // p.coeffs[i] is back in normal domain
func (p *poly) add(a, b *poly) {
for i := range p.coeffs {
p.coeffs[i] = barrettReduce(a.coeffs[i] + b.coeffs[i])
// ntt transforms p in place by running the package-level ntt over its
// coefficients with the omegasMontgomery table.
// NOTE(review): only this one statement is visible in this excerpt; any
// pre-scaling of the coefficients must be confirmed against the full file.
func (p *poly) ntt() {
ntt(&p.coeffs, &omegasMontgomery)
// invNtt transforms p in place by running the package-level ntt over its
// coefficients with the omegasInvMontgomery table.
// NOTE(review): only this one statement is visible in this excerpt; any
// reordering or scaling around it must be confirmed against the full file.
func (p *poly) invNtt() {
ntt(&p.coeffs, &omegasInvMontgomery)
func init() {
if paramK != 16 {
panic("poly.getNoise() only supports k=16")
@ -0,0 +1,99 @@
// poly_simple.go - NewHope-Simple polynomial.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to newhope, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package newhope
func coeffFreeze(x uint16) uint16 {
var c int16
r := barrettReduce(x)
m := r - paramQ
c = int16(m)
c >>= 15
r = m ^ ((r ^ m) & uint16(c))
return r
// Computes abs(x-Q/2)
func flipAbs(x uint16) uint16 {
r := int16(coeffFreeze(x))
r = r - paramQ/2
m := r >> 15
return uint16((r + m) ^ m)
func (p *poly) compress(r []byte) {
var t [8]uint32
for i, k := 0, 0; i < paramN; i, k = i+8, k+3 {
for j := range t {
t[j] = uint32(coeffFreeze(p.coeffs[i+j]))
t[j] = (((t[j] << 3) + paramQ/2) / paramQ) & 0x7
r[k] = byte(t[0]) | byte(t[1]<<3) | byte(t[2]<<6)
r[k+1] = byte(t[2]>>2) | byte(t[3]<<1) | byte(t[4]<<4) | byte(t[5]<<7)
r[k+2] = byte(t[5]>>1) | byte(t[6]<<2) | byte(t[7]<<5)
for i := range t {
t[i] = 0
// decompress fills p from the 3-bit-per-coefficient encoding in a, the layout
// written by compress: every three input bytes yield eight coefficients.
// a must hold at least 3*paramN/8 bytes.
func (p *poly) decompress(a []byte) {
for i := 0; i < paramN; i += 8 {
a0, a1, a2 := uint16(a[0]), uint16(a[1]), uint16(a[2])
// Unpack eight 3-bit values; coefficients i+2 and i+5 straddle a byte
// boundary and are assembled from two input bytes.
p.coeffs[i+0] = a0 & 7
p.coeffs[i+1] = (a0 >> 3) & 7
p.coeffs[i+2] = (a0 >> 6) | ((a1 << 2) & 4)
p.coeffs[i+3] = (a1 >> 1) & 7
p.coeffs[i+4] = (a1 >> 4) & 7
p.coeffs[i+5] = (a1 >> 7) | ((a2 << 1) & 6)
p.coeffs[i+6] = (a2 >> 2) & 7
p.coeffs[i+7] = (a2 >> 5)
// Advance to the next three input bytes.
a = a[3:]
// Scale each 3-bit value back up by paramQ/8, rounding to nearest (the +4).
for j := 0; j < 8; j++ {
p.coeffs[i+j] = uint16((uint32(p.coeffs[i+j])*paramQ + 4) >> 3)
// fromMsg encodes the first 32 bytes (256 bits) of msg into p.
//
// Bit j of byte i is written to four coefficients, at indices 8*i+j,
// 8*i+j+256, 8*i+j+512 and 8*i+j+768: paramQ/2 when the bit is set, 0 when it
// is clear. The highest index touched is 1023.
func (p *poly) fromMsg(msg []byte) {
for i := uint(0); i < 32; i++ { // XXX: const for 32
for j := uint(0); j < 8; j++ {
// mask is 0xffff when the bit is set and 0 otherwise, so the value is
// selected without branching on message data.
mask := -(uint16((msg[i] >> j) & 1))
p.coeffs[8*i+j+0] = mask & (paramQ / 2)
p.coeffs[8*i+j+256] = mask & (paramQ / 2)
p.coeffs[8*i+j+512] = mask & (paramQ / 2)
p.coeffs[8*i+j+768] = mask & (paramQ / 2)
// toMsg decodes 256 message bits from p into msg (32 bytes are written to).
//
// For each bit i, the flipAbs values of the four coefficients at i, i+256,
// i+512 and i+768 are summed and compared against paramQ.
// NOTE(review): bits are OR-ed into msg, so msg must be all zero on entry
// unless it is cleared by code not visible in this chunk -- confirm.
func (p *poly) toMsg(msg []byte) {
for i := uint(0); i < 256; i++ {
t := flipAbs(p.coeffs[i+0])
t += flipAbs(p.coeffs[i+256])
t += flipAbs(p.coeffs[i+512])
t += flipAbs(p.coeffs[i+768])
//t = (~(t - PARAM_Q));
// The uint16 subtraction wraps when t < paramQ, setting the top bit; the
// shift below extracts that bit as the decoded message bit.
t = (t - paramQ)
t >>= 15
msg[i>>3] |= byte(t << (i & 7))
// sub stores in p the coefficient-wise difference a - b, passed through
// barrettReduce. 3*paramQ is added before subtracting b.
// NOTE(review): the 3*paramQ offset presumably keeps the uint16 intermediate
// from wrapping below zero for the input ranges callers supply -- confirm.
func (p *poly) sub(a, b *poly) {
for i := range p.coeffs {
p.coeffs[i] = barrettReduce(a.coeffs[i] + 3*paramQ - b.coeffs[i])
@ -0,0 +1,287 @@
// precomp.go - NewHope precomputed tables.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to newhope, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package newhope
var omegasMontgomery = [paramN / 2]uint16{
4075, 6974, 7373, 7965, 3262, 5079, 522, 2169, 6364, 1018, 1041, 8775, 2344,
11011, 5574, 1973, 4536, 1050, 6844, 3860, 3818, 6118, 2683, 1190, 4789,
7822, 7540, 6752, 5456, 4449, 3789, 12142, 11973, 382, 3988, 468, 6843,
5339, 6196, 3710, 11316, 1254, 5435, 10930, 3998, 10256, 10367, 3879,
11889, 1728, 6137, 4948, 5862, 6136, 3643, 6874, 8724, 654, 10302, 1702,
7083, 6760, 56, 3199, 9987, 605, 11785, 8076, 5594, 9260, 6403, 4782,
6212, 4624, 9026, 8689, 4080, 11868, 6221, 3602, 975, 8077, 8851, 9445,
5681, 3477, 1105, 142, 241, 12231, 1003, 3532, 5009, 1956, 6008, 11404,
7377, 2049, 10968, 12097, 7591, 5057, 3445, 4780, 2920, 7048, 3127,
8120, 11279, 6821, 11502, 8807, 12138, 2127, 2839, 3957, 431, 1579,
6383, 9784, 5874, 677, 3336, 6234, 2766, 1323, 9115, 12237, 2031, 6956,
6413, 2281, 3969, 3991, 12133, 9522, 4737, 10996, 4774, 5429, 11871,
3772, 453, 5908, 2882, 1805, 2051, 1954, 11713, 3963, 2447, 6142, 8174,
3030, 1843, 2361, 12071, 2908, 3529, 3434, 3202, 7796, 2057, 5369,
11939, 1512, 6906, 10474, 11026, 49, 10806, 5915, 1489, 9789, 5942,
10706, 10431, 7535, 426, 8974, 3757, 10314, 9364, 347, 5868, 9551, 9634,
6554, 10596, 9280, 11566, 174, 2948, 2503, 6507, 10723, 11606, 2459, 64,
3656, 8455, 5257, 5919, 7856, 1747, 9166, 5486, 9235, 6065, 835, 3570,
4240, 11580, 4046, 10970, 9139, 1058, 8210, 11848, 922, 7967, 1958,
10211, 1112, 3728, 4049, 11130, 5990, 1404, 325, 948, 11143, 6190, 295,
11637, 5766, 8212, 8273, 2919, 8527, 6119, 6992, 8333, 1360, 2555, 6167,
1200, 7105, 7991, 3329, 9597, 12121, 5106, 5961, 10695, 10327, 3051,
9923, 4896, 9326, 81, 3091, 1000, 7969, 4611, 726, 1853, 12149, 4255,
11112, 2768, 10654, 1062, 2294, 3553, 4805, 2747, 4846, 8577, 9154,
1170, 2319, 790, 11334, 9275, 9088, 1326, 5086, 9094, 6429, 11077,
10643, 3504, 3542, 8668, 9744, 1479, 1, 8246, 7143, 11567, 10984, 4134,
5736, 4978, 10938, 5777, 8961, 4591, 5728, 6461, 5023, 9650, 7468, 949,
9664, 2975, 11726, 2744, 9283, 10092, 5067, 12171, 2476, 3748, 11336,
6522, 827, 9452, 5374, 12159, 7935, 3296, 3949, 9893, 4452, 10908, 2525,
3584, 8112, 8011, 10616, 4989, 6958, 11809, 9447, 12280, 1022, 11950,
9821, 11745, 5791, 5092, 2089, 9005, 2881, 3289, 2013, 9048, 729, 7901,
1260, 5755, 4632, 11955, 2426, 10593, 1428, 4890, 5911, 3932, 9558,
8830, 3637, 5542, 145, 5179, 8595, 3707, 10530, 355, 3382, 4231, 9741,
1207, 9041, 7012, 1168, 10146, 11224, 4645, 11885, 10911, 10377, 435,
7952, 4096, 493, 9908, 6845, 6039, 2422, 2187, 9723, 8643, 9852, 9302,
6022, 7278, 1002, 4284, 5088, 1607, 7313, 875, 8509, 9430, 1045, 2481,
5012, 7428, 354, 6591, 9377, 11847, 2401, 1067, 7188, 11516, 390, 8511,
8456, 7270, 545, 8585, 9611, 12047, 1537, 4143, 4714, 4885, 1017, 5084,
1632, 3066, 27, 1440, 8526, 9273, 12046, 11618, 9289, 3400, 9890, 3136,
7098, 8758, 11813, 7384, 3985, 11869, 6730, 10745, 10111, 2249, 4048,
2884, 11136, 2126, 1630, 9103, 5407, 2686, 9042, 2969, 8311, 9424, 9919,
8779, 5332, 10626, 1777, 4654, 10863, 7351, 3636, 9585, 5291, 8374,
2166, 4919, 12176, 9140, 12129, 7852, 12286, 4895, 10805, 2780, 5195,
2305, 7247, 9644, 4053, 10600, 3364, 3271, 4057, 4414, 9442, 7917, 2174,
var omegasInvMontgomery = [paramN / 2]uint16{
4075, 5315, 4324, 4916, 10120, 11767, 7210, 9027, 10316, 6715, 1278, 9945,
3514, 11248, 11271, 5925, 147, 8500, 7840, 6833, 5537, 4749, 4467, 7500,
11099, 9606, 6171, 8471, 8429, 5445, 11239, 7753, 9090, 12233, 5529,
5206, 10587, 1987, 11635, 3565, 5415, 8646, 6153, 6427, 7341, 6152,
10561, 400, 8410, 1922, 2033, 8291, 1359, 6854, 11035, 973, 8579, 6093,
6950, 5446, 11821, 8301, 11907, 316, 52, 3174, 10966, 9523, 6055, 8953,
11612, 6415, 2505, 5906, 10710, 11858, 8332, 9450, 10162, 151, 3482,
787, 5468, 1010, 4169, 9162, 5241, 9369, 7509, 8844, 7232, 4698, 192,
1321, 10240, 4912, 885, 6281, 10333, 7280, 8757, 11286, 58, 12048,
12147, 11184, 8812, 6608, 2844, 3438, 4212, 11314, 8687, 6068, 421,
8209, 3600, 3263, 7665, 6077, 7507, 5886, 3029, 6695, 4213, 504, 11684,
2302, 1962, 1594, 6328, 7183, 168, 2692, 8960, 4298, 5184, 11089, 6122,
9734, 10929, 3956, 5297, 6170, 3762, 9370, 4016, 4077, 6523, 652, 11994,
6099, 1146, 11341, 11964, 10885, 6299, 1159, 8240, 8561, 11177, 2078,
10331, 4322, 11367, 441, 4079, 11231, 3150, 1319, 8243, 709, 8049, 8719,
11454, 6224, 3054, 6803, 3123, 10542, 4433, 6370, 7032, 3834, 8633,
12225, 9830, 683, 1566, 5782, 9786, 9341, 12115, 723, 3009, 1693, 5735,
2655, 2738, 6421, 11942, 2925, 1975, 8532, 3315, 11863, 4754, 1858,
1583, 6347, 2500, 10800, 6374, 1483, 12240, 1263, 1815, 5383, 10777,
350, 6920, 10232, 4493, 9087, 8855, 8760, 9381, 218, 9928, 10446, 9259,
4115, 6147, 9842, 8326, 576, 10335, 10238, 10484, 9407, 6381, 11836,
8517, 418, 6860, 7515, 1293, 7552, 2767, 156, 8298, 8320, 10008, 5876,
5333, 10258, 10115, 4372, 2847, 7875, 8232, 9018, 8925, 1689, 8236,
2645, 5042, 9984, 7094, 9509, 1484, 7394, 3, 4437, 160, 3149, 113, 7370,
10123, 3915, 6998, 2704, 8653, 4938, 1426, 7635, 10512, 1663, 6957,
3510, 2370, 2865, 3978, 9320, 3247, 9603, 6882, 3186, 10659, 10163,
1153, 9405, 8241, 10040, 2178, 1544, 5559, 420, 8304, 4905, 476, 3531,
5191, 9153, 2399, 8889, 3000, 671, 243, 3016, 3763, 10849, 12262, 9223,
10657, 7205, 11272, 7404, 7575, 8146, 10752, 242, 2678, 3704, 11744,
5019, 3833, 3778, 11899, 773, 5101, 11222, 9888, 442, 2912, 5698, 11935,
4861, 7277, 9808, 11244, 2859, 3780, 11414, 4976, 10682, 7201, 8005,
11287, 5011, 6267, 2987, 2437, 3646, 2566, 10102, 9867, 6250, 5444,
2381, 11796, 8193, 4337, 11854, 1912, 1378, 404, 7644, 1065, 2143,
11121, 5277, 3248, 11082, 2548, 8058, 8907, 11934, 1759, 8582, 3694,
7110, 12144, 6747, 8652, 3459, 2731, 8357, 6378, 7399, 10861, 1696,
9863, 334, 7657, 6534, 11029, 4388, 11560, 3241, 10276, 9000, 9408,
3284, 10200, 7197, 6498, 544, 2468, 339, 11267, 9, 2842, 480, 5331,
7300, 1673, 4278, 4177, 8705, 9764, 1381, 7837, 2396, 8340, 8993, 4354,
130, 6915, 2837, 11462, 5767, 953, 8541, 9813, 118, 7222, 2197, 3006,
9545, 563, 9314, 2625, 11340, 4821, 2639, 7266, 5828, 6561, 7698, 3328,
6512, 1351, 7311, 6553, 8155, 1305, 722, 5146, 4043, 12288, 10810, 2545,
3621, 8747, 8785, 1646, 1212, 5860, 3195, 7203, 10963, 3201, 3014, 955,
11499, 9970, 11119, 3135, 3712, 7443, 9542, 7484, 8736, 9995, 11227,
1635, 9521, 1177, 8034, 140, 10436, 11563, 7678, 4320, 11289, 9198,
12208, 2963, 7393, 2366, 9238,
var psisBitrevMontgomery = [paramN]uint16{
4075, 6974, 7373, 7965, 3262, 5079, 522, 2169, 6364, 1018, 1041, 8775, 2344,
11011, 5574, 1973, 4536, 1050, 6844, 3860, 3818, 6118, 2683, 1190, 4789,
7822, 7540, 6752, 5456, 4449, 3789, 12142, 11973, 382, 3988, 468, 6843,
5339, 6196, 3710, 11316, 1254, 5435, 10930, 3998, 10256, 10367, 3879,
11889, 1728, 6137, 4948, 5862, 6136, 3643, 6874, 8724, 654, 10302, 1702,
7083, 6760, 56, 3199, 9987, 605, 11785, 8076, 5594, 9260, 6403, 4782,
6212, 4624, 9026, 8689, 4080, 11868, 6221, 3602, 975, 8077, 8851, 9445,
5681, 3477, 1105, 142, 241, 12231, 1003, 3532, 5009, 1956, 6008, 11404,
7377, 2049, 10968, 12097, 7591, 5057, 3445, 4780, 2920, 7048, 3127,
8120, 11279, 6821, 11502, 8807, 12138, 2127, 2839, 3957, 431, 1579,
6383, 9784, 5874, 677, 3336, 6234, 2766, 1323, 9115, 12237, 2031, 6956,
6413, 2281, 3969, 3991, 12133, 9522, 4737, 10996, 4774, 5429, 11871,
3772, 453, 5908, 2882, 1805, 2051, 1954, 11713, 3963, 2447, 6142, 8174,
3030, 1843, 2361, 12071, 2908, 3529, 3434, 3202, 7796, 2057, 5369,
11939, 1512, 6906, 10474, 11026, 49, 10806, 5915, 1489, 9789, 5942,
10706, 10431, 7535, 426, 8974, 3757, 10314, 9364, 347, 5868, 9551, 9634,
6554, 10596, 9280, 11566, 174, 2948, 2503, 6507, 10723, 11606, 2459, 64,
3656, 8455, 5257, 5919, 7856, 1747, 9166, 5486, 9235, 6065, 835, 3570,
4240, 11580, 4046, 10970, 9139, 1058, 8210, 11848, 922, 7967, 1958,
10211, 1112, 3728, 4049, 11130, 5990, 1404, 325, 948, 11143, 6190, 295,
11637, 5766, 8212, 8273, 2919, 8527, 6119, 6992, 8333, 1360, 2555, 6167,
1200, 7105, 7991, 3329, 9597, 12121, 5106, 5961, 10695, 10327, 3051,
9923, 4896, 9326, 81, 3091, 1000, 7969, 4611, 726, 1853, 12149, 4255,
11112, 2768, 10654, 1062, 2294, 3553, 4805, 2747, 4846, 8577, 9154,
1170, 2319, 790, 11334, 9275, 9088, 1326, 5086, 9094, 6429, 11077,
10643, 3504, 3542, 8668, 9744, 1479, 1, 8246, 7143, 11567, 10984, 4134,
5736, 4978, 10938, 5777, 8961, 4591, 5728, 6461, 5023, 9650, 7468, 949,
9664, 2975, 11726, 2744, 9283, 10092, 5067, 12171, 2476, 3748, 11336,
6522, 827, 9452, 5374, 12159, 7935, 3296, 3949, 9893, 4452, 10908, 2525,
3584, 8112, 8011, 10616, 4989, 6958, 11809, 9447, 12280, 1022, 11950,
9821, 11745, 5791, 5092, 2089, 9005, 2881, 3289, 2013, 9048, 729, 7901,
1260, 5755, 4632, 11955, 2426, 10593, 1428, 4890, 5911, 3932, 9558,
8830, 3637, 5542, 145, 5179, 8595, 3707, 10530, 355, 3382, 4231, 9741,
1207, 9041, 7012, 1168, 10146, 11224, 4645, 11885, 10911, 10377, 435,
7952, 4096, 493, 9908, 6845, 6039, 2422, 2187, 9723, 8643, 9852, 9302,
6022, 7278, 1002, 4284, 5088, 1607, 7313, 875, 8509, 9430, 1045, 2481,
5012, 7428, 354, 6591, 9377, 11847, 2401, 1067, 7188, 11516, 390, 8511,
8456, 7270, 545, 8585, 9611, 12047, 1537, 4143, 4714, 4885, 1017, 5084,
1632, 3066, 27, 1440, 8526, 9273, 12046, 11618, 9289, 3400, 9890, 3136,
7098, 8758, 11813, 7384, 3985, 11869, 6730, 10745, 10111, 2249, 4048,
2884, 11136, 2126, 1630, 9103, 5407, 2686, 9042, 2969, 8311, 9424, 9919,
8779, 5332, 10626, 1777, 4654, 10863, 7351, 3636, 9585, 5291, 8374,
2166, 4919, 12176, 9140, 12129, 7852, 12286, 4895, 10805, 2780, 5195,
2305, 7247, 9644, 4053, 10600, 3364, 3271, 4057, 4414, 9442, 7917, 2174,
3947, 11951, 2455, 6599, 10545, 10975, 3654, 2894, 7681, 7126, 7287,
12269, 4119, 3343, 2151, 1522, 7174, 7350, 11041, 2442, 2148, 5959,
6492, 8330, 8945, 5598, 3624, 10397, 1325, 6565, 1945, 11260, 10077,
2674, 3338, 3276, 11034, 506, 6505, 1392, 5478, 8778, 1178, 2776, 3408,
10347, 11124, 2575, 9489, 12096, 6092, 10058, 4167, 6085, 923, 11251,
11912, 4578, 10669, 11914, 425, 10453, 392, 10104, 8464, 4235, 8761,
7376, 2291, 3375, 7954, 8896, 6617, 7790, 1737, 11667, 3982, 9342, 6680,
636, 6825, 7383, 512, 4670, 2900, 12050, 7735, 994, 1687, 11883, 7021,
146, 10485, 1403, 5189, 6094, 2483, 2054, 3042, 10945, 3981, 10821,
11826, 8882, 8151, 180, 9600, 7684, 5219, 10880, 6780, 204, 11232, 2600,
7584, 3121, 3017, 11053, 7814, 7043, 4251, 4739, 11063, 6771, 7073,
9261, 2360, 11925, 1928, 11825, 8024, 3678, 3205, 3359, 11197, 5209,
8581, 3238, 8840, 1136, 9363, 1826, 3171, 4489, 7885, 346, 2068, 1389,
8257, 3163, 4840, 6127, 8062, 8921, 612, 4238, 10763, 8067, 125, 11749,
10125, 5416, 2110, 716, 9839, 10584, 11475, 11873, 3448, 343, 1908,
4538, 10423, 7078, 4727, 1208, 11572, 3589, 2982, 1373, 1721, 10753,
4103, 2429, 4209, 5412, 5993, 9011, 438, 3515, 7228, 1218, 8347, 5232,
8682, 1327, 7508, 4924, 448, 1014, 10029, 12221, 4566, 5836, 12229,
2717, 1535, 3200, 5588, 5845, 412, 5102, 7326, 3744, 3056, 2528, 7406,
8314, 9202, 6454, 6613, 1417, 10032, 7784, 1518, 3765, 4176, 5063, 9828,
2275, 6636, 4267, 6463, 2065, 7725, 3495, 8328, 8755, 8144, 10533, 5966,
12077, 9175, 9520, 5596, 6302, 8400, 579, 6781, 11014, 5734, 11113,
11164, 4860, 1131, 10844, 9068, 8016, 9694, 3837, 567, 9348, 7000, 6627,
7699, 5082, 682, 11309, 5207, 4050, 7087, 844, 7434, 3769, 293, 9057,
6940, 9344, 10883, 2633, 8190, 3944, 5530, 5604, 3480, 2171, 9282,
11024, 2213, 8136, 3805, 767, 12239, 216, 11520, 6763, 10353, 7, 8566,
845, 7235, 3154, 4360, 3285, 10268, 2832, 3572, 1282, 7559, 3229, 8360,
10583, 6105, 3120, 6643, 6203, 8536, 8348, 6919, 3536, 9199, 10891,
11463, 5043, 1658, 5618, 8787, 5789, 4719, 751, 11379, 6389, 10783,
3065, 7806, 6586, 2622, 5386, 510, 7628, 6921, 578, 10345, 11839, 8929,
4684, 12226, 7154, 9916, 7302, 8481, 3670, 11066, 2334, 1590, 7878,
10734, 1802, 1891, 5103, 6151, 8820, 3418, 7846, 9951, 4693, 417, 9996,
9652, 4510, 2946, 5461, 365, 881, 1927, 1015, 11675, 11009, 1371, 12265,
2485, 11385, 5039, 6742, 8449, 1842, 12217, 8176, 9577, 4834, 7937,
9461, 2643, 11194, 3045, 6508, 4094, 3451, 7911, 11048, 5406, 4665,
3020, 6616, 11345, 7519, 3669, 5287, 1790, 7014, 5410, 11038, 11249,
2035, 6125, 10407, 4565, 7315, 5078, 10506, 2840, 2478, 9270, 4194,
9195, 4518, 7469, 1160, 6878, 2730, 10421, 10036, 1734, 3815, 10939,
5832, 10595, 10759, 4423, 8420, 9617, 7119, 11010, 11424, 9173, 189,
10080, 10526, 3466, 10588, 7592, 3578, 11511, 7785, 9663, 530, 12150,
8957, 2532, 3317, 9349, 10243, 1481, 9332, 3454, 3758, 7899, 4218, 2593,
11410, 2276, 982, 6513, 1849, 8494, 9021, 4523, 7988, 8, 457, 648, 150,
8000, 2307, 2301, 874, 5650, 170, 9462, 2873, 9855, 11498, 2535, 11169,
5808, 12268, 9687, 1901, 7171, 11787, 3846, 1573, 6063, 3793, 466,
11259, 10608, 3821, 6320, 4649, 6263, 2929,
var psisInvMontgomery = [paramN]uint16{
256, 10570, 1510, 7238, 1034, 7170, 6291, 7921, 11665, 3422, 4000, 2327,
2088, 5565, 795, 10647, 1521, 5484, 2539, 7385, 1055, 7173, 8047, 11683,
1669, 1994, 3796, 5809, 4341, 9398, 11876, 12230, 10525, 12037, 12253,
3506, 4012, 9351, 4847, 2448, 7372, 9831, 3160, 2207, 5582, 2553, 7387,
6322, 9681, 1383, 10731, 1533, 219, 5298, 4268, 7632, 6357, 9686, 8406,
4712, 9451, 10128, 4958, 5975, 11387, 8649, 11769, 6948, 11526, 12180,
1740, 10782, 6807, 2728, 7412, 4570, 4164, 4106, 11120, 12122, 8754,
11784, 3439, 5758, 11356, 6889, 9762, 11928, 1704, 1999, 10819, 12079,
12259, 7018, 11536, 1648, 1991, 2040, 2047, 2048, 10826, 12080, 8748,
8272, 8204, 1172, 1923, 7297, 2798, 7422, 6327, 4415, 7653, 6360, 11442,
12168, 7005, 8023, 9924, 8440, 8228, 2931, 7441, 1063, 3663, 5790, 9605,
10150, 1450, 8985, 11817, 10466, 10273, 12001, 3470, 7518, 1074, 1909,
7295, 9820, 4914, 702, 5367, 7789, 8135, 9940, 1420, 3714, 11064, 12114,
12264, 1752, 5517, 9566, 11900, 1700, 3754, 5803, 829, 1874, 7290, 2797,
10933, 5073, 7747, 8129, 6428, 6185, 11417, 1631, 233, 5300, 9535,
10140, 11982, 8734, 8270, 2937, 10953, 8587, 8249, 2934, 9197, 4825,
5956, 4362, 9401, 1343, 3703, 529, 10609, 12049, 6988, 6265, 895, 3639,
4031, 4087, 4095, 585, 10617, 8539, 4731, 4187, 9376, 3095, 9220, 10095,
10220, 1460, 10742, 12068, 1724, 5513, 11321, 6884, 2739, 5658, 6075,
4379, 11159, 10372, 8504, 4726, 9453, 3106, 7466, 11600, 10435, 8513,
9994, 8450, 9985, 3182, 10988, 8592, 2983, 9204, 4826, 2445, 5616, 6069,
867, 3635, 5786, 11360, 5134, 2489, 10889, 12089, 1727, 7269, 2794,
9177, 1311, 5454, 9557, 6632, 2703, 9164, 10087, 1441, 3717, 531, 3587,
2268, 324, 5313, 759, 1864, 5533, 2546, 7386, 9833, 8427, 4715, 11207,
1601, 7251, 4547, 11183, 12131, 1733, 10781, 10318, 1474, 10744, 5046,
4232, 11138, 10369, 6748, 964, 7160, 4534, 7670, 8118, 8182, 4680,
11202, 6867, 981, 8918, 1274, 182, 26, 7026, 8026, 11680, 12202, 10521,
1503, 7237, 4545, 5916, 9623, 8397, 11733, 10454, 3249, 9242, 6587, 941,
1890, 270, 10572, 6777, 9746, 6659, 6218, 6155, 6146, 878, 1881, 7291,
11575, 12187, 1741, 7271, 8061, 11685, 6936, 4502, 9421, 4857, 4205,
7623, 1089, 10689, 1527, 8996, 10063, 11971, 10488, 6765, 2722, 3900,
9335, 11867, 6962, 11528, 5158, 4248, 4118, 5855, 2592, 5637, 6072,
2623, 7397, 8079, 9932, 4930, 5971, 853, 3633, 519, 8852, 11798, 3441,
11025, 1575, 225, 8810, 11792, 12218, 3501, 9278, 3081, 9218, 4828,
7712, 8124, 11694, 12204, 3499, 4011, 573, 3593, 5780, 7848, 9899,
10192, 1456, 208, 7052, 2763, 7417, 11593, 10434, 12024, 8740, 11782,
10461, 3250, 5731, 7841, 9898, 1414, 202, 3540, 7528, 2831, 2160, 10842,
5060, 4234, 4116, 588, 84, 12, 7024, 2759, 9172, 6577, 11473, 1639,
9012, 3043, 7457, 6332, 11438, 1634, 1989, 9062, 11828, 8712, 11778,
12216, 10523, 6770, 9745, 10170, 4964, 9487, 6622, 946, 8913, 6540,
6201, 4397, 9406, 8366, 9973, 8447, 8229, 11709, 8695, 10020, 3187,
5722, 2573, 10901, 6824, 4486, 4152, 9371, 8361, 2950, 2177, 311, 1800,
9035, 8313, 11721, 3430, 490, 70, 10, 1757, 251, 3547, 7529, 11609,
3414, 7510, 4584, 4166, 9373, 1339, 5458, 7802, 11648, 1664, 7260, 9815,
10180, 6721, 9738, 10169, 8475, 8233, 9954, 1422, 8981, 1283, 5450,
11312, 1616, 3742, 11068, 10359, 4991, 713, 3613, 9294, 8350, 4704, 672,
96, 7036, 9783, 11931, 3460, 5761, 823, 10651, 12055, 10500, 1500, 5481,
783, 3623, 11051, 8601, 8251, 8201, 11705, 10450, 5004, 4226, 7626,
2845, 2162, 3820, 7568, 9859, 3164, 452, 10598, 1514, 5483, 6050, 6131,
4387, 7649, 8115, 6426, 918, 8909, 8295, 1185, 5436, 11310, 8638, 1234,
5443, 11311, 5127, 2488, 2111, 10835, 5059, 7745, 2862, 3920, 560, 80,
1767, 2008, 3798, 11076, 6849, 2734, 10924, 12094, 8750, 1250, 10712,
6797, 971, 7161, 1023, 8924, 4786, 7706, 4612, 4170, 7618, 6355, 4419,
5898, 11376, 10403, 10264, 6733, 4473, 639, 5358, 2521, 9138, 3061,
5704, 4326, 618, 5355, 765, 5376, 768, 7132, 4530, 9425, 3102, 9221,
6584, 11474, 10417, 10266, 12000, 6981, 6264, 4406, 2385, 7363, 4563,
4163, 7617, 9866, 3165, 9230, 11852, 10471, 5007, 5982, 11388, 5138,
734, 3616, 11050, 12112, 6997, 11533, 12181, 10518, 12036, 3475, 2252,
7344, 9827, 4915, 9480, 6621, 4457, 7659, 9872, 6677, 4465, 4149, 7615,
4599, 657, 3605, 515, 10607, 6782, 4480, 640, 1847, 3775, 5806, 2585,
5636, 9583, 1369, 10729, 8555, 10000, 11962, 5220, 7768, 8132, 8184,
9947, 1421, 203, 29, 8782, 11788, 1684, 10774, 10317, 4985, 9490, 8378,
4708, 11206, 5112, 5997, 7879, 11659, 12199, 8765, 10030, 4944, 5973,
6120, 6141, 6144, 7900, 11662, 1666, 238, 34, 3516, 5769, 9602, 8394,
9977, 6692, 956, 10670, 6791, 9748, 11926, 8726, 11780, 5194, 742, 106,
8793, 10034, 3189, 10989, 5081, 4237, 5872, 4350, 2377, 10873, 6820,
6241, 11425, 10410, 10265, 3222, 5727, 9596, 4882, 2453, 2106, 3812,
11078, 12116, 5242, 4260, 11142, 8614, 11764, 12214, 5256, 4262, 4120,
11122, 5100, 11262, 5120, 2487, 5622, 9581, 8391, 8221, 2930, 10952,
12098, 6995, 6266, 9673, 4893, 699, 3611, 4027, 5842, 11368, 1624, 232,
8811, 8281, 1183, 169, 8802, 3013, 2186, 5579, 797, 3625, 4029, 11109,
1587, 7249, 11569, 8675, 6506, 2685, 10917, 12093, 12261, 12285, 1755,
7273, 1039, 1904, 272, 3550, 9285, 3082, 5707, 6082, 4380, 7648, 11626,
5172, 4250, 9385, 8363, 8217, 4685, 5936, 848, 8899, 6538, 934, 1889,
3781, 9318, 10109, 10222, 6727, 961, 5404, 772, 5377, 9546, 8386, 1198,
8949, 3034, 2189, 7335, 4559, 5918, 2601, 10905, 5069, 9502, 3113, 7467,
8089, 11689, 5181, 9518, 8382, 2953, 3933, 4073, 4093, 7607, 8109, 2914,
5683, 4323, 11151, 1593, 10761, 6804, 972, 3650, 2277, 5592, 4310, 7638,
9869, 4921, 703, 1856, 9043, 4803, 9464, 1352, 8971, 11815, 5199, 7765,
6376, 4422, 7654, 2849, 407, 8836, 6529, 7955, 2892, 9191, 1313, 10721,
12065, 12257, 1751, 9028, 8312, 2943, 2176, 3822, 546, 78, 8789, 11789,
10462, 12028, 6985, 4509, 9422, 1346, 5459, 4291, 613, 10621, 6784,
9747, 3148, 7472, 2823, 5670, 810, 7138, 8042, 4660, 7688, 6365, 6176,
6149, 2634, 5643, 9584, 10147, 11983, 5223, 9524, 11894, 10477, 8519,
1217, 3685, 2282, 326, 10580, 3267, 7489, 4581, 2410, 5611, 11335, 6886,
8006, 8166, 11700, 3427, 11023, 8597, 10006, 3185, 455, 65, 5276, 7776,
4622, 5927, 7869, 9902, 11948, 5218, 2501, 5624, 2559, 10899, 1557,
1978, 10816, 10323, 8497, 4725, 675, 1852, 10798, 12076, 10503, 3256,
9243, 3076, 2195, 10847, 12083, 10504, 12034, 10497,
@ -0,0 +1,32 @@
// poly.go - NewHope reductions.
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to newhope, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
package newhope
// Incomplete-reduction routines; for details on allowed input ranges
// and produced output ranges, see the description in the paper:
// https://cryptojedi.org/papers/#newhope
// Constants used by montgomeryReduce.
const (
qinv = 12287 // -inverse_mod(p,2^18)
// rlog is the bit width of the mask applied in montgomeryReduce
// ((1 << rlog) - 1), matching the 2^18 in the qinv comment above.
rlog = 18
// montgomeryReduce returns (a + u*paramQ) >> 18 truncated to uint16, where
// u = (a * qinv) mod 2^rlog.
//
// This is an incomplete reduction; the comment at the top of this file points
// to the paper for the allowed input range and the resulting output range.
func montgomeryReduce(a uint32) uint16 {
u := a * qinv
// Keep only the low rlog bits of a*qinv.
u &= ((1 << rlog) - 1)
u *= paramQ
// NOTE(review): the shift is the literal 18 rather than rlog; the two are
// equal as declared above.
a = (a + u) >> 18
return uint16(a)
// barrettReduce subtracts from a the multiple u*paramQ, where
// u = (a * 5) >> 16, and returns the result.
//
// This is an incomplete reduction; the comment at the top of this file points
// to the paper for the output range. coeffFreeze in this package applies a
// further conditional subtraction of paramQ on top of it.
func barrettReduce(a uint16) uint16 {
// u estimates the quotient a / paramQ.
u := (uint32(a) * 5) >> 16
u *= paramQ
a -= uint16(u)
return a
@ -0,0 +1,21 @@
@ -0,0 +1,197 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// Package chacha implements some low-level functions of the
// ChaCha cipher family.
package chacha // import "github.com/aead/chacha20/chacha"
import (
const (
// NonceSize is the size of the ChaCha20 nonce in bytes.
NonceSize = 8
// INonceSize is the size of the IETF-ChaCha20 nonce in bytes.
INonceSize = 12
// XNonceSize is the size of the XChaCha20 nonce in bytes.
XNonceSize = 24
// KeySize is the size of the key in bytes.
KeySize = 32
// CPU feature flags consulted when choosing between the assembly and generic
// implementations. They are assigned by architecture-specific init functions
// (the 386 one sets useSSE2/useSSSE3 from cpu.X86 and leaves the AVX flags
// false).
var (
useSSE2 bool
useSSSE3 bool
useAVX bool
useAVX2 bool
// Errors assigned to setup's result for a bad key length (errKeySize) or a bad
// nonce length (errInvalidNonce).
var (
errKeySize = errors.New("chacha20/chacha: bad key length")
errInvalidNonce = errors.New("chacha20/chacha: bad nonce length")
// setup fills state from key and nonce.
//
// It sets err to errKeySize when len(key) != KeySize and to errInvalidNonce
// for an unsupported nonce length.
// NOTE(review): several lines of this function are missing from this view
// (the nonce-length dispatch and the declarations of tmpKey and hNonce), so
// the comments below describe only the statements that are visible.
func setup(state *[64]byte, nonce, key []byte) (err error) {
if len(key) != KeySize {
err = errKeySize
var Nonce [16]byte
// Derive a subkey in place: tmpKey starts as a copy of key and is overwritten
// with the HChaCha20 output; nonce[16:] then goes into the upper half of the
// 16-byte Nonce handed to initialize.
copy(tmpKey[:], key)
HChaCha20(&tmpKey, &hNonce, &tmpKey)
copy(Nonce[8:], nonce[16:])
initialize(state, tmpKey[:], &Nonce)
// BUG(aead): A "good" compiler will remove this (optimizations)
// But using the provided key instead of tmpKey,
// will change the key (-> probably confuses users)
for i := range tmpKey {
tmpKey[i] = 0
err = errInvalidNonce
// XORKeyStream crypts bytes from src to dst using the given nonce and key.
// The length of the nonce determines the version of ChaCha20:
// - NonceSize: ChaCha20/r with a 64 bit nonce and a 2^64 * 64 byte period.
// - INonceSize: ChaCha20/r as defined in RFC 7539 and a 2^32 * 64 byte period.
// - XNonceSize: XChaCha20/r with a 192 bit nonce and a 2^64 * 64 byte period.
// The rounds argument specifies the number of rounds performed for keystream
// generation - valid values are 8, 12 or 20; any other value panics. The src
// and dst may be the same slice but otherwise should not overlap.
// If len(dst) < len(src) this function panics.
// If the nonce is neither 64, 96 nor 192 bits long, this function panics.
// With an INonceSize nonce it also panics when src is longer than 2^38 bytes
// (2^32 blocks of 64 bytes).
func XORKeyStream(dst, src, nonce, key []byte, rounds int) {
if rounds != 20 && rounds != 12 && rounds != 8 {
panic("chacha20/chacha: bad number of rounds")
if len(dst) < len(src) {
panic("chacha20/chacha: dst buffer is to small")
if len(nonce) == INonceSize && uint64(len(src)) > (1<<38) {
panic("chacha20/chacha: src is too large")
// block is scratch space for keystream; state is filled in by setup.
var block, state [64]byte
if err := setup(&state, nonce, key); err != nil {
xorKeyStream(dst, src, &block, &state, rounds)
// Cipher implements ChaCha20/r (XChaCha20/r) for a given number of rounds r.
type Cipher struct {
// state is the 64-byte cipher state filled in by setup; the block counter is
// read and written at byte offset 48. block is the keystream buffer passed
// to xorKeyStream.
state, block [64]byte
// off is the position in block of the next unused keystream byte; 0 means
// no buffered keystream is pending.
off int
rounds int // 20 for ChaCha20
// noncesize is INonceSize for a 96 bit nonce and NonceSize otherwise; it
// selects a 32 bit or 64 bit block counter.
noncesize int
// NewCipher returns a new *chacha.Cipher implementing the ChaCha20/r or XChaCha20/r
// (r = 8, 12 or 20) stream cipher. The nonce must be unique for one key for all time.
// The length of the nonce determines the version of ChaCha20:
// - NonceSize: ChaCha20/r with a 64 bit nonce and a 2^64 * 64 byte period.
// - INonceSize: ChaCha20/r as defined in RFC 7539 and a 2^32 * 64 byte period.
// - XNonceSize: XChaCha20/r with a 192 bit nonce and a 2^64 * 64 byte period.
// If setup rejects the key or nonce length, a nil Cipher and that error are
// returned. If rounds is not 8, 12 or 20, this function panics rather than
// returning an error.
func NewCipher(nonce, key []byte, rounds int) (*Cipher, error) {
if rounds != 20 && rounds != 12 && rounds != 8 {
panic("chacha20/chacha: bad number of rounds")
c := new(Cipher)
if err := setup(&(c.state), nonce, key); err != nil {
return nil, err
c.rounds = rounds
// Only the 96 bit nonce variant is recorded as INonceSize; every other
// accepted nonce length (including XNonceSize) is recorded as NonceSize.
if len(nonce) == INonceSize {
c.noncesize = INonceSize
} else {
c.noncesize = NonceSize
return c, nil
// XORKeyStream crypts bytes from src to dst. Src and dst may be the same slice
// but otherwise should not overlap. If len(dst) < len(src) the function panics.
// It also panics with "counter overflow" when processing src would push the
// block counter past its maximum (32 bit for INonceSize, 64 bit otherwise).
// NOTE(review): some lines of this method are missing from this view; the
// comments below describe only the statements that are visible.
func (c *Cipher) XORKeyStream(dst, src []byte) {
if len(dst) < len(src) {
panic("chacha20/chacha: dst buffer is to small")
// First consume keystream left over in c.block from a previous call.
if c.off > 0 {
// n is the number of buffered keystream bytes still unused.
n := len(c.block[c.off:])
if len(src) <= n {
for i, v := range src {
dst[i] = v ^ c.block[c.off]
if c.off == 64 {
c.off = 0
for i, v := range c.block[c.off:] {
dst[i] = src[i] ^ v
src = src[n:]
dst = dst[n:]
c.off = 0
// check for counter overflow
blocksToXOR := len(src) / 64
if len(src)%64 != 0 {
var overflow bool
// The counter lives at state[48:]; its width depends on the nonce variant.
if c.noncesize == INonceSize {
overflow = binary.LittleEndian.Uint32(c.state[48:]) > math.MaxUint32-uint32(blocksToXOR)
} else {
overflow = binary.LittleEndian.Uint64(c.state[48:]) > math.MaxUint64-uint64(blocksToXOR)
if overflow {
panic("chacha20/chacha: counter overflow")
// xorKeyStream's return value is added to c.off to track buffered keystream.
c.off += xorKeyStream(dst, src, &(c.block), &(c.state), c.rounds)
// SetCounter skips ctr * 64 byte blocks. SetCounter(0) resets the cipher.
// This function always skips the unused keystream of the current 64 byte block.
// The counter is written little-endian at state[48:]. For a cipher created
// with an INonceSize nonce only the low 32 bits of ctr are stored.
func (c *Cipher) SetCounter(ctr uint64) {
if c.noncesize == INonceSize {
binary.LittleEndian.PutUint32(c.state[48:], uint32(ctr))
} else {
binary.LittleEndian.PutUint64(c.state[48:], ctr)
// Discard any buffered keystream.
c.off = 0
// HChaCha20 generates 32 pseudo-random bytes from a 128 bit nonce and a 256 bit secret key.
// It can be used as a key-derivation-function (KDF).
// The result is written to out. This is a thin exported wrapper around the
// platform-specific hChaCha20.
func HChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) { hChaCha20(out, nonce, key) }
@ -0,0 +1,406 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// +build amd64,!gccgo,!appengine,!nacl
#include "const.s"
#include "macro.s"
// Named stack slots, 32 bytes apart, at offsets from SP.
// NOTE(review): the names suggest spilled constants (TWO, C16, C8), saved
// state rows (STATE_0..3) and temporaries (TMP_0/1) -- the stores that fill
// them are not all visible in this chunk; confirm.
#define TWO 0(SP)
#define C16 32(SP)
#define C8 64(SP)
#define STATE_0 96(SP)
#define STATE_1 128(SP)
#define STATE_2 160(SP)
#define STATE_3 192(SP)
#define TMP_0 224(SP)
#define TMP_1 256(SP)
// func xorKeyStreamAVX2(dst, src []byte, block, state *[64]byte, rounds int) int
//
// Frame is 320 bytes with 80 bytes of arguments and result; the result is
// written from CX at the end.
// NOTE(review): branch labels and several instructions of this routine are
// missing from this view, so only the visible instructions are annotated.
TEXT ·xorKeyStreamAVX2(SB), 4, $320-80
// Load arguments: DI = dst, SI = src, BX = block, AX = state, DX = rounds,
// CX = len(src).
MOVQ dst_base+0(FP), DI
MOVQ src_base+24(FP), SI
MOVQ block+48(FP), BX
MOVQ state+56(FP), AX
MOVQ rounds+64(FP), DX
MOVQ src_len+32(FP), CX
// Round SP up to a 32-byte boundary.
ADDQ $32, SP
ANDQ $-32, SP
VPERM2I128 $0x22, Y2, Y0, Y0
VPERM2I128 $0x33, Y2, Y1, Y1
VPERM2I128 $0x22, Y3, Y2, Y2
VPERM2I128 $0x33, Y3, Y3, Y3
JZ done
VMOVDQU ·one_AVX2<>(SB), Y4
VMOVDQU ·rol16_AVX2<>(SB), Y4
VMOVDQU ·rol8_AVX2<>(SB), Y5
VMOVDQU ·two_AVX2<>(SB), Y6
// Dispatch on the remaining length in CX.
CMPQ CX, $64
JBE between_0_and_64
CMPQ CX, $192
JBE between_64_and_192
CMPQ CX, $320
JBE between_192_and_320
CMPQ CX, $448
JBE between_320_and_448
// Round loop working on four register groups (Y0-Y3, Y4-Y7, Y8-Y11,
// Y12-Y15); R9 is decremented by 2 per pass.
CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y8, C16, C8)
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y8, C16, C8)
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y0, C16, C8)
CHACHA_QROUND_AVX(Y12, Y13, Y14, Y15, Y0, C16, C8)
CHACHA_QROUND_AVX(Y12, Y13, Y14, Y15, Y0, C16, C8)
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y0, C16, C8)
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y8, C16, C8)
CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y8, C16, C8)
SUBQ $2, R9
JA chacha_loop_512
XOR_AVX2(DI, SI, 0, Y0, Y1, Y2, Y3, Y12, Y13)
XOR_AVX2(DI, SI, 128, Y4, Y5, Y6, Y7, Y12, Y13)
VPADDD Y2, Y10, Y10
VPADDD Y3, Y11, Y11
XOR_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13)
VPADDD Y2, Y14, Y14
VPADDD Y3, Y15, Y15
CMPQ CX, $512
JB less_than_512
XOR_AVX2(DI, SI, 384, Y12, Y13, Y14, Y15, Y4, Y5)
// Advance the pointers and remaining length by 512 bytes.
ADDQ $512, SI
ADDQ $512, DI
SUBQ $512, CX
CMPQ CX, $448
JA at_least_512
JZ done
CMPQ CX, $64
JBE between_0_and_64
CMPQ CX, $192
JBE between_64_and_192
CMPQ CX, $320
JBE between_192_and_320
JMP between_320_and_448
XOR_UPPER_AVX2(DI, SI, 384, Y12, Y13, Y14, Y15, Y4, Y5)
EXTRACT_LOWER(BX, Y12, Y13, Y14, Y15, Y4)
ADDQ $448, SI
ADDQ $448, DI
SUBQ $448, CX
JMP finalize
// Round loop working on three register groups.
CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y13, Y14, Y15)
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y13, Y14, Y15)
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
SUBQ $2, R9
JA chacha_loop_384
XOR_AVX2(DI, SI, 0, Y0, Y1, Y2, Y3, Y12, Y13)
XOR_AVX2(DI, SI, 128, Y4, Y5, Y6, Y7, Y12, Y13)
VPADDD Y2, Y10, Y10
VPADDD Y3, Y11, Y11
CMPQ CX, $384
JB less_than_384
XOR_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13)
SUBQ $384, CX
JE done
ADDQ $384, SI
ADDQ $384, DI
JMP between_0_and_64
XOR_UPPER_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13)
EXTRACT_LOWER(BX, Y8, Y9, Y10, Y11, Y12)
ADDQ $320, SI
ADDQ $320, DI
SUBQ $320, CX
JMP finalize
// Round loop working on two register groups.
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
SUBQ $2, R9
JA chacha_loop_256
XOR_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13)
VPADDD Y2, Y10, Y10
VPADDD Y3, Y11, Y11
CMPQ CX, $256
JB less_than_256
XOR_AVX2(DI, SI, 128, Y8, Y9, Y10, Y11, Y12, Y13)
SUBQ $256, CX
JE done
ADDQ $256, SI
ADDQ $256, DI
JMP between_0_and_64
XOR_UPPER_AVX2(DI, SI, 128, Y8, Y9, Y10, Y11, Y12, Y13)
EXTRACT_LOWER(BX, Y8, Y9, Y10, Y11, Y12)
ADDQ $192, SI
ADDQ $192, DI
SUBQ $192, CX
JMP finalize
// Round loop working on one register group.
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
SUBQ $2, R9
JA chacha_loop_128
CMPQ CX, $128
JB less_than_128
XOR_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13)
SUBQ $128, CX
JE done
ADDQ $128, SI
ADDQ $128, DI
JMP between_0_and_64
SUBQ $64, CX
JMP finalize
// Round loop on 128-bit X registers.
CHACHA_QROUND_AVX(X4, X5, X6, X7, X13, X14, X15)
CHACHA_QROUND_AVX(X4, X5, X6, X7, X13, X14, X15)
SUBQ $2, R9
JA chacha_loop_64
VMOVDQU ·one<>(SB), X0
CMPQ CX, $64
JB less_than_64
XOR_AVX(DI, SI, 0, X4, X5, X6, X7, X13)
SUBQ $64, CX
JMP done
// Byte-wise tail: XOR one byte of src (SI) with one byte of block (BX) and
// store it to dst (DI).
XORQ R11, R11
XORQ R12, R12
MOVB 0(SI), R11
MOVB 0(BX), R12
XORQ R11, R12
MOVB R12, 0(DI)
JA xor_loop
// Return CX as the function result.
MOVQ CX, ret+72(FP)
@ -0,0 +1,60 @@
// +build 386,!gccgo,!appengine,!nacl
package chacha
import (
// init selects the 386 implementations: SSE2 and SSSE3 follow the CPU's
// reported features, and the AVX and AVX2 paths are always disabled on this
// architecture.
func init() {
useSSE2 = cpu.X86.HasSSE2
useSSSE3 = cpu.X86.HasSSSE3
useAVX = false
useAVX2 = false
// initialize fills the 64-byte state: the four sigma words (little-endian) in
// bytes 0-15, key starting at byte 16, and the 16-byte nonce block in bytes
// 48-63.
// NOTE(review): copy takes at most 48 bytes from key, so a key longer than 32
// bytes would spill into bytes 48-63 before the nonce copy overwrites them;
// setup is expected to have checked the key length -- confirm.
func initialize(state *[64]byte, key []byte, nonce *[16]byte) {
binary.LittleEndian.PutUint32(state[0:], sigma[0])
binary.LittleEndian.PutUint32(state[4:], sigma[1])
binary.LittleEndian.PutUint32(state[8:], sigma[2])
binary.LittleEndian.PutUint32(state[12:], sigma[3])
copy(state[16:], key[:])
copy(state[48:], nonce[:])
// hChaCha20SSE2 is the SSE2 HChaCha20 routine; it takes the same arguments as
// hChaCha20.
// This function is implemented in chacha_386.s
func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
// hChaCha20SSSE3 is the SSSE3 HChaCha20 routine; it takes the same arguments
// as hChaCha20.
// This function is implemented in chacha_386.s
func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
// xorKeyStreamSSE2 is the SSE2 keystream-XOR routine; it takes the same
// arguments and returns the same kind of result as xorKeyStream.
// This function is implemented in chacha_386.s
func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
// hChaCha20 dispatches to an HChaCha20 implementation: SSSE3 is preferred,
// then SSE2, and hChaCha20Generic otherwise.
// NOTE(review): the line introducing the fallback case is missing from this
// view.
func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
switch {
case useSSSE3:
hChaCha20SSSE3(out, nonce, key)
case useSSE2:
hChaCha20SSE2(out, nonce, key)
hChaCha20Generic(out, nonce, key)
// xorKeyStream dispatches to the SSE2 assembly routine when useSSE2 is set and
// to xorKeyStreamGeneric otherwise, returning whatever the chosen
// implementation returns.
func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
if useSSE2 {
return xorKeyStreamSSE2(dst, src, block, state, rounds)
} else {
return xorKeyStreamGeneric(dst, src, block, state, rounds)
@ -0,0 +1,163 @@
// +build 386,!gccgo,!appengine,!nacl
#include "const.s"
#include "macro.s"
// FINALIZE xors len bytes from src and block using
// the temp. registers t0 and t1 and writes the result
// to dst.
// Each visible step handles one byte: it loads a byte from src and a byte
// from block, XORs them, stores the result to dst, then increments src, block
// and dst and decrements len.
// NOTE(review): the loop label and back-branch are missing from this view.
#define FINALIZE(dst, src, block, len, t0, t1) \
XORL t0, t0; \
XORL t1, t1; \
MOVB 0(src), t0; \
MOVB 0(block), t1; \
XORL t0, t1; \
MOVB t1, 0(dst); \
INCL src; \
INCL block; \
INCL dst; \
DECL len; \
#define Dst DI
#define Nonce AX
#define Key BX
#define Rounds DX
// func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
//
// SSE2 variant of the HChaCha20 core for 386. The frame is $0-12: three
// 4-byte pointer arguments and no locals; flag 4 is NOSPLIT in textflag.h.
// X0 is loaded with the sigma constants and X1 with key[0:16]; on exit X0 and
// X3 are stored to out[0:16] and out[16:32].
// NOTE(review): as written, this routine never loads key[16:32] or the nonce
// into X2/X3, never initialises Rounds before the SUBL, contains no
// quarter-round macros and jumps to a chacha_loop label that is not defined
// here (compare hChaCha20SSSE3) — confirm against the assembled file.
TEXT ·hChaCha20SSE2(SB), 4, $0-12
MOVL out+0(FP), Dst
MOVL nonce+4(FP), Nonce
MOVL key+8(FP), Key
MOVOU ·sigma<>(SB), X0
MOVOU 0*16(Key), X1
// Two rounds are accounted for per iteration.
SUBL $2, Rounds
JNZ chacha_loop
MOVOU X0, 0*16(Dst)
MOVOU X3, 1*16(Dst)
// func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
//
// SSSE3 variant of the HChaCha20 core for 386. The frame is $0-12: three
// 4-byte pointer arguments and no locals; flag 4 is NOSPLIT in textflag.h.
// Register use: X0 = sigma, X1 = key[0:16], X2 = key[16:32], X3 = nonce,
// X4 = scratch, X5/X6 = PSHUFB masks for the 16 and 8 bit rotates.
// On exit X0 and X3 are stored to out[0:16] and out[16:32].
// NOTE(review): the chacha_loop label targeted by JNZ and any
// CHACHA_SHUFFLE_SSE steps between the two quarter-rounds are not present in
// this routine as written — confirm against the assembled file.
TEXT ·hChaCha20SSSE3(SB), 4, $0-12
MOVL out+0(FP), Dst
MOVL nonce+4(FP), Nonce
MOVL key+8(FP), Key
MOVOU ·sigma<>(SB), X0
MOVOU 0*16(Key), X1
MOVOU 1*16(Key), X2
MOVOU 0*16(Nonce), X3
// HChaCha20 always runs 20 rounds; the counter drops by 2 per iteration.
MOVL $20, Rounds
MOVOU ·rol16<>(SB), X5
MOVOU ·rol8<>(SB), X6
CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
SUBL $2, Rounds
JNZ chacha_loop
MOVOU X0, 0*16(Dst)
MOVOU X3, 1*16(Dst)
#undef Dst
#undef Nonce
#undef Key
#undef Rounds
#define State AX
#define Dst DI
#define Src SI
#define Len DX
#define Tmp0 BX
#define Tmp1 BP
// func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
//
// SSE2 keystream XOR for 386. The frame is $0-40: two 12-byte slice headers,
// two 4-byte pointers, a 4-byte int and a 4-byte result; flag 4 is NOSPLIT in
// textflag.h. The four 16-byte rows of state live in X0-X3 and the generated
// block in X4-X7. The result slot is preset to 0 and only overwritten with
// the remaining length when a partial block is handled.
// NOTE(review): the conditional branches after TESTL/CMPL, the round loop
// body and labels such as GENERATE_KEYSTREAM are not present in this routine
// as written — confirm against the assembled file before relying on the
// control flow described here.
TEXT ·xorKeyStreamSSE2(SB), 4, $0-40
MOVL dst_base+0(FP), Dst
MOVL src_base+12(FP), Src
MOVL state+28(FP), State
MOVL src_len+16(FP), Len
MOVL $0, ret+36(FP) // Number of bytes written to the keystream buffer - 0 iff len mod 64 == 0
// Load the four state rows.
MOVOU 0*16(State), X0
MOVOU 1*16(State), X1
MOVOU 2*16(State), X2
MOVOU 3*16(State), X3
TESTL Len, Len
// Tmp0 is the round counter, stepped down by 2.
MOVL rounds+32(FP), Tmp0
SUBL $2, Tmp0
MOVOU 0*16(State), X0 // Restore X0 from state
// X0 doubles as the holder of the 128-bit constant 1.
MOVOU ·one<>(SB), X0
CMPL Len, $64
// Full block: xor 64 source bytes with X4-X7, using X0 as scratch.
XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X0)
MOVOU 0*16(State), X0 // Restore X0 from state
ADDL $64, Src
ADDL $64, Dst
SUBL $64, Len
JMP GENERATE_KEYSTREAM // There is at least one more plaintext byte
// Partial block: State is repointed at the block buffer, the keystream block
// is spilled there and FINALIZE consumes it bytewise.
MOVL block+24(FP), State
MOVOU X4, 0(State)
MOVOU X5, 16(State)
MOVOU X6, 32(State)
MOVOU X7, 48(State)
MOVL Len, ret+36(FP) // Number of bytes written to the keystream buffer - 0 < Len < 64
FINALIZE(Dst, Src, State, Len, Tmp0, Tmp1)
// Point State back at the cipher state and write row 3 (X3) back.
MOVL state+28(FP), State
MOVOU X3, 3*16(State)
#undef State
#undef Dst
#undef Src
#undef Len
#undef Tmp0
#undef Tmp1
@ -0,0 +1,76 @@
// +build go1.7,amd64,!gccgo,!appengine,!nacl
package chacha
import "golang.org/x/sys/cpu"
// init selects the SIMD code paths for the amd64 build from the CPU features
// reported by cpu.X86.
func init() {
useSSE2 = cpu.X86.HasSSE2
useSSSE3 = cpu.X86.HasSSSE3
useAVX = cpu.X86.HasAVX
useAVX2 = cpu.X86.HasAVX2
// This function is implemented in chacha_amd64.s
func initialize(state *[64]byte, key []byte, nonce *[16]byte)
// This function is implemented in chacha_amd64.s
func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
// This function is implemented in chacha_amd64.s
func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
// This function is implemented in chachaAVX2_amd64.s
func hChaCha20AVX(out *[32]byte, nonce *[16]byte, key *[32]byte)
// This function is implemented in chacha_amd64.s
func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
// This function is implemented in chacha_amd64.s
func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
// This function is implemented in chacha_amd64.s
func xorKeyStreamAVX(dst, src []byte, block, state *[64]byte, rounds int) int
// This function is implemented in chachaAVX2_amd64.s
func xorKeyStreamAVX2(dst, src []byte, block, state *[64]byte, rounds int) int
func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
switch {
case useAVX:
hChaCha20AVX(out, nonce, key)
case useSSSE3:
hChaCha20SSSE3(out, nonce, key)
case useSSE2:
hChaCha20SSE2(out, nonce, key)
hChaCha20Generic(out, nonce, key)
func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
switch {
case useAVX2:
return xorKeyStreamAVX2(dst, src, block, state, rounds)
case useAVX:
return xorKeyStreamAVX(dst, src, block, state, rounds)
case useSSSE3:
return xorKeyStreamSSSE3(dst, src, block, state, rounds)
case useSSE2:
return xorKeyStreamSSE2(dst, src, block, state, rounds)
return xorKeyStreamGeneric(dst, src, block, state, rounds)
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,319 @@
package chacha
import "encoding/binary"
// sigma holds the four ChaCha constants (the ASCII string
// "expand 32-byte k") as little-endian 32-bit words.
var sigma = [4]uint32{0x61707865, 0x3320646e, 0x79622d32, 0x6b206574}

// xorKeyStreamGeneric XORs src with the ChaCha keystream derived from state
// and writes the result to dst, which must be at least len(src) bytes long.
//
// Whole 64-byte blocks are processed first; a trailing partial block, if any,
// consumes the first n bytes of a freshly generated keystream block that is
// left in block. The return value n is len(src) % 64, i.e. the number of
// bytes of block that have already been used (0 when src ends on a block
// boundary). state is advanced by one block counter step per generated block.
func xorKeyStreamGeneric(dst, src []byte, block, state *[64]byte, rounds int) int {
	for len(src) >= 64 {
		chachaGeneric(block, state, rounds)

		for i, v := range block {
			dst[i] = src[i] ^ v
		}
		src = src[64:]
		dst = dst[64:]
	}

	n := len(src)
	if n > 0 {
		chachaGeneric(block, state, rounds)
		for i, v := range src {
			dst[i] = v ^ block[i]
		}
	}
	return n
}

// quarterRoundGeneric applies one ChaCha quarter-round to the four words
// a, b, c, d and returns the updated words in the same order.
func quarterRoundGeneric(a, b, c, d uint32) (uint32, uint32, uint32, uint32) {
	a += b
	d ^= a
	d = (d << 16) | (d >> 16)
	c += d
	b ^= c
	b = (b << 12) | (b >> 20)
	a += b
	d ^= a
	d = (d << 8) | (d >> 24)
	c += d
	b ^= c
	b = (b << 7) | (b >> 25)
	return a, b, c, d
}

// doubleRoundGeneric applies one column round followed by one diagonal round
// (two ChaCha rounds) to the 16-word working state v.
func doubleRoundGeneric(v *[16]uint32) {
	// Column round.
	v[0], v[4], v[8], v[12] = quarterRoundGeneric(v[0], v[4], v[8], v[12])
	v[1], v[5], v[9], v[13] = quarterRoundGeneric(v[1], v[5], v[9], v[13])
	v[2], v[6], v[10], v[14] = quarterRoundGeneric(v[2], v[6], v[10], v[14])
	v[3], v[7], v[11], v[15] = quarterRoundGeneric(v[3], v[7], v[11], v[15])
	// Diagonal round.
	v[0], v[5], v[10], v[15] = quarterRoundGeneric(v[0], v[5], v[10], v[15])
	v[1], v[6], v[11], v[12] = quarterRoundGeneric(v[1], v[6], v[11], v[12])
	v[2], v[7], v[8], v[13] = quarterRoundGeneric(v[2], v[7], v[8], v[13])
	v[3], v[4], v[9], v[14] = quarterRoundGeneric(v[3], v[4], v[9], v[14])
}

// chachaGeneric computes one 64-byte ChaCha keystream block from state into
// dst and then advances the block counter stored in state.
//
// rounds is consumed two at a time, so an odd value is effectively rounded
// up. The counter is the little-endian word at state[48:52]; when it wraps to
// zero the carry is propagated into the word at state[52:56].
func chachaGeneric(dst *[64]byte, state *[64]byte, rounds int) {
	var s [16]uint32
	for i := range s {
		s[i] = binary.LittleEndian.Uint32(state[4*i:])
	}

	v := s
	for i := 0; i < rounds; i += 2 {
		doubleRoundGeneric(&v)
	}

	// Feed-forward: add the input state to the permuted state.
	for i := range v {
		v[i] += s[i]
	}

	// Advance the block counter; without this every call would return the
	// same keystream block.
	s[12]++
	binary.LittleEndian.PutUint32(state[48:], s[12])
	if s[12] == 0 { // indicates overflow
		s[13]++
		binary.LittleEndian.PutUint32(state[52:], s[13])
	}

	// dst is written last so that the output wins if dst and state alias.
	for i := range v {
		binary.LittleEndian.PutUint32(dst[4*i:], v[i])
	}
}

// hChaCha20Generic computes the HChaCha20 function: it runs the 20-round
// ChaCha permutation over sigma || key || nonce without the final
// feed-forward addition and writes words 0-3 and 12-15 of the result to out.
func hChaCha20Generic(out *[32]byte, nonce *[16]byte, key *[32]byte) {
	var v [16]uint32
	copy(v[:4], sigma[:])
	for i := 0; i < 8; i++ {
		v[4+i] = binary.LittleEndian.Uint32(key[4*i:])
	}
	for i := 0; i < 4; i++ {
		v[12+i] = binary.LittleEndian.Uint32(nonce[4*i:])
	}

	for i := 0; i < 20; i += 2 {
		doubleRoundGeneric(&v)
	}

	for i := 0; i < 4; i++ {
		binary.LittleEndian.PutUint32(out[4*i:], v[i])
		binary.LittleEndian.PutUint32(out[16+4*i:], v[12+i])
	}
}
@ -0,0 +1,33 @@
// +build !amd64,!386 gccgo appengine nacl
package chacha
import "encoding/binary"
// init disables every SIMD code path: this file is built for targets where
// the assembly implementations are not available (see the build constraint).
func init() {
useSSE2 = false
useSSSE3 = false
useAVX = false
useAVX2 = false
// initialize fills the 64-byte ChaCha state: bytes 0-15 receive the four
// sigma constants as little-endian words, key is copied in starting at
// offset 16 and nonce starting at offset 48.
// copy stops at the shorter operand, so a key shorter than 32 bytes leaves
// the remainder of bytes 16-47 untouched.
func initialize(state *[64]byte, key []byte, nonce *[16]byte) {
binary.LittleEndian.PutUint32(state[0:], sigma[0])
binary.LittleEndian.PutUint32(state[4:], sigma[1])
binary.LittleEndian.PutUint32(state[8:], sigma[2])
binary.LittleEndian.PutUint32(state[12:], sigma[3])
copy(state[16:], key[:])
copy(state[48:], nonce[:])
// xorKeyStream forwards to the portable Go implementation; this build has no
// assembly variants to choose from.
func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
return xorKeyStreamGeneric(dst, src, block, state, rounds)
// hChaCha20 forwards to the portable Go implementation; this build has no
// assembly variants to choose from.
func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
hChaCha20Generic(out, nonce, key)
@ -0,0 +1,53 @@
// +build 386,!gccgo,!appengine,!nacl amd64,!gccgo,!appengine,!nacl
#include "textflag.h"
DATA ·sigma<>+0x00(SB)/4, $0x61707865
DATA ·sigma<>+0x04(SB)/4, $0x3320646e
DATA ·sigma<>+0x08(SB)/4, $0x79622d32
DATA ·sigma<>+0x0C(SB)/4, $0x6b206574
GLOBL ·sigma<>(SB), (NOPTR+RODATA), $16 // The 4 ChaCha initialization constants
// SSE2/SSSE3/AVX constants
DATA ·one<>+0x00(SB)/8, $1
DATA ·one<>+0x08(SB)/8, $0
GLOBL ·one<>(SB), (NOPTR+RODATA), $16 // The constant 1 as 128 bit value
DATA ·rol16<>+0x00(SB)/8, $0x0504070601000302
DATA ·rol16<>+0x08(SB)/8, $0x0D0C0F0E09080B0A
GLOBL ·rol16<>(SB), (NOPTR+RODATA), $16 // The PSHUFB 16 bit left rotate constant
DATA ·rol8<>+0x00(SB)/8, $0x0605040702010003
DATA ·rol8<>+0x08(SB)/8, $0x0E0D0C0F0A09080B
GLOBL ·rol8<>(SB), (NOPTR+RODATA), $16 // The PSHUFB 8 bit left rotate constant
// AVX2 constants
DATA ·one_AVX2<>+0x00(SB)/8, $0
DATA ·one_AVX2<>+0x08(SB)/8, $0
DATA ·one_AVX2<>+0x10(SB)/8, $1
DATA ·one_AVX2<>+0x18(SB)/8, $0
GLOBL ·one_AVX2<>(SB), (NOPTR+RODATA), $32 // The constant 1 as 256 bit value
DATA ·two_AVX2<>+0x00(SB)/8, $2
DATA ·two_AVX2<>+0x08(SB)/8, $0
DATA ·two_AVX2<>+0x10(SB)/8, $2
DATA ·two_AVX2<>+0x18(SB)/8, $0
GLOBL ·two_AVX2<>(SB), (NOPTR+RODATA), $32
DATA ·rol16_AVX2<>+0x00(SB)/8, $0x0504070601000302
DATA ·rol16_AVX2<>+0x08(SB)/8, $0x0D0C0F0E09080B0A
DATA ·rol16_AVX2<>+0x10(SB)/8, $0x0504070601000302
DATA ·rol16_AVX2<>+0x18(SB)/8, $0x0D0C0F0E09080B0A
GLOBL ·rol16_AVX2<>(SB), (NOPTR+RODATA), $32 // The VPSHUFB 16 bit left rotate constant
DATA ·rol8_AVX2<>+0x00(SB)/8, $0x0605040702010003
DATA ·rol8_AVX2<>+0x08(SB)/8, $0x0E0D0C0F0A09080B
DATA ·rol8_AVX2<>+0x10(SB)/8, $0x0605040702010003
DATA ·rol8_AVX2<>+0x18(SB)/8, $0x0E0D0C0F0A09080B
GLOBL ·rol8_AVX2<>(SB), (NOPTR+RODATA), $32 // The VPSHUFB 8 bit left rotate constant
@ -0,0 +1,163 @@
// +build 386,!gccgo,!appengine,!nacl amd64,!gccgo,!appengine,!nacl
// ROTL_SSE rotates all 4 32 bit values of the XMM register v
// left by n bits using SSE2 instructions (0 <= n <= 32).
// The XMM register t is used as a temp. register.
#define ROTL_SSE(n, t, v) \
MOVO v, t; \
PSLLL $n, t; \
PSRLL $(32-n), v; \
PXOR t, v
// ROTL_AVX rotates all 4/8 32 bit values of the AVX/AVX2 register v
// left by n bits using AVX/AVX2 instructions (0 <= n <= 32).
// The AVX/AVX2 register t is used as a temp. register.
#define ROTL_AVX(n, t, v) \
VPSLLD $n, v, t; \
VPSRLD $(32-n), v, v; \
VPXOR v, t, v
// CHACHA_QROUND_SSE2 performs a ChaCha quarter-round using the
// 4 XMM registers v0, v1, v2 and v3. It uses only ROTL_SSE for
// rotations. The XMM register t is used as a temp. register.
#define CHACHA_QROUND_SSE2(v0, v1, v2, v3, t) \
PADDL v1, v0; \
PXOR v0, v3; \
ROTL_SSE(16, t, v3); \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE(12, t, v1); \
PADDL v1, v0; \
PXOR v0, v3; \
ROTL_SSE(8, t, v3); \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE(7, t, v1)
// CHACHA_QROUND_SSSE3 performs a ChaCha quarter-round using the
// 4 XMM registers v0, v1, v2 and v3. It uses PSHUFB for 8/16 bit
// rotations. The XMM register t is used as a temp. register.
// r16 holds the PSHUFB constant for a 16 bit left rotate.
// r8 holds the PSHUFB constant for a 8 bit left rotate.
#define CHACHA_QROUND_SSSE3(v0, v1, v2, v3, t, r16, r8) \
PADDL v1, v0; \
PXOR v0, v3; \
PSHUFB r16, v3; \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE(12, t, v1); \
PADDL v1, v0; \
PXOR v0, v3; \
PSHUFB r8, v3; \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE(7, t, v1)
// CHACHA_QROUND_AVX performs a ChaCha quarter-round using the
// 4 AVX/AVX2 registers v0, v1, v2 and v3. It uses VPSHUFB for 8/16 bit
// rotations. The AVX/AVX2 register t is used as a temp. register.
// r16 holds the VPSHUFB constant for a 16 bit left rotate.
// r8 holds the VPSHUFB constant for a 8 bit left rotate.
#define CHACHA_QROUND_AVX(v0, v1, v2, v3, t, r16, r8) \
VPADDD v0, v1, v0; \
VPXOR v3, v0, v3; \
VPSHUFB r16, v3, v3; \
VPADDD v2, v3, v2; \
VPXOR v1, v2, v1; \
ROTL_AVX(12, t, v1); \
VPADDD v0, v1, v0; \
VPXOR v3, v0, v3; \
VPSHUFB r8, v3, v3; \
VPADDD v2, v3, v2; \
VPXOR v1, v2, v1; \
ROTL_AVX(7, t, v1)
// CHACHA_SHUFFLE_SSE performs a ChaCha shuffle using the
// 3 XMM registers v1, v2 and v3. The inverse shuffle is
// performed by switching v1 and v3: CHACHA_SHUFFLE_SSE(v3, v2, v1).
#define CHACHA_SHUFFLE_SSE(v1, v2, v3) \
PSHUFL $0x39, v1, v1; \
PSHUFL $0x4E, v2, v2; \
PSHUFL $0x93, v3, v3
// CHACHA_SHUFFLE_AVX performs a ChaCha shuffle using the
// 3 AVX/AVX2 registers v1, v2 and v3. The inverse shuffle is
// performed by switching v1 and v3: CHACHA_SHUFFLE_AVX(v3, v2, v1).
#define CHACHA_SHUFFLE_AVX(v1, v2, v3) \
VPSHUFD $0x39, v1, v1; \
VPSHUFD $0x4E, v2, v2; \
VPSHUFD $0x93, v3, v3
// XOR_SSE extracts 4x16 byte vectors from src at
// off, xors all vectors with the corresponding XMM
// register (v0 - v3) and writes the result to dst
// at off.
// The XMM register t is used as a temp. register.
#define XOR_SSE(dst, src, off, v0, v1, v2, v3, t) \
MOVOU 0+off(src), t; \
PXOR v0, t; \
MOVOU t, 0+off(dst); \
MOVOU 16+off(src), t; \
PXOR v1, t; \
MOVOU t, 16+off(dst); \
MOVOU 32+off(src), t; \
PXOR v2, t; \
MOVOU t, 32+off(dst); \
MOVOU 48+off(src), t; \
PXOR v3, t; \
MOVOU t, 48+off(dst)
// XOR_AVX extracts 4x16 byte vectors from src at
// off, xors all vectors with the corresponding AVX
// register (v0 - v3) and writes the result to dst
// at off.
// The XMM register t is used as a temp. register.
#define XOR_AVX(dst, src, off, v0, v1, v2, v3, t) \
VPXOR 0+off(src), v0, t; \
VMOVDQU t, 0+off(dst); \
VPXOR 16+off(src), v1, t; \
VMOVDQU t, 16+off(dst); \
VPXOR 32+off(src), v2, t; \
VMOVDQU t, 32+off(dst); \
VPXOR 48+off(src), v3, t; \
VMOVDQU t, 48+off(dst)
// XOR_AVX2 xors 128 bytes from src at off with the keystream held
// in the 4 AVX2 registers v0 - v3 and writes the result to dst at off.
// VPERM2I128 with selector $32 combines the lower 128 bit lanes of a
// register pair (used for bytes 0-63) and selector $49 combines the
// upper lanes (used for bytes 64-127).
// The AVX2 registers t0 and t1 are used as temp. registers.
#define XOR_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \
VMOVDQU (0+off)(src), t0; \
VPERM2I128 $32, v1, v0, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (0+off)(dst); \
VMOVDQU (32+off)(src), t0; \
VPERM2I128 $32, v3, v2, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (32+off)(dst); \
VMOVDQU (64+off)(src), t0; \
VPERM2I128 $49, v1, v0, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (64+off)(dst); \
VMOVDQU (96+off)(src), t0; \
VPERM2I128 $49, v3, v2, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (96+off)(dst)
// XOR_UPPER_AVX2 xors 64 bytes from src at off with the keystream
// formed from the 128 bit lanes of v0 - v3 selected by VPERM2I128 $32
// and writes the result to dst at off. It is the first half of XOR_AVX2.
// The AVX2 registers t0 and t1 are used as temp. registers.
// The last line carries no continuation, so the macro ends here and
// cannot absorb the definition that follows it.
#define XOR_UPPER_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \
	VMOVDQU    (0+off)(src), t0;  \
	VPERM2I128 $32, v1, v0, t1;   \
	VPXOR      t0, t1, t0;        \
	VMOVDQU    t0, (0+off)(dst);  \
	VMOVDQU    (32+off)(src), t0; \
	VPERM2I128 $32, v3, v2, t1;   \
	VPXOR      t0, t1, t0;        \
	VMOVDQU    t0, (32+off)(dst)

// EXTRACT_LOWER stores 64 bytes of keystream to dst without xoring:
// the 128 bit lanes of v0 - v3 selected by VPERM2I128 $49, i.e. the
// half that XOR_UPPER_AVX2 does not consume.
// The AVX2 register t0 is used as a temp. register.
#define EXTRACT_LOWER(dst, v0, v1, v2, v3, t0) \
VPERM2I128 $49, v1, v0, t0; \
VMOVDQU t0, 0(dst); \
VPERM2I128 $49, v3, v2, t0; \
VMOVDQU t0, 32(dst)
@ -0,0 +1,4 @@
@ -0,0 +1,17 @@
# Set base env.
ARG GOOS=linux
ENV GOOS=${GOOS} GOARCH=${GOARCH} CGO_ENABLED=0 GOFLAGS='-v -ldflags=-s -ldflags=-w'
# Pre compile the stdlib for 386/arm (32bits).
RUN go build -a std
# Add the code to the image.
ADD . .
# Build the lib.
RUN go build
@ -0,0 +1,23 @@
# NOTE: Using 1.13 as a base to build the RISCV compiler, the resulting version is based on go1.6.
FROM golang:1.13
# Clone and compile a riscv compatible version of the go compiler.
RUN git clone https://review.gerrithub.io/riscv/riscv-go /riscv-go
# riscvdev branch HEAD as of 2019-06-29.
RUN cd /riscv-go && git checkout 04885fddd096d09d4450726064d06dd107e374bf
ENV PATH=/riscv-go/misc/riscv:/riscv-go/bin:$PATH
RUN cd /riscv-go/src && GOROOT_BOOTSTRAP=$(go env GOROOT) ./make.bash
ENV GOROOT=/riscv-go
# Set the base env.
ENV GOOS=linux GOARCH=riscv CGO_ENABLED=0 GOFLAGS='-v -ldflags=-s -ldflags=-w'
# Pre compile the stdlib.
RUN go build -a std
# Add the code to the image.
ADD . .
# Build the lib.
RUN go build
@ -0,0 +1,23 @@
@ -0,0 +1,107 @@
# pty
Pty is a Go package for using unix pseudo-terminals.
## Install
go get github.com/creack/pty
## Examples
Note that these examples are for demonstration purposes only, to showcase how to use the library. They are not meant to be used in any kind of production environment.
### Command
package main
import (
func main() {
c := exec.Command("grep", "--color=auto", "bar")
f, err := pty.Start(c)
if err != nil {
go func() {
f.Write([]byte{4}) // EOT
io.Copy(os.Stdout, f)
### Shell
package main
import (
func test() error {
// Create arbitrary command.
c := exec.Command("bash")
// Start the command with a pty.
ptmx, err := pty.Start(c)
if err != nil {
return err
// Make sure to close the pty at the end.
defer func() { _ = ptmx.Close() }() // Best effort.
// Handle pty size.
ch := make(chan os.Signal, 1)
signal.Notify(ch, syscall.SIGWINCH)
go func() {
for range ch {
if err := pty.InheritSize(os.Stdin, ptmx); err != nil {
log.Printf("error resizing pty: %s", err)
ch <- syscall.SIGWINCH // Initial resize.
defer func() { signal.Stop(ch); close(ch) }() // Cleanup signals when done.
// Set stdin in raw mode.
oldState, err := term.MakeRaw(int(os.Stdin.Fd()))
if err != nil {
defer func() { _ = term.Restore(int(os.Stdin.Fd()), oldState) }() // Best effort.
// Copy stdin to the pty and the pty to stdout.
// NOTE: The goroutine will keep reading until the next keystroke before returning.
go func() { _, _ = io.Copy(ptmx, os.Stdin) }()
_, _ = io.Copy(os.Stdout, ptmx)
return nil
func main() {
if err := test(); err != nil {
@ -0,0 +1,18 @@
//go:build gc
//+build gc
#include "textflag.h"
// System calls for amd64, Solaris are implemented in runtime/syscall_solaris.go
TEXT ·sysvicall6(SB),NOSPLIT,$0-88
JMP syscall·sysvicall6(SB)
TEXT ·rawSysvicall6(SB),NOSPLIT,$0-88
JMP syscall·rawSysvicall6(SB)
@ -0,0 +1,16 @@
// Package pty provides functions for working with Unix terminals.
package pty
import (
// ErrUnsupported is returned if a function is not
// available on the current platform.
var ErrUnsupported = errors.New("unsupported")
// Open opens a pty and its corresponding tty by delegating to the
// platform-specific open implementation. It returns both files, or a
// non-nil error from that implementation.
func Open() (pty, tty *os.File, err error) {
return open()
@ -0,0 +1,19 @@
//go:build !windows && !solaris && !aix
// +build !windows,!solaris,!aix
package pty
import "syscall"
const (
// ioctl issues the ioctl system call cmd on fd with ptr as its argument.
// It returns the syscall.Errno reported by the kernel, or a literal nil on
// success so callers never see a non-nil error wrapping errno 0.
func ioctl(fd, cmd, ptr uintptr) error {
_, _, e := syscall.Syscall(syscall.SYS_IOCTL, fd, cmd, ptr)
if e != 0 {
return e
return nil
@ -0,0 +1,40 @@
//go:build darwin || dragonfly || freebsd || netbsd || openbsd
// +build darwin dragonfly freebsd netbsd openbsd
package pty
// from <sys/ioccom.h>
const (
_IOC_VOID uintptr = 0x20000000
_IOC_OUT uintptr = 0x40000000
_IOC_IN uintptr = 0x80000000
_IOC_IN_OUT uintptr = _IOC_OUT | _IOC_IN
// _IOC_PARM_LEN extracts the parameter-length field of an ioctl request
// number: the bits starting at bit 16, masked with _IOC_PARAM_MASK.
func _IOC_PARM_LEN(ioctl uintptr) uintptr {
return (ioctl >> 16) & _IOC_PARAM_MASK
return _IOC(_IOC_VOID, group, ioctl_num, 0)
// _IOR builds an ioctl request number carrying the _IOC_OUT direction flag
// for the given group, command number and parameter length.
func _IOR(group byte, ioctl_num uintptr, param_len uintptr) uintptr {
return _IOC(_IOC_OUT, group, ioctl_num, param_len)
// _IOW builds an ioctl request number carrying the _IOC_IN direction flag
// for the given group, command number and parameter length.
func _IOW(group byte, ioctl_num uintptr, param_len uintptr) uintptr {
return _IOC(_IOC_IN, group, ioctl_num, param_len)
// _IOWR builds an ioctl request number carrying both direction flags
// (_IOC_IN_OUT) for the given group, command number and parameter length.
func _IOWR(group byte, ioctl_num uintptr, param_len uintptr) uintptr {
return _IOC(_IOC_IN_OUT, group, ioctl_num, param_len)
@ -0,0 +1,48 @@
//go:build solaris
// +build solaris
package pty
import (
//go:cgo_import_dynamic libc_ioctl ioctl "libc.so"
//go:linkname procioctl libc_ioctl
var procioctl uintptr
const (
// see /usr/include/sys/stropts.h
I_PUSH = uintptr((int32('S')<<8 | 002))
I_STR = uintptr((int32('S')<<8 | 010))
I_FIND = uintptr((int32('S')<<8 | 013))
// see /usr/include/sys/ptms.h
ISPTM = (int32('P') << 8) | 1
UNLKPT = (int32('P') << 8) | 2
PTSSTTY = (int32('P') << 8) | 3
ZONEPT = (int32('P') << 8) | 4
OWNERPT = (int32('P') << 8) | 5
// see /usr/include/sys/termios.h
TIOCSWINSZ = (uint32('T') << 8) | 103
TIOCGWINSZ = (uint32('T') << 8) | 104
type strioctl struct {
icCmd int32
icTimeout int32
icLen int32
icDP unsafe.Pointer
// Defined in asm_solaris_amd64.s.
func sysvicall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno)
// ioctl calls the dynamically imported libc ioctl through sysvicall6 with
// the three arguments fd, cmd and ptr (the remaining slots are zero).
// It returns the reported syscall.Errno, or a literal nil on success.
func ioctl(fd, cmd, ptr uintptr) error {
if _, _, errno := sysvicall6(uintptr(unsafe.Pointer(&procioctl)), 3, fd, cmd, ptr, 0, 0, 0); errno != 0 {
return errno
return nil
@ -0,0 +1,13 @@
//go:build aix
// +build aix
package pty
const (
// ioctl is a stub for AIX: it ignores its arguments and always returns
// ErrUnsupported.
func ioctl(fd, cmd, ptr uintptr) error {
return ErrUnsupported
@ -0,0 +1,19 @@
#!/usr/bin/env bash
case "$GOOSARCH" in
_* | *_ | _)
echo 'undefined $GOOS_$GOARCH:' "$GOOSARCH" 1>&2
exit 1
GODEFS="go tool cgo -godefs"
$GODEFS types.go |gofmt > ztypes_$GOARCH.go
case $GOOS in
$GODEFS types_$GOOS.go |gofmt > ztypes_$GOOSARCH.go
@ -0,0 +1,68 @@
//go:build darwin
// +build darwin
package pty
import (
// open opens /dev/ptmx (close-on-exec), resolves the name of the matching
// slave device, grants and unlocks it, and opens it with O_NOCTTY.
// It returns the ptmx file and the slave file; on any failure both results
// are nil and the ptmx descriptor is closed by the deferred cleanup.
func open() (pty, tty *os.File, err error) {
pFD, err := syscall.Open("/dev/ptmx", syscall.O_RDWR|syscall.O_CLOEXEC, 0)
if err != nil {
return nil, nil, err
p := os.NewFile(uintptr(pFD), "/dev/ptmx")
// In case of error after this point, make sure we close the ptmx fd.
defer func() {
if err != nil {
_ = p.Close() // Best effort.
sname, err := ptsname(p)
if err != nil {
return nil, nil, err
if err := grantpt(p); err != nil {
return nil, nil, err
if err := unlockpt(p); err != nil {
return nil, nil, err
t, err := os.OpenFile(sname, os.O_RDWR|syscall.O_NOCTTY, 0)
if err != nil {
return nil, nil, err
return p, t, nil
// ptsname asks the kernel for the slave device name of f via the
// TIOCPTYGNAME ioctl. The buffer is sized from the parameter length encoded
// in the ioctl number; the name is everything up to the first NUL byte.
// An error is returned if the ioctl fails or no NUL terminator is found.
func ptsname(f *os.File) (string, error) {
n := make([]byte, _IOC_PARM_LEN(syscall.TIOCPTYGNAME))
err := ioctl(f.Fd(), syscall.TIOCPTYGNAME, uintptr(unsafe.Pointer(&n[0])))
if err != nil {
return "", err
for i, c := range n {
if c == 0 {
return string(n[:i]), nil
return "", errors.New("TIOCPTYGNAME string not NUL-terminated")
// grantpt issues the TIOCPTYGRANT ioctl on f with a zero argument.
func grantpt(f *os.File) error {
return ioctl(f.Fd(), syscall.TIOCPTYGRANT, 0)
// unlockpt issues the TIOCPTYUNLK ioctl on f with a zero argument.
func unlockpt(f *os.File) error {
return ioctl(f.Fd(), syscall.TIOCPTYUNLK, 0)
@ -0,0 +1,83 @@
//go:build dragonfly
// +build dragonfly
package pty
import (
// same code as pty_darwin.go
// open opens /dev/ptmx, resolves the name of the matching slave device,
// runs the grantpt and unlockpt checks and opens the slave read-write.
// It returns the ptmx file and the slave file; on any failure both results
// are nil and the ptmx file is closed by the deferred cleanup.
func open() (pty, tty *os.File, err error) {
p, err := os.OpenFile("/dev/ptmx", os.O_RDWR, 0)
if err != nil {
return nil, nil, err
// In case of error after this point, make sure we close the ptmx fd.
defer func() {
if err != nil {
_ = p.Close() // Best effort.
sname, err := ptsname(p)
if err != nil {
return nil, nil, err
if err := grantpt(p); err != nil {
return nil, nil, err
if err := unlockpt(p); err != nil {
return nil, nil, err
t, err := os.OpenFile(sname, os.O_RDWR, 0)
if err != nil {
return nil, nil, err
return p, t, nil
// grantpt only verifies, via isptmaster, that f is a pty master; it performs
// no other action and returns the error from that check.
func grantpt(f *os.File) error {
_, err := isptmaster(f.Fd())
return err
// unlockpt only verifies, via isptmaster, that f is a pty master; it performs
// no other action and returns the error from that check.
func unlockpt(f *os.File) error {
_, err := isptmaster(f.Fd())
return err
// isptmaster issues the TIOCISPTMASTER ioctl on fd. It reports true with a
// nil error when the ioctl succeeds, and false together with the ioctl error
// otherwise.
func isptmaster(fd uintptr) (bool, error) {
err := ioctl(fd, syscall.TIOCISPTMASTER, 0)
return err == nil, err
var (
emptyFiodgnameArg fiodgnameArg
ioctl_FIODNAME = _IOW('f', 120, unsafe.Sizeof(emptyFiodgnameArg))
func ptsname(f *os.File) (string, error) {
name := make([]byte, _C_SPECNAMELEN)
fa := fiodgnameArg{Name: (*byte)(unsafe.Pointer(&name[0])), Len: _C_SPECNAMELEN, Pad_cgo_0: [4]byte{0, 0, 0, 0}}
err := ioctl(f.Fd(), ioctl_FIODNAME, uintptr(unsafe.Pointer(&fa)))
if err != nil {
return "", err
for i, c := range name {
if c == 0 {
s := "/dev/" + string(name[:i])
return strings.Replace(s, "ptm", "pts", -1), nil
return "", errors.New("TIOCPTYGNAME string not NUL-terminated")
@ -0,0 +1,81 @@
//go:build freebsd
// +build freebsd
package pty
import (
// posixOpenpt invokes the SYS_POSIX_OPENPT system call with oflag and
// returns the resulting file descriptor. err is the syscall.Errno when the
// call reports one; fd is not meaningful in that case.
func posixOpenpt(oflag int) (fd int, err error) {
r0, _, e1 := syscall.Syscall(syscall.SYS_POSIX_OPENPT, uintptr(oflag), 0, 0)
fd = int(r0)
if e1 != 0 {
err = e1
return fd, err
// open allocates a pty master with posixOpenpt (read-write, close-on-exec)
// and resolves the name of its slave device with ptsname. On failure both
// results are nil and the master is closed by the deferred cleanup.
// NOTE(review): t is returned but never assigned in this function as
// written — the step that opens the slave device named by sname appears to
// be missing; confirm against the complete source.
func open() (pty, tty *os.File, err error) {
fd, err := posixOpenpt(syscall.O_RDWR | syscall.O_CLOEXEC)
if err != nil {
return nil, nil, err
p := os.NewFile(uintptr(fd), "/dev/pts")
// In case of error after this point, make sure we close the pts fd.
defer func() {
if err != nil {
_ = p.Close() // Best effort.
sname, err := ptsname(p)
if err != nil {
return nil, nil, err
return p, t, nil
// isptmaster issues the TIOCPTMASTER ioctl on fd. It reports true with a nil
// error when the ioctl succeeds, and false together with the ioctl error
// otherwise.
func isptmaster(fd uintptr) (bool, error) {
err := ioctl(fd, syscall.TIOCPTMASTER, 0)
return err == nil, err
var (
emptyFiodgnameArg fiodgnameArg
ioctlFIODGNAME = _IOW('f', 120, unsafe.Sizeof(emptyFiodgnameArg))
// ptsname returns the device name the kernel reports for the pty master f.
// It first checks that f is a pty master (isptmaster), then fetches the name
// with the FIODGNAME ioctl into a buffer of _C_SPECNAMELEN+1 bytes and
// returns everything up to the first NUL byte, without adding any prefix.
// Errors: the isptmaster/ioctl error, syscall.EINVAL if f is reported as not
// a master, or a descriptive error if no NUL terminator is found.
func ptsname(f *os.File) (string, error) {
master, err := isptmaster(f.Fd())
if err != nil {
return "", err
if !master {
return "", syscall.EINVAL
// One extra byte so a name of maximum length still has room for its NUL.
const n = _C_SPECNAMELEN + 1
var (
buf = make([]byte, n)
arg = fiodgnameArg{Len: n, Buf: (*byte)(unsafe.Pointer(&buf[0]))}
if err := ioctl(f.Fd(), ioctlFIODGNAME, uintptr(unsafe.Pointer(&arg))); err != nil {
return "", err
for i, c := range buf {
if c == 0 {
return string(buf[:i]), nil
return "", errors.New("FIODGNAME string not NUL-terminated")
@ -0,0 +1,54 @@
//go:build linux
// +build linux
package pty
import (
func open() (pty, tty *os.File, err error) {
p, err := os.OpenFile("/dev/ptmx", os.O_RDWR, 0)
if err != nil {
return nil, nil, err
_ = p.Close() // Best effort.
sname, err := ptsname(p)
if err != nil {
return nil, nil, err
if err := unlockpt(p); err != nil {
return nil, nil, err
t, err := os.OpenFile(sname, os.O_RDWR|syscall.O_NOCTTY, 0) //nolint:gosec // Expected Open from a variable.
if err != nil {
return nil, nil, err
return p, t, nil
// ptsname reads a number for f with the TIOCGPTN ioctl and returns the path
// "/dev/pts/<number>". It returns the ioctl error on failure.
func ptsname(f *os.File) (string, error) {
var n _C_uint
err := ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&n))) //nolint:gosec // Expected unsafe pointer for Syscall call.
if err != nil {
return "", err
return "/dev/pts/" + strconv.Itoa(int(n)), nil
// unlockpt clears the lock on the pty f by passing a pointer to a zero
// value to the TIOCSPTLCK ioctl.
func unlockpt(f *os.File) error {
var u _C_int
// use TIOCSPTLCK with a pointer to zero to clear the lock
return ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&u))) //nolint:gosec // Expected unsafe pointer for Syscall call.
@ -0,0 +1,69 @@
//go:build netbsd
// +build netbsd
package pty
import (
// open opens /dev/ptmx, resolves the name of the matching slave device,
// grants access to it and opens it with O_NOCTTY.
// It returns the ptmx file and the slave file; on any failure both results
// are nil and the ptmx file is closed by the deferred cleanup.
func open() (pty, tty *os.File, err error) {
p, err := os.OpenFile("/dev/ptmx", os.O_RDWR, 0)
if err != nil {
return nil, nil, err
// In case of error after this point, make sure we close the ptmx fd.
defer func() {
if err != nil {
_ = p.Close() // Best effort.
sname, err := ptsname(p)
if err != nil {
return nil, nil, err
if err := grantpt(p); err != nil {
return nil, nil, err
// In NetBSD unlockpt() does nothing, so it isn't called here.
t, err := os.OpenFile(sname, os.O_RDWR|syscall.O_NOCTTY, 0)
if err != nil {
return nil, nil, err
return p, t, nil
func ptsname(f *os.File) (string, error) {
* from ptsname(3): The ptsname() function is equivalent to:
* struct ptmget pm;
* ioctl(fd, TIOCPTSNAME, &pm) == -1 ? NULL : pm.sn;
var ptm ptmget
if err := ioctl(f.Fd(), uintptr(ioctl_TIOCPTSNAME), uintptr(unsafe.Pointer(&ptm))); err != nil {
return "", err
name := make([]byte, len(ptm.Sn))
for i, c := range ptm.Sn {
name[i] = byte(c)
if c == 0 {
return string(name[:i]), nil
return "", errors.New("TIOCPTSNAME string not NUL-terminated")
func grantpt(f *os.File) error {
* from grantpt(3): Calling grantpt() is equivalent to:
* ioctl(fd, TIOCGRANTPT, 0);
return ioctl(f.Fd(), uintptr(ioctl_TIOCGRANTPT), 0)
@ -0,0 +1,36 @@
//go:build openbsd
// +build openbsd
package pty
import (
func open() (pty, tty *os.File, err error) {
* from ptm(4):
* The PTMGET command allocates a free pseudo terminal, changes its
* ownership to the caller, revokes the access privileges for all previous
* users, opens the file descriptors for the pty and tty devices and
* returns them to the caller in struct ptmget.
p, err := os.OpenFile("/dev/ptm", os.O_RDWR|syscall.O_CLOEXEC, 0)
if err != nil {
return nil, nil, err
defer p.Close()
var ptm ptmget
if err := ioctl(p.Fd(), uintptr(ioctl_PTMGET), uintptr(unsafe.Pointer(&ptm))); err != nil {
return nil, nil, err
pty = os.NewFile(uintptr(ptm.Cfd), "/dev/ptm")
tty = os.NewFile(uintptr(ptm.Sfd), "/dev/ptm")
return pty, tty, nil
@ -0,0 +1,152 @@
//go:build solaris
// +build solaris
package pty
/* based on:
import (
// open opens /dev/ptmx with O_NOCTTY, resolves, grants and unlocks the
// matching slave device, wraps the slave descriptor in an *os.File and
// pushes the ptem, ldterm and ttcompat STREAMS modules onto it.
// On failure both results are nil and the deferred cleanups close whatever
// was opened so far.
// NOTE(review): ptsfd is used below but never assigned in this function as
// written — the step that opens sname appears to be missing; confirm against
// the complete source.
func open() (pty, tty *os.File, err error) {
ptmxfd, err := syscall.Open("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY, 0)
if err != nil {
return nil, nil, err
p := os.NewFile(uintptr(ptmxfd), "/dev/ptmx")
// In case of error after this point, make sure we close the ptmx fd.
defer func() {
if err != nil {
_ = p.Close() // Best effort.
sname, err := ptsname(p)
if err != nil {
return nil, nil, err
if err := grantpt(p); err != nil {
return nil, nil, err
if err := unlockpt(p); err != nil {
return nil, nil, err
t := os.NewFile(uintptr(ptsfd), sname)
// In case of error after this point, make sure we close the pts fd.
defer func() {
if err != nil {
_ = t.Close() // Best effort.
// pushing terminal driver STREAMS modules as per pts(7)
for _, mod := range []string{"ptem", "ldterm", "ttcompat"} {
if err := streamsPush(t, mod); err != nil {
return nil, nil, err
return p, t, nil
func ptsname(f *os.File) (string, error) {
dev, err := ptsdev(f.Fd())
if err != nil {
return "", err
fn := "/dev/pts/" + strconv.FormatInt(int64(dev), 10)
if err := syscall.Access(fn, 0); err != nil {
return "", err
return fn, nil
func unlockpt(f *os.File) error {
istr := strioctl{
icCmd: UNLKPT,
icTimeout: 0,
icLen: 0,
icDP: nil,
return ioctl(f.Fd(), I_STR, uintptr(unsafe.Pointer(&istr)))
// minor returns the low eight bits of the device number x.
func minor(x uint64) uint64 {
	const lowByte = 0xff // Same mask as octal 0377.
	return x & lowByte
}
func ptsdev(fd uintptr) (uint64, error) {
istr := strioctl{
icCmd: ISPTM,
icTimeout: 0,
icLen: 0,
icDP: nil,
if err := ioctl(fd, I_STR, uintptr(unsafe.Pointer(&istr))); err != nil {
return 0, err
var status syscall.Stat_t
if err := syscall.Fstat(int(fd), &status); err != nil {
return 0, err
return uint64(minor(status.Rdev)), nil
// ptOwn holds a user ID and a group ID as two consecutive 32-bit integers.
// grantpt fills it from os.Getuid and os.Getgid and passes its address to an
// I_STR ioctl.
type ptOwn struct {
	rUID, rGID int32
}
func grantpt(f *os.File) error {
if _, err := ptsdev(f.Fd()); err != nil {
return err
pto := ptOwn{
rUID: int32(os.Getuid()),
// XXX should first attempt to get gid of DEFAULT_TTY_GROUP="tty"
rGID: int32(os.Getgid()),
istr := strioctl{
icTimeout: 0,
icLen: int32(unsafe.Sizeof(strioctl{})),
icDP: unsafe.Pointer(&pto),
if err := ioctl(f.Fd(), I_STR, uintptr(unsafe.Pointer(&istr))); err != nil {
return errors.New("access denied")
return nil
// streamsPush pushes STREAMS modules if not already done so.
func streamsPush(f *os.File, mod string) error {
buf := []byte(mod)
// XXX I_FIND is not returning an error when the module
// is already pushed even though truss reports a return
// value of 1. A bug in the Go Solaris syscall interface?
// XXX without this we are at risk of the issue
// https://www.illumos.org/issues/9042
// but since we are not using libc or XPG4.2, we should not be
// double-pushing modules
if err := ioctl(f.Fd(), I_FIND, uintptr(unsafe.Pointer(&buf[0]))); err != nil {
return nil
return ioctl(f.Fd(), I_PUSH, uintptr(unsafe.Pointer(&buf[0])))
@ -0,0 +1,12 @@
//go:build !linux && !darwin && !freebsd && !dragonfly && !netbsd && !openbsd && !solaris
// +build !linux,!darwin,!freebsd,!dragonfly,!netbsd,!openbsd,!solaris
package pty
import (
func open() (pty, tty *os.File, err error) {
return nil, nil, ErrUnsupported
@ -0,0 +1,57 @@
// and c.Stderr, calls c.Start, and returns the File of the tty's
// corresponding pty.
// Starts the process in a new session and sets the controlling terminal.
func Start(cmd *exec.Cmd) (*os.File, error) {
return StartWithSize(cmd, nil)
// StartWithAttrs assigns a pseudo-terminal tty os.File to c.Stdin, c.Stdout,
// and c.Stderr, calls c.Start, and returns the File of the tty's
// corresponding pty.
// This will resize the pty to the specified size before starting the command if a size is provided.
// The `attrs` parameter overrides the one set in c.SysProcAttr.
// This should generally not be needed. Used in some edge cases where it is needed to create a pty
// without a controlling terminal.
func StartWithAttrs(c *exec.Cmd, sz *Winsize, attrs *syscall.SysProcAttr) (*os.File, error) {
pty, tty, err := Open()
if err != nil {
return nil, err
defer func() { _ = tty.Close() }() // Best effort.
if sz != nil {
if err := Setsize(pty, sz); err != nil {
_ = pty.Close() // Best effort.
return nil, err
if c.Stdout == nil {
c.Stdout = tty
if c.Stderr == nil {
c.Stderr = tty
if c.Stdin == nil {
c.Stdin = tty
c.SysProcAttr = attrs
if err := c.Start(); err != nil {
_ = pty.Close() // Best effort.
return nil, err
return pty, err
@ -0,0 +1,25 @@
import (
// StartWithSize assigns a pseudo-terminal tty os.File to c.Stdin, c.Stdout,
// and c.Stderr, calls c.Start, and returns the File of the tty's
// corresponding pty.
// This will resize the pty to the specified size before starting the command.
// Starts the process in a new session and sets the controlling terminal.
func StartWithSize(cmd *exec.Cmd, ws *Winsize) (*os.File, error) {
if cmd.SysProcAttr == nil {
cmd.SysProcAttr = &syscall.SysProcAttr{}
cmd.SysProcAttr.Setsid = true
cmd.SysProcAttr.Setctty = true
return StartWithAttrs(cmd, ws, cmd.SysProcAttr)
@ -0,0 +1,19 @@
import (
// StartWithSize assigns a pseudo-terminal tty os.File to c.Stdin, c.Stdout,
// and c.Stderr, calls c.Start, and returns the File of the tty's
// corresponding pty.
// This will resize the pty to the specified size before starting the command.
// Starts the process in a new session and sets the controlling terminal.
func StartWithSize(cmd *exec.Cmd, ws *Winsize) (*os.File, error) {
return nil, ErrUnsupported
@ -0,0 +1,64 @@
#!/usr/bin/env sh
# Test script checking that all expected os/arch compile properly.
# Does not actually test the logic, just the compilation so we make sure we don't break code depending on the lib.
# echo2 prints its arguments to stderr.
# "$@" is quoted so arguments are neither re-split on whitespace nor
# glob-expanded before being printed.
echo2() {
  echo "$@" >&2
}
# Report the overall result when the script exits.
trap end 0

# end inspects the exit status in effect when the trap fires and prints
# "Pass." for 0, "Fail." otherwise. An explicit if/else is used so the failure
# branch cannot also run when printing "Pass." itself fails.
end() {
  if [ "$?" = 0 ]; then
    echo2 "Pass."
  else
    echo2 "Fail."
    return 1
  fi
}
# cross builds the package for one OS on each listed architecture.
# Usage: cross <os> <arch>...
cross() {
  # The first argument is the target OS; the remaining ones are architectures.
  os=$1
  shift
  echo2 "Build for $os."
  for arch in "$@"; do
    echo2 " - $os/$arch"
    GOOS=$os GOARCH=$arch go build
  done
}
# Abort on the first failing build.
set -e

cross linux amd64 386 arm arm64 ppc64 ppc64le s390x mips mipsle mips64 mips64le
cross darwin amd64 arm64
cross freebsd amd64 386 arm arm64
cross netbsd amd64 386 arm arm64
cross openbsd amd64 386 arm arm64
cross dragonfly amd64
cross solaris amd64

# Not expected to work but should still compile.
cross windows amd64 386 arm

# TODO: Fix compilation error on openbsd/arm.
# TODO: Merge the solaris PR.

# Some os/arch require a different compiler. Run in docker.
if ! hash docker; then
  # If docker is not present, stop here.
  exit 0
fi

echo2 "Build for linux."
echo2 " - linux/riscv"
docker build -t creack-pty-test -f Dockerfile.riscv .

# Golang dropped support for 32-bit darwin in go1.15. Make sure the lib still compiles with go1.14 on those archs.
echo2 "Build for darwin (32bits)."
echo2 " - darwin/386"
docker build -t creack-pty-test -f Dockerfile.golang --build-arg=GOVERSION=1.14 --build-arg=GOOS=darwin --build-arg=GOARCH=386 .
echo2 " - darwin/arm"
docker build -t creack-pty-test -f Dockerfile.golang --build-arg=GOVERSION=1.14 --build-arg=GOOS=darwin --build-arg=GOARCH=arm .

# Run a single test for an old go version. Would be best with go1.0, but it is not available on Dockerhub.
# Using 1.6 as it is the base version for the RISCV compiler.
# It would also be better to run all the tests, not just one; this file needs refactoring to allow specific archs per version.
echo2 "Build for linux - go1.6."
echo2 " - linux/amd64"
docker build -t creack-pty-test -f Dockerfile.golang --build-arg=GOVERSION=1.6 --build-arg=GOOS=linux --build-arg=GOARCH=amd64 .
@ -0,0 +1,27 @@
package pty
import "os"
// InheritSize applies the terminal size of pty to tty. This should be run
// in a signal handler for syscall.SIGWINCH to automatically resize the tty when
// the pty receives a window size change notification.
func InheritSize(pty, tty *os.File) error {
size, err := GetsizeFull(pty)
if err != nil {
return err
if err := Setsize(tty, size); err != nil {
return err
return nil
// Getsize returns the number of rows (lines) and cols (positions
// in each line) in terminal t.
func Getsize(t *os.File) (rows, cols int, err error) {
ws, err := GetsizeFull(t)
if err != nil {
return 0, 0, err
return int(ws.Rows), int(ws.Cols), nil
@ -0,0 +1,35 @@
import (
// Winsize describes the terminal size. Its address is passed directly to the
// window-size ioctls by Setsize and GetsizeFull, so the field order matters.
type Winsize struct {
	// Rows is ws_row: the number of rows (in cells).
	Rows uint16
	// Cols is ws_col: the number of columns (in cells).
	Cols uint16
	// X is ws_xpixel: the width in pixels.
	X uint16
	// Y is ws_ypixel: the height in pixels.
	Y uint16
}
// Setsize resizes t to s.
func Setsize(t *os.File, ws *Winsize) error {
//nolint:gosec // Expected unsafe pointer for Syscall call.
return ioctl(t.Fd(), syscall.TIOCSWINSZ, uintptr(unsafe.Pointer(ws)))
// GetsizeFull returns the full terminal size description.
func GetsizeFull(t *os.File) (size *Winsize, err error) {
var ws Winsize
//nolint:gosec // Expected unsafe pointer for Syscall call.
if err := ioctl(t.Fd(), syscall.TIOCGWINSZ, uintptr(unsafe.Pointer(&ws))); err != nil {
return nil, err
return &ws, nil
@ -0,0 +1,23 @@
import (
// Winsize is a dummy struct to enable compilation on unsupported platforms.
// It carries four 16-bit fields: Rows, Cols, X and Y.
type Winsize struct {
	Rows uint16
	Cols uint16
	X    uint16
	Y    uint16
}
// Setsize resizes t to s.
func Setsize(*os.File, *Winsize) error {
return ErrUnsupported
// GetsizeFull returns the full terminal size description.
func GetsizeFull(*os.File) (*Winsize, error) {
return nil, ErrUnsupported
@ -0,0 +1,12 @@
// cgo -godefs types.go
package pty
// Fixed-width integer types; the file header marks these definitions as
// output of `cgo -godefs types.go`, so they should be regenerated rather than
// edited by hand.
type (
// _C_int is a 32-bit signed integer (presumably mirroring C int on this target — confirm against types.go).
_C_int int32
// _C_uint is a 32-bit unsigned integer (presumably mirroring C unsigned int on this target — confirm against types.go).
_C_uint uint32
@ -0,0 +1,12 @@
// cgo -godefs types.go
package pty
// Fixed-width integer types; the file header marks these definitions as
// output of `cgo -godefs types.go`, so they should be regenerated rather than
// edited by hand.
type (
// _C_int is a 32-bit signed integer (presumably mirroring C int on this target — confirm against types.go).
_C_int int32
// _C_uint is a 32-bit unsigned integer (presumably mirroring C unsigned int on this target — confirm against types.go).
_C_uint uint32
@ -0,0 +1,12 @@
// cgo -godefs types.go
package pty
// Fixed-width integer types; the file header marks these definitions as
// output of `cgo -godefs types.go`, so they should be regenerated rather than
// edited by hand.
type (
// _C_int is a 32-bit signed integer (presumably mirroring C int on this target — confirm against types.go).
_C_int int32
// _C_uint is a 32-bit unsigned integer (presumably mirroring C unsigned int on this target — confirm against types.go).
_C_uint uint32
@ -0,0 +1,12 @@
// cgo -godefs types.go
package pty
// Fixed-width integer types; the file header marks these definitions as
// output of `cgo -godefs types.go`, so they should be regenerated rather than
// edited by hand.
type (
// _C_int is a 32-bit signed integer (presumably mirroring C int on this target — confirm against types.go).
_C_int int32
// _C_uint is a 32-bit unsigned integer (presumably mirroring C unsigned int on this target — confirm against types.go).
_C_uint uint32
@ -0,0 +1,17 @@
// cgo -godefs types_dragonfly.go
package pty
const (
// fiodgnameArg is a struct definition that the file header marks as output of
// `cgo -godefs types_dragonfly.go`; it should be regenerated rather than
// edited by hand.
// NOTE(review): presumably the argument block of the FIODGNAME ioctl — confirm at the call site.
type fiodgnameArg struct {
// Name points at a byte (presumably the start of the buffer receiving the device name — confirm).
Name *byte
// Len is a 32-bit unsigned length (presumably the size of the Name buffer — confirm).
Len uint32
// Pad_cgo_0 is four bytes of explicit padding after Len.
Pad_cgo_0 [4]byte
@ -0,0 +1,16 @@
// cgo -godefs types_freebsd.go
package pty
const (
// fiodgnameArg is a struct definition that the file header marks as output of
// `cgo -godefs types_freebsd.go`; it should be regenerated rather than edited
// by hand.
// NOTE(review): presumably the argument block of the FIODGNAME ioctl — confirm at the call site.
type fiodgnameArg struct {
// Len is a 32-bit signed length (presumably the size of the Buf buffer — confirm).
Len int32
// Buf points at a byte (presumably the start of the buffer receiving the device name — confirm).
Buf *byte
@ -0,0 +1,17 @@
// cgo -godefs types_freebsd.go
package pty
const (
// fiodgnameArg is a struct definition that the file header marks as output of
// `cgo -godefs types_freebsd.go`; it should be regenerated rather than edited
// by hand.
// NOTE(review): presumably the argument block of the FIODGNAME ioctl — confirm at the call site.
type fiodgnameArg struct {
// Len is a 32-bit signed length (presumably the size of the Buf buffer — confirm).
Len int32
// Pad_cgo_0 is four bytes of explicit padding between Len and Buf.
Pad_cgo_0 [4]byte
// Buf points at a byte (presumably the start of the buffer receiving the device name — confirm).
Buf *byte
@ -0,0 +1,16 @@
// cgo -godefs types_freebsd.go
package pty
const (
// fiodgnameArg is a struct definition that the file header marks as output of
// `cgo -godefs types_freebsd.go`; it should be regenerated rather than edited
// by hand.
// NOTE(review): presumably the argument block of the FIODGNAME ioctl — confirm at the call site.
type fiodgnameArg struct {
// Len is a 32-bit signed length (presumably the size of the Buf buffer — confirm).
Len int32
// Buf points at a byte (presumably the start of the buffer receiving the device name — confirm).
Buf *byte
@ -0,0 +1,16 @@
// cgo -godefs types_freebsd.go
package pty
const (
// fiodgnameArg is a struct definition that the file header marks as output of
// `cgo -godefs types_freebsd.go`; it should be regenerated rather than edited
// by hand.
// NOTE(review): presumably the argument block of the FIODGNAME ioctl — confirm at the call site.
type fiodgnameArg struct {
// Len is a 32-bit signed length (presumably the size of the Buf buffer — confirm).
Len int32
// Buf points at a byte (presumably the start of the buffer receiving the device name — confirm).
Buf *byte
@ -0,0 +1,14 @@
package pty
const (
// fiodgnameArg pairs a 32-bit length with a byte pointer, separated by
// explicit padding.
// NOTE(review): presumably the argument block of the FIODGNAME ioctl, with a
// layout that must match the kernel's — confirm at the call site before
// changing any field.
type fiodgnameArg struct {
// Len is a 32-bit signed length (presumably the size of the Buf buffer — confirm).
Len int32
// Pad_cgo_0 is four bytes of explicit padding between Len and Buf.
Pad_cgo_0 [4]byte
// Buf points at a byte (presumably the start of the buffer receiving the device name — confirm).
Buf *byte
@ -0,0 +1,12 @@
// cgo -godefs types.go
package pty
// Fixed-width integer types; the file header marks these definitions as
// output of `cgo -godefs types.go`, so they should be regenerated rather than
// edited by hand.
type (
// _C_int is a 32-bit signed integer (presumably mirroring C int on this target — confirm against types.go).
_C_int int32
// _C_uint is a 32-bit unsigned integer (presumably mirroring C unsigned int on this target — confirm against types.go).
_C_uint uint32
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue