diff --git a/Makefile b/Makefile index 42238eb..eeb9f77 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -VERSION := 0.8.13 +VERSION := 0.8.19 .PHONY: lint vis clean common client server passwd subpkgs install uninstall reinstall ## Tag version of binaries with build info wrt. diff --git a/auth.go b/auth.go index 6e0adcb..017dd98 100644 --- a/auth.go +++ b/auth.go @@ -26,12 +26,25 @@ import ( passlib "gopkg.in/hlandau/passlib.v1" ) +type AuthCtx struct { + reader func(string) ([]byte, error) // eg. ioutil.ReadFile() + userlookup func(string) (*user.User, error) // eg. os/user.Lookup() +} + +func NewAuthCtx( /*reader func(string) ([]byte, error), userlookup func(string) (*user.User, error)*/ ) (ret *AuthCtx) { + ret = &AuthCtx{ioutil.ReadFile, user.Lookup} + return +} + // --------- System passwd/shadow auth routine(s) -------------- // Verify a password against system standard shadow file // Note auxilliary fields for expiry policy are *not* inspected. -func VerifyPass(user, password string) (bool, error) { +func VerifyPass(ctx *AuthCtx, user, password string) (bool, error) { + if ctx.reader == nil { + ctx.reader = ioutil.ReadFile // dependency injection hides that this is required + } passlib.UseDefaults(passlib.Defaults20180601) - pwFileData, e := ioutil.ReadFile("/etc/shadow") + pwFileData, e := ctx.reader("/etc/shadow") if e != nil { return false, e } @@ -70,8 +83,14 @@ func VerifyPass(user, password string) (bool, error) { // This checks /etc/xs.passwd for auth info, and system /etc/passwd // to cross-check the user actually exists. 
// nolint: gocyclo -func AuthUserByPasswd(username string, auth string, fname string) (valid bool, allowedCmds string) { - b, e := ioutil.ReadFile(fname) // nolint: gosec +func AuthUserByPasswd(ctx *AuthCtx, username string, auth string, fname string) (valid bool, allowedCmds string) { + if ctx.reader == nil { + ctx.reader = ioutil.ReadFile // dependency injection hides that this is required + } + if ctx.userlookup == nil { + ctx.userlookup = user.Lookup // again for dependency injection as dep is now hidden + } + b, e := ctx.reader(fname) // nolint: gosec if e != nil { valid = false log.Printf("ERROR: Cannot read %s!\n", fname) @@ -115,7 +134,8 @@ func AuthUserByPasswd(username string, auth string, fname string) (valid bool, a r = nil runtime.GC() - if !userExistsOnSystem(username) { + _, userErr := ctx.userlookup(username) + if userErr != nil { valid = false } return @@ -123,24 +143,26 @@ func AuthUserByPasswd(username string, auth string, fname string) (valid bool, a // ------------- End xs-local passwd auth routine(s) ----------- -func userExistsOnSystem(who string) bool { - _, userErr := user.Lookup(who) - return userErr == nil -} - // AuthUserByToken checks user login information against an auth token. // Auth tokens are stored in each user's $HOME/.xs_id and are requested // via the -g option. // The function also check system /etc/passwd to cross-check the user // actually exists. 
-func AuthUserByToken(username string, connhostname string, auth string) (valid bool) { +func AuthUserByToken(ctx *AuthCtx, username string, connhostname string, auth string) (valid bool) { + if ctx.reader == nil { + ctx.reader = ioutil.ReadFile // dependency injection hides that this is required + } + if ctx.userlookup == nil { + ctx.userlookup = user.Lookup // again for dependency injection as dep is now hidden + } + auth = strings.TrimSpace(auth) - u, ue := user.Lookup(username) + u, ue := ctx.userlookup(username) if ue != nil { return false } - b, e := ioutil.ReadFile(fmt.Sprintf("%s/.xs_id", u.HomeDir)) + b, e := ctx.reader(fmt.Sprintf("%s/.xs_id", u.HomeDir)) if e != nil { log.Printf("INFO: Cannot read %s/.xs_id\n", u.HomeDir) return false @@ -167,7 +189,8 @@ func AuthUserByToken(username string, connhostname string, auth string) (valid b break } } - if !userExistsOnSystem(username) { + _, userErr := ctx.userlookup(username) + if userErr != nil { valid = false } return diff --git a/auth_test.go b/auth_test.go new file mode 100644 index 0000000..cfcec60 --- /dev/null +++ b/auth_test.go @@ -0,0 +1,212 @@ +package xs + +import ( + "errors" + "fmt" + "os/user" + "strings" + "testing" +) + +type userVerifs struct { + user string + passwd string + good bool +} + +var ( + dummyShadowA = `johndoe:$6$EeQlTtn/KXdSh6CW$UHbFuEw3UA0Jg9/GoPHxgWk6Ws31x3IjqsP22a9pVMOte0yQwX1.K34oI4FACu8GRg9DArJ5RyWUE9m98qwzZ1:18310:0:99999:7::: +joebloggs:$6$F.0IXOrb0w0VJHG1$3O4PYyng7F3hlh42mbroEdQZvslybY5etPPiLMQJ1xosjABY.Q4xqAfyIfe03Du61ZjGQIt3nL0j12P9k1fsK/:18310:0:99999:7::: +disableduser:!:18310::::::` + + dummyAuthTokenFile = "hostA:abcdefg\nhostB:wxyz\n" + + dummyXsPasswdFile = `#username:salt:authCookie +bobdobbs:$2a$12$9vqGkFqikspe/2dTARqu1O:$2a$12$9vqGkFqikspe/2dTARqu1OuDKCQ/RYWsnaFjmi.HtmECRkxcZ.kBK +notbob:$2a$12$cZpiYaq5U998cOkXzRKdyu:$2a$12$cZpiYaq5U998cOkXzRKdyuJ2FoEQyVLa3QkYdPQk74VXMoAzhvuP6 +` + + testGoodUsers = []userVerifs{ + {"johndoe", "testpass", true}, + {"joebloggs", 
"testpass2", true}, + {"johndoe", "badpass", false}, + } + + testXsPasswdUsers = []userVerifs{ + {"bobdobbs", "praisebob", true}, + {"notbob", "imposter", false}, + } + + userlookup_arg_u string + readfile_arg_f string +) + +func newMockAuthCtx(reader func(string) ([]byte, error), userlookup func(string) (*user.User, error)) (ret *AuthCtx) { + ret = &AuthCtx{reader, userlookup} + return +} + +func _mock_user_Lookup(username string) (*user.User, error) { + username = userlookup_arg_u + if username == "baduser" { + return &user.User{}, errors.New("bad user") + } + urec := &user.User{Uid: "1000", Gid: "1000", Username: username, Name: "Full Name", HomeDir: "/home/user"} + fmt.Printf(" [mock user rec:%v]\n", urec) + return urec, nil +} + +func _mock_ioutil_ReadFile(f string) ([]byte, error) { + f = readfile_arg_f + if f == "/etc/shadow" { + fmt.Println(" [mocking ReadFile(\"/etc/shadow\")]") + return []byte(dummyShadowA), nil + } + if f == "/etc/xs.passwd" { + fmt.Println(" [mocking ReadFile(\"/etc/xs.passwd\")]") + return []byte(dummyXsPasswdFile), nil + } + if strings.Contains(f, "/.xs_id") { + fmt.Println(" [mocking ReadFile(\".xs_id\")]") + return []byte(dummyAuthTokenFile), nil + } + return []byte{}, errors.New("no readfile_arg_f supplied") +} + +func _mock_ioutil_ReadFileEmpty(f string) ([]byte, error) { + return []byte{}, nil +} + +func _mock_ioutil_ReadFileHasError(f string) ([]byte, error) { + return []byte{}, errors.New("IO Error") +} + +func TestVerifyPass(t *testing.T) { + readfile_arg_f = "/etc/shadow" + ctx := newMockAuthCtx(_mock_ioutil_ReadFile, nil) + for idx, rec := range testGoodUsers { + stat, e := VerifyPass(ctx, rec.user, rec.passwd) + if rec.good && (!stat || e != nil) { + t.Fatalf("failed %d\n", idx) + } + } +} + +func TestVerifyPassFailsOnEmptyFile(t *testing.T) { + ctx := newMockAuthCtx(_mock_ioutil_ReadFileEmpty, nil) + stat, e := VerifyPass(ctx, "johndoe", "somepass") + if stat || (e == nil) { + t.Fatal("failed to fail w/empty file") + } +} 
+ +func TestVerifyPassFailsOnFileError(t *testing.T) { + ctx := newMockAuthCtx(_mock_ioutil_ReadFileEmpty, nil) + stat, e := VerifyPass(ctx, "johndoe", "somepass") + if stat || (e == nil) { + t.Fatal("failed to fail on ioutil.ReadFile error") + } +} + +func TestVerifyPassFailsOnDisabledEntry(t *testing.T) { + ctx := newMockAuthCtx(_mock_ioutil_ReadFileEmpty, nil) + stat, e := VerifyPass(ctx, "disableduser", "!") + if stat || (e == nil) { + t.Fatal("failed to fail on disabled user entry") + } +} + +//// + +func TestAuthUserByTokenFailsOnMissingEntryForHost(t *testing.T) { + ctx := newMockAuthCtx(_mock_ioutil_ReadFile, _mock_user_Lookup) + stat := AuthUserByToken(ctx, "johndoe", "hostZ", "abcdefg") + if stat { + t.Fatal("failed to fail on missing/mismatched host entry") + } +} + +func TestAuthUserByTokenFailsOnMissingEntryForUser(t *testing.T) { + ctx := newMockAuthCtx(_mock_ioutil_ReadFile, _mock_user_Lookup) + stat := AuthUserByToken(ctx, "unkuser", "hostA", "abcdefg") + if stat { + t.Fatal("failed to fail on wrong user") + } +} + +func TestAuthUserByTokenFailsOnUserLookupFailure(t *testing.T) { + ctx := newMockAuthCtx(_mock_ioutil_ReadFile, _mock_user_Lookup) + userlookup_arg_u = "baduser" + stat := AuthUserByToken(ctx, "johndoe", "hostA", "abcdefg") + if stat { + t.Fatal("failed to fail with bad return from user.Lookup()") + } +} + +func TestAuthUserByTokenFailsOnMismatchedTokenForUser(t *testing.T) { + ctx := newMockAuthCtx(_mock_ioutil_ReadFile, _mock_user_Lookup) + stat := AuthUserByToken(ctx, "johndoe", "hostA", "badtoken") + if stat { + t.Fatal("failed to fail with valid user, bad token") + } +} + +func TestAuthUserByTokenSucceedsWithMatchedUserAndToken(t *testing.T) { + ctx := newMockAuthCtx(_mock_ioutil_ReadFile, _mock_user_Lookup) + userlookup_arg_u = "johndoe" + readfile_arg_f = "/.xs_id" + stat := AuthUserByToken(ctx, userlookup_arg_u, "hostA", "hostA:abcdefg") + if !stat { + t.Fatal("failed with valid user and token") + } +} + +func 
TestAuthUserByPasswdFailsOnEmptyFile(t *testing.T) { + ctx := newMockAuthCtx(_mock_ioutil_ReadFileEmpty, _mock_user_Lookup) + userlookup_arg_u = "bobdobbs" + readfile_arg_f = "/etc/xs.passwd" + stat, _ := AuthUserByPasswd(ctx, userlookup_arg_u, "praisebob", readfile_arg_f) + if stat { + t.Fatal("failed to fail with missing xs.passwd file") + } +} + +func TestAuthUserByPasswdFailsOnBadAuth(t *testing.T) { + ctx := newMockAuthCtx(_mock_ioutil_ReadFile, _mock_user_Lookup) + userlookup_arg_u = "bobdobbs" + readfile_arg_f = "/etc/xs.passwd" + stat, _ := AuthUserByPasswd(ctx, userlookup_arg_u, "wrongpass", readfile_arg_f) + if stat { + t.Fatal("failed to fail with valid user, incorrect passwd in xs.passwd file") + } +} + +func TestAuthUserByPasswdFailsOnBadUser(t *testing.T) { + ctx := newMockAuthCtx(_mock_ioutil_ReadFile, _mock_user_Lookup) + userlookup_arg_u = "bobdobbs" + readfile_arg_f = "/etc/xs.passwd" + stat, _ := AuthUserByPasswd(ctx, userlookup_arg_u, "theotherbob", readfile_arg_f) + if stat { + t.Fatal("failed to fail on invalid user vs. 
xs.passwd file") + } +} + +func TestAuthUserByPasswdPassesOnGoodAuth(t *testing.T) { + ctx := newMockAuthCtx(_mock_ioutil_ReadFile, _mock_user_Lookup) + userlookup_arg_u = "bobdobbs" + readfile_arg_f = "/etc/xs.passwd" + stat, _ := AuthUserByPasswd(ctx, userlookup_arg_u, "praisebob", readfile_arg_f) + if !stat { + t.Fatal("failed on valid user w/correct passwd in xs.passwd file") + } +} + +func TestAuthUserByPasswdPassesOnOtherGoodAuth(t *testing.T) { + ctx := newMockAuthCtx(_mock_ioutil_ReadFile, _mock_user_Lookup) + userlookup_arg_u = "notbob" + readfile_arg_f = "/etc/xs.passwd" + stat, _ := AuthUserByPasswd(ctx, userlookup_arg_u, "imposter", readfile_arg_f) + if !stat { + t.Fatal("failed on valid user 2nd entry w/correct passwd in xs.passwd file") + } +} diff --git a/bacillus/ci_pushbuild.sh b/bacillus/ci_pushbuild.sh index 46cc42a..c49a483 100755 --- a/bacillus/ci_pushbuild.sh +++ b/bacillus/ci_pushbuild.sh @@ -25,6 +25,9 @@ ls stage "Build" make all +stage "UnitTests" +go test -v . + stage "Test(Authtoken)" echo "Clearing test user $USER ~/.xs_id file ..." 
rm -f ~/.xs_id diff --git a/go.mod b/go.mod index 9860336..1e35ba1 100644 --- a/go.mod +++ b/go.mod @@ -3,13 +3,13 @@ module blitter.com/go/xs go 1.12 require ( - blitter.com/go/cryptmt v1.0.0 + blitter.com/go/cryptmt v1.0.2 blitter.com/go/goutmp v1.0.2 blitter.com/go/herradurakex v1.0.0 blitter.com/go/kyber v0.0.0-20200130200857-6f2021cb88d9 blitter.com/go/mtwist v1.0.1 // indirect blitter.com/go/newhope v0.0.0-20200130200750-192fc08a8aae - blitter.com/go/wanderer v0.8.1 + github.com/aead/chacha20 v0.0.0-20180709150244-8b13a72661da github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f github.com/klauspost/cpuid v1.2.2 // indirect github.com/klauspost/reedsolomon v1.9.3 // indirect diff --git a/go.sum b/go.sum index 23bb532..7760711 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,10 @@ blitter.com/go/chacha20 v0.0.0-20200130200441-214e4085f54c h1:LcnFFg6MCIJHf26P7e blitter.com/go/chacha20 v0.0.0-20200130200441-214e4085f54c/go.mod h1:EMJtRcf22WCtHGiXCw+NB/Sb/PYcXtUgUql6LDEwyXo= blitter.com/go/cryptmt v1.0.0 h1:n+cNP/ReZrNe/w5FbD8DSfv0Wpj48nxhmMoLEk4hPXs= blitter.com/go/cryptmt v1.0.0/go.mod h1:tdME2J3O4agaDAYIYNQzzuB28yVGnPSMmV3a/ucSU84= +blitter.com/go/cryptmt v1.0.1 h1:NAi4FrZqo52bhPJopYw1jbausj1NnHEWELaINC60Nk0= +blitter.com/go/cryptmt v1.0.1/go.mod h1:tdME2J3O4agaDAYIYNQzzuB28yVGnPSMmV3a/ucSU84= +blitter.com/go/cryptmt v1.0.2 h1:ZcLhQk7onUssXyQwG3GdXDXctCVnNL+b7aFuvwOdKXc= +blitter.com/go/cryptmt v1.0.2/go.mod h1:tdME2J3O4agaDAYIYNQzzuB28yVGnPSMmV3a/ucSU84= blitter.com/go/goutmp v1.0.1 h1:jBqtp6pDwSbF4QEC3DjNfyaS8Nv5dFCOyaTfSbbb7TU= blitter.com/go/goutmp v1.0.1/go.mod h1:gtlbjC8xGzMk/Cf0BpnVltSa3awOqJ+B5WAxVptTMxk= blitter.com/go/goutmp v1.0.2 h1:oCc/dt9TlTOP2kvmX1Y7J/wSQUhywjcyF101jXuLxZ8= @@ -16,12 +20,16 @@ blitter.com/go/newhope v0.0.0-20200130200750-192fc08a8aae h1:YBBaCcdYRrI1btsmcMT blitter.com/go/newhope v0.0.0-20200130200750-192fc08a8aae/go.mod h1:ywoxfDBqInPsqtnxYsmS4SYMJ5D/kNcrFgpvI+Xcun0= blitter.com/go/wanderer v0.8.1 
h1:oQw8yASM7iI+S8GIgf3cUFdkJ8Sy/UQxRDJqhTswgwM= blitter.com/go/wanderer v0.8.1/go.mod h1:FX1pAnZ5woEavy5CUIZco0/Gc2Msb3U0zsmi+6Hs4Rw= +blitter.com/go/wanderer v0.8.2 h1:fzwRn60RDDxy4GEYxSyfA4gXkkZb33WQRk/Fv5ugPAI= +blitter.com/go/wanderer v0.8.2/go.mod h1:FX1pAnZ5woEavy5CUIZco0/Gc2Msb3U0zsmi+6Hs4Rw= git.schwanenlied.me/yawning/chacha20.git v0.0.0-20170904085104-e3b1f968fc63 h1:bwZNsbw3qFbg6ox55HrA37nPmh+/wtJxZ7uWeiAdUUc= git.schwanenlied.me/yawning/chacha20.git v0.0.0-20170904085104-e3b1f968fc63/go.mod h1:NYi4Ifd1g/YbhIDgDfw6t7QdsW4tofQWMX/+FiDtJWs= git.schwanenlied.me/yawning/kyber.git v0.0.0-20180530164001-a270899bd22c h1:SGOx1s56QSOmuCegRcG3yvOG7W8PvRS9ZVnFQl5K2aQ= git.schwanenlied.me/yawning/kyber.git v0.0.0-20180530164001-a270899bd22c/go.mod h1:QrbgzU5EL/1jaMD5pD4Tiikj3R5elPMa+RMwFUTGwQU= git.schwanenlied.me/yawning/newhope.git v0.0.0-20170622154529-9598792ba8f2 h1:89TYv/+wotJ+QWrH5B/yN0pEQutr2V/5za0VoYiVGCM= git.schwanenlied.me/yawning/newhope.git v0.0.0-20170622154529-9598792ba8f2/go.mod h1:weMqACFGzJs4Ni+K9shsRd02N4LkDrtGlkRxISK+II0= +github.com/aead/chacha20 v0.0.0-20180709150244-8b13a72661da h1:KjTM2ks9d14ZYCvmHS9iAKVt9AyzRSqNU1qabPih5BY= +github.com/aead/chacha20 v0.0.0-20180709150244-8b13a72661da/go.mod h1:eHEWzANqSiWQsof+nXEI9bUVUyV6F53Fp89EuCh2EAA= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f h1:UWGE8Vi+1Agt0lrvnd7UsmvwqWKRzb9byK9iQmsbY0Y= diff --git a/logger/logger_linux.go b/logger/logger_linux.go index 7c635fa..5e6f304 100644 --- a/logger/logger_linux.go +++ b/logger/logger_linux.go @@ -11,6 +11,7 @@ import ( // Priority is the logger priority type Priority = sl.Priority + // Writer is a syslog Writer type Writer = sl.Writer @@ -75,50 +76,81 @@ func New(flags Priority, tag string) (w *Writer, e error) { // Alert returns a log Alert error func Alert(s string) 
error { + if l != nil { return l.Alert(s) + } + return nil + } // LogClose closes the log Writer. func LogClose() error { + if l != nil { return l.Close() + } + return nil } // LogCrit returns a log Alert error func LogCrit(s string) error { + if l != nil { return l.Crit(s) + } + return nil } // LogDebug returns a log Debug error func LogDebug(s string) error { + if l != nil { return l.Debug(s) + } + return nil } // LogEmerg returns a log Emerg error func LogEmerg(s string) error { + if l != nil { return l.Emerg(s) + } + return nil } // LogErr returns a log Err error func LogErr(s string) error { + if l != nil { return l.Err(s) + } + return nil } // LogInfo returns a log Info error func LogInfo(s string) error { + if l != nil { return l.Info(s) + } + return nil } // LogNotice returns a log Notice error func LogNotice(s string) error { + if l != nil { return l.Notice(s) + } + return nil } // LogWarning returns a log Warning error func LogWarning(s string) error { + if l != nil { return l.Warning(s) + } + return nil } // LogWrite writes to the logger at default level func LogWrite(b []byte) (int, error) { + if l != nil { return l.Write(b) + } + return len(b),nil } diff --git a/session_test.go b/session_test.go new file mode 100644 index 0000000..0670ea8 --- /dev/null +++ b/session_test.go @@ -0,0 +1,30 @@ +package xs + +import ( + "testing" +) + +func _newMockSession() (s *Session) { + s = &Session{op: []byte("A"), + who: []byte("johndoe"), + connhost: []byte("host"), + termtype: []byte("vt100"), + cmd: []byte("/bin/false"), + authCookie: []byte("authcookie"), + status: 0} + return s +} + +func TestSessionAuthCookieShowTrue(t *testing.T) { + sess := _newMockSession() + if string(sess.AuthCookie(true)) != string(sess.authCookie) { + t.Fatal("Failed to return unredacted authcookie on request") + } +} + +func TestSessionAuthCookieShowFalse(t *testing.T) { + sess := _newMockSession() + if string(sess.AuthCookie(false)) != string("**REDACTED**") { + t.Fatal("Failed to 
return redacted authcookie on request") + } +} diff --git a/vendor/blitter.com/go/cryptmt/cryptmt.go b/vendor/blitter.com/go/cryptmt/cryptmt.go index 6b3960f..3a627ed 100644 --- a/vendor/blitter.com/go/cryptmt/cryptmt.go +++ b/vendor/blitter.com/go/cryptmt/cryptmt.go @@ -1,6 +1,6 @@ // Package CryptMT - implementation of cryptMTv1 stream cipher -// (but with mtwist64 as base accum) -// https://eprint.iacr.org/2005/165.pdf +// (but with mtwist64 as base accum) +// https://eprint.iacr.org/2005/165.pdf package cryptmt // TODO rlm: according to go docs, stream ciphers do not implement the @@ -11,34 +11,52 @@ package cryptmt import ( "errors" + "io" mtwist "blitter.com/go/mtwist" ) type Cipher struct { + r io.Reader + w io.Writer accum uint64 m *mtwist.MT19937_64 } -func (c *Cipher) yield8() (r byte) { +func (c *Cipher) yield() (r byte) { c.accum = c.accum * (c.m.Int63() | 1) r = byte(c.accum>>56) & 0xFF return } -// NewCipher creates and returns a Cipher. The key argument should be the +// New creates and returns a Cipher. The key argument should be the // CryptMT key, 64 bytes. -func NewCipher(key []byte) (c *Cipher) { - c = &Cipher{m: mtwist.New()} +func New(r io.Reader, w io.Writer, key []byte) (c *Cipher) { + c = &Cipher{m: mtwist.New(), r: r, w: w} c.m.SeedFullState(key) c.accum = 1 // from paper, discard first 64 bytes of output for idx := 0; idx < 64; idx++ { - _ = c.yield8() + _ = c.yield() } return c } +func (c *Cipher) Read(p []byte) (n int, err error) { + n, err = c.r.Read(p) + if err == nil { + for idx := 0; idx < n; idx++ { + p[idx] = p[idx] ^ c.yield() + } + } + return n, err +} + +func (c *Cipher) Write(p []byte) (n int, err error) { + n, err = c.w.Write(p) + return n, err +} + // XORKeyStream XORs each byte in the given slice with a byte from the // cipher's key stream. Dst and src must overlap entirely or not at all. 
// @@ -55,6 +73,6 @@ func (c *Cipher) XORKeyStream(dst, src []byte) { } for i, b := range src { - dst[i] = b ^ c.yield8() + dst[i] = b ^ c.yield() } } diff --git a/vendor/blitter.com/go/goutmp/README.md b/vendor/blitter.com/go/goutmp/README.md index ec6fe95..cb956fc 100644 --- a/vendor/blitter.com/go/goutmp/README.md +++ b/vendor/blitter.com/go/goutmp/README.md @@ -10,8 +10,8 @@ Golang bindings for basic login/utmp accounting type UtmpEntry struct{ ... } -func Put_lastlog_entry(app string, usr string, host string) +func Put_lastlog_entry(app, usr, ptsname, host string) func Unput_utmp(entry UtmpEntry) -func Put_utmp(user string, host string) UtmpEntry +func Put_utmp(user, ptsname, host string) UtmpEntry ``` diff --git a/vendor/blitter.com/go/wanderer/wanderer.go b/vendor/blitter.com/go/wanderer/wanderer.go deleted file mode 100644 index fe24277..0000000 --- a/vendor/blitter.com/go/wanderer/wanderer.go +++ /dev/null @@ -1,210 +0,0 @@ -// WANDERER - a crypto doodle that appears to give adequate -// protection to data in a stream cipher context -// -// Properties visualized using https://github.com/circulosmeos/circle -package wanderer - -// TODOs: -// -define s-box rotation/shuffle schema -// -devise p-box schema (? Meh. Need to blockify & re-streamify to do this) -// ... - -import ( - "errors" - "fmt" - "io" - "time" - - mtwist "blitter.com/go/mtwist" -) - -const ( - keylen = 512 - sboxCount = keylen / 8 -) - -// Given input byte x (treated as 2-bit dirs), -// 'walk' box applying XOR of each position (E/S/W/N) given box -// dimensions w,h -// NOTE to ensure reachability of all values within a box, w,h -// should not each exceed 3 and should probably stay at 2, to -// give more even coverage given random input. 
-func walkingXOR(key, s []byte, w, h int, x byte) (r byte) { - i := 0 - r = x - for sidx := range key { - ktemp := key[sidx] - r = r ^ (s[i]) - for shift := uint(0); shift < 8; shift += 2 { - // fmt.Println("i:", i, "r:", r) - dir := (ktemp >> shift) & 0x03 - switch dir { - case 0: - i = i + 1 - i = i % len(s) - case 1: - i = i + w - i = i % len(s) - case 2: - if i%w != 0 { - i = i - 1 - } else { - i = i + w - 1 - } - case 3: - if i >= w { - i = i - w - } else { - i = len(s) + i - w - } - } - r = r ^ (s[i]) - } - } - return -} - -func (c *Cipher) genSBoxen(n uint) { - c.sboxen = make([][]byte, n) - var idx uint - for ; idx < n; idx++ { - c.sboxen[idx] = make([]byte, c.sw*c.sh) - _, _ = c.prng.Read(c.sboxen[idx]) - } - //fmt.Fprintf(os.Stderr, "sboxen[0]:%v\n", c.sboxen[0]) -} - -// Mutate the session key (intended to be called as encryption -// proceeds), so that the 'walk path' through sboxes also does so. -func (c *Cipher) keyUpdate(perturb byte) { - c.k[c.kidx] = c.k[c.kidx] ^ c.k[(c.kidx+1)%uint(len(c.k))] - c.k[c.kidx] = c.k[c.kidx] ^ byte((c.prng.Int63()>>4)%256) - c.kidx = (c.kidx + uint(perturb)) % uint(len(c.k)) -} - -// slow - perturb a single octet of a single sbox for each octet -// (CV = ~8.725% over 700 MiB of 0-byte pt) -func (c *Cipher) sboxUpdateA(perturb byte) { - c.sboxen[perturb%sboxCount][int(perturb)%(c.sw+c.sh)] ^= - perturb -} - -// slower - perturb a single sbox for each octet -// (CV = ~?% over 700 MiB of 0-byte pt) -func (c *Cipher) sboxUpdateB(perturb byte) { - lim := c.sw * c.sh - for idx := 0; idx < lim; idx++ { - c.sboxen[perturb%sboxCount][idx] ^= perturb - } -} - -// slowest -- full sbox re-gen after each octet -// (but lowest CV, ~0.05% over 700MiB of 0-byte pt) -func (c *Cipher) sboxUpdateC(perturb byte) { - c.genSBoxen(sboxCount) - //c.sboxen[perturb%sboxCount][int(perturb)%(c.sw+c.sh)] ^= - // perturb -} - -type Cipher struct { - prng *mtwist.MT19937_64 - r io.Reader - w io.Writer - k []byte - kidx uint - sboxen [][]byte - sw 
int - sh int - sctr int // TODO: used to count down to re-keying & sbox regen - mode int - n byte -} - -func NewCodec(r io.Reader, w io.Writer, mode int, key []byte, width, height int) (c *Cipher) { - c = &Cipher{} - c.prng = mtwist.New() - if len(key) == 0 { - c.k = []byte(fmt.Sprintf("%s", time.Now())) - } else { - c.k = key - } - c.prng.SeedFullState(c.k) - - // Discard first 64 bytes of MT output - for idx := 0; idx < 64; idx++ { - _ = c.prng.Int63() - } - c.mode = mode - c.r = r - c.w = w - c.sw = width - c.sh = height - c.sctr = c.sw // sbox ctr: countdown to regen sboxes - c.n = 0 - c.genSBoxen(sboxCount) - - // fmt.Printf("%+v\n", sboxen) - // c.buf = make([]byte, 4) - return c -} - -func (c *Cipher) Read(p []byte) (n int, err error) { - n, err = c.r.Read(p) - if err == nil { - for idx := 0; idx < n; idx++ { - p[idx] = c.yield(p[idx]) - } - } - return n, err -} - -func (c *Cipher) Write(p []byte) (n int, err error) { - n, err = c.w.Write(p) - return n, err -} - -func (c *Cipher) yield(pt byte) (ct byte) { - ct = walkingXOR(c.k, c.sboxen[c.n], c.sw, c.sh, pt) - c.n = (c.n + 1) % byte(len(c.sboxen)) - c.keyUpdate(ct ^ pt) // must be equal in either encrypt/decrypt dirs - switch c.mode { - case 0: - // [nothing - varA] - break - case 1: - c.sboxUpdateA(ct ^ pt) // varA - case 2: - c.sboxUpdateB(ct ^ pt) // varB - case 3: - c.sboxUpdateC(ct ^ pt) // varC - default: - // [nothing] - } - // c.sctr = c.sctr - 1 - // if c.sctr == 0 { - // c.genSBoxen(sboxCount) - // c.sctr = c.sw - // } - return ct -} - -// XORKeyStream XORs each byte in the given slice with a byte from the -// cipher's key stream. Dst and src must overlap entirely or not at all. -// -// If len(dst) < len(src), XORKeyStream should panic. It is acceptable -// to pass a dst bigger than src, and in that case, XORKeyStream will -// only update dst[:len(src)] and will not touch the rest of dst. 
-// -// Multiple calls to XORKeyStream behave as if the concatenation of -// the src buffers was passed in a single run. That is, Stream -// maintains state and does not reset at each XORKeyStream call. -func (c *Cipher) XORKeyStream(dst, src []byte) { - //fmt.Printf("len dst:%d len src:%d\n", len(dst), len(src)) - if len(dst) < len(src) { - panic(errors.New("len(dst) < len(src)")) - } - - for idx, v := range src { - dst[idx] = c.yield(v) - } -} diff --git a/vendor/github.com/aead/chacha20/LICENSE b/vendor/github.com/aead/chacha20/LICENSE new file mode 100644 index 0000000..b6a9210 --- /dev/null +++ b/vendor/github.com/aead/chacha20/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2016 Andreas Auernhammer + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/vendor/github.com/aead/chacha20/chacha/chacha.go b/vendor/github.com/aead/chacha20/chacha/chacha.go new file mode 100644 index 0000000..c2b39da --- /dev/null +++ b/vendor/github.com/aead/chacha20/chacha/chacha.go @@ -0,0 +1,197 @@ +// Copyright (c) 2016 Andreas Auernhammer. All rights reserved. +// Use of this source code is governed by a license that can be +// found in the LICENSE file. + +// Package chacha implements some low-level functions of the +// ChaCha cipher family. +package chacha // import "github.com/aead/chacha20/chacha" + +import ( + "encoding/binary" + "errors" + "math" +) + +const ( + // NonceSize is the size of the ChaCha20 nonce in bytes. + NonceSize = 8 + + // INonceSize is the size of the IETF-ChaCha20 nonce in bytes. + INonceSize = 12 + + // XNonceSize is the size of the XChaCha20 nonce in bytes. + XNonceSize = 24 + + // KeySize is the size of the key in bytes. + KeySize = 32 +) + +var ( + useSSE2 bool + useSSSE3 bool + useAVX bool + useAVX2 bool +) + +var ( + errKeySize = errors.New("chacha20/chacha: bad key length") + errInvalidNonce = errors.New("chacha20/chacha: bad nonce length") +) + +func setup(state *[64]byte, nonce, key []byte) (err error) { + if len(key) != KeySize { + err = errKeySize + return + } + var Nonce [16]byte + switch len(nonce) { + case NonceSize: + copy(Nonce[8:], nonce) + initialize(state, key, &Nonce) + case INonceSize: + copy(Nonce[4:], nonce) + initialize(state, key, &Nonce) + case XNonceSize: + var tmpKey [32]byte + var hNonce [16]byte + + copy(hNonce[:], nonce[:16]) + copy(tmpKey[:], key) + HChaCha20(&tmpKey, &hNonce, &tmpKey) + copy(Nonce[8:], nonce[16:]) + initialize(state, tmpKey[:], &Nonce) + + // BUG(aead): A "good" compiler will remove this (optimizations) + // But using the provided key instead of tmpKey, + // will change the key (-> probably confuses users) + for i := range tmpKey { + tmpKey[i] = 0 + } + default: + err = errInvalidNonce + } + return +} + +// XORKeyStream crypts bytes from src 
to dst using the given nonce and key. +// The length of the nonce determinds the version of ChaCha20: +// - NonceSize: ChaCha20/r with a 64 bit nonce and a 2^64 * 64 byte period. +// - INonceSize: ChaCha20/r as defined in RFC 7539 and a 2^32 * 64 byte period. +// - XNonceSize: XChaCha20/r with a 192 bit nonce and a 2^64 * 64 byte period. +// The rounds argument specifies the number of rounds performed for keystream +// generation - valid values are 8, 12 or 20. The src and dst may be the same slice +// but otherwise should not overlap. If len(dst) < len(src) this function panics. +// If the nonce is neither 64, 96 nor 192 bits long, this function panics. +func XORKeyStream(dst, src, nonce, key []byte, rounds int) { + if rounds != 20 && rounds != 12 && rounds != 8 { + panic("chacha20/chacha: bad number of rounds") + } + if len(dst) < len(src) { + panic("chacha20/chacha: dst buffer is to small") + } + if len(nonce) == INonceSize && uint64(len(src)) > (1<<38) { + panic("chacha20/chacha: src is too large") + } + + var block, state [64]byte + if err := setup(&state, nonce, key); err != nil { + panic(err) + } + xorKeyStream(dst, src, &block, &state, rounds) +} + +// Cipher implements ChaCha20/r (XChaCha20/r) for a given number of rounds r. +type Cipher struct { + state, block [64]byte + off int + rounds int // 20 for ChaCha20 + noncesize int +} + +// NewCipher returns a new *chacha.Cipher implementing the ChaCha20/r or XChaCha20/r +// (r = 8, 12 or 20) stream cipher. The nonce must be unique for one key for all time. +// The length of the nonce determinds the version of ChaCha20: +// - NonceSize: ChaCha20/r with a 64 bit nonce and a 2^64 * 64 byte period. +// - INonceSize: ChaCha20/r as defined in RFC 7539 and a 2^32 * 64 byte period. +// - XNonceSize: XChaCha20/r with a 192 bit nonce and a 2^64 * 64 byte period. +// If the nonce is neither 64, 96 nor 192 bits long, a non-nil error is returned. 
+func NewCipher(nonce, key []byte, rounds int) (*Cipher, error) { + if rounds != 20 && rounds != 12 && rounds != 8 { + panic("chacha20/chacha: bad number of rounds") + } + + c := new(Cipher) + if err := setup(&(c.state), nonce, key); err != nil { + return nil, err + } + c.rounds = rounds + + if len(nonce) == INonceSize { + c.noncesize = INonceSize + } else { + c.noncesize = NonceSize + } + + return c, nil +} + +// XORKeyStream crypts bytes from src to dst. Src and dst may be the same slice +// but otherwise should not overlap. If len(dst) < len(src) the function panics. +func (c *Cipher) XORKeyStream(dst, src []byte) { + if len(dst) < len(src) { + panic("chacha20/chacha: dst buffer is to small") + } + + if c.off > 0 { + n := len(c.block[c.off:]) + if len(src) <= n { + for i, v := range src { + dst[i] = v ^ c.block[c.off] + c.off++ + } + if c.off == 64 { + c.off = 0 + } + return + } + + for i, v := range c.block[c.off:] { + dst[i] = src[i] ^ v + } + src = src[n:] + dst = dst[n:] + c.off = 0 + } + + // check for counter overflow + blocksToXOR := len(src) / 64 + if len(src)%64 != 0 { + blocksToXOR++ + } + var overflow bool + if c.noncesize == INonceSize { + overflow = binary.LittleEndian.Uint32(c.state[48:]) > math.MaxUint32-uint32(blocksToXOR) + } else { + overflow = binary.LittleEndian.Uint64(c.state[48:]) > math.MaxUint64-uint64(blocksToXOR) + } + if overflow { + panic("chacha20/chacha: counter overflow") + } + + c.off += xorKeyStream(dst, src, &(c.block), &(c.state), c.rounds) +} + +// SetCounter skips ctr * 64 byte blocks. SetCounter(0) resets the cipher. +// This function always skips the unused keystream of the current 64 byte block. +func (c *Cipher) SetCounter(ctr uint64) { + if c.noncesize == INonceSize { + binary.LittleEndian.PutUint32(c.state[48:], uint32(ctr)) + } else { + binary.LittleEndian.PutUint64(c.state[48:], ctr) + } + c.off = 0 +} + +// HChaCha20 generates 32 pseudo-random bytes from a 128 bit nonce and a 256 bit secret key. 
+// It can be used as a key-derivation-function (KDF). +func HChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) { hChaCha20(out, nonce, key) } diff --git a/vendor/github.com/aead/chacha20/chacha/chachaAVX2_amd64.s b/vendor/github.com/aead/chacha20/chacha/chachaAVX2_amd64.s new file mode 100644 index 0000000..c2b5f52 --- /dev/null +++ b/vendor/github.com/aead/chacha20/chacha/chachaAVX2_amd64.s @@ -0,0 +1,406 @@ +// Copyright (c) 2016 Andreas Auernhammer. All rights reserved. +// Use of this source code is governed by a license that can be +// found in the LICENSE file. + +// +build amd64,!gccgo,!appengine,!nacl + +#include "const.s" +#include "macro.s" + +#define TWO 0(SP) +#define C16 32(SP) +#define C8 64(SP) +#define STATE_0 96(SP) +#define STATE_1 128(SP) +#define STATE_2 160(SP) +#define STATE_3 192(SP) +#define TMP_0 224(SP) +#define TMP_1 256(SP) + +// func xorKeyStreamAVX2(dst, src []byte, block, state *[64]byte, rounds int) int +TEXT ·xorKeyStreamAVX2(SB), 4, $320-80 + MOVQ dst_base+0(FP), DI + MOVQ src_base+24(FP), SI + MOVQ block+48(FP), BX + MOVQ state+56(FP), AX + MOVQ rounds+64(FP), DX + MOVQ src_len+32(FP), CX + + MOVQ SP, R8 + ADDQ $32, SP + ANDQ $-32, SP + + VMOVDQU 0(AX), Y2 + VMOVDQU 32(AX), Y3 + VPERM2I128 $0x22, Y2, Y0, Y0 + VPERM2I128 $0x33, Y2, Y1, Y1 + VPERM2I128 $0x22, Y3, Y2, Y2 + VPERM2I128 $0x33, Y3, Y3, Y3 + + TESTQ CX, CX + JZ done + + VMOVDQU ·one_AVX2<>(SB), Y4 + VPADDD Y4, Y3, Y3 + + VMOVDQA Y0, STATE_0 + VMOVDQA Y1, STATE_1 + VMOVDQA Y2, STATE_2 + VMOVDQA Y3, STATE_3 + + VMOVDQU ·rol16_AVX2<>(SB), Y4 + VMOVDQU ·rol8_AVX2<>(SB), Y5 + VMOVDQU ·two_AVX2<>(SB), Y6 + VMOVDQA Y4, Y14 + VMOVDQA Y5, Y15 + VMOVDQA Y4, C16 + VMOVDQA Y5, C8 + VMOVDQA Y6, TWO + + CMPQ CX, $64 + JBE between_0_and_64 + CMPQ CX, $192 + JBE between_64_and_192 + CMPQ CX, $320 + JBE between_192_and_320 + CMPQ CX, $448 + JBE between_320_and_448 + +at_least_512: + VMOVDQA Y0, Y4 + VMOVDQA Y1, Y5 + VMOVDQA Y2, Y6 + VPADDQ TWO, Y3, Y7 + VMOVDQA Y0, Y8 + VMOVDQA Y1,
Y9 + VMOVDQA Y2, Y10 + VPADDQ TWO, Y7, Y11 + VMOVDQA Y0, Y12 + VMOVDQA Y1, Y13 + VMOVDQA Y2, Y14 + VPADDQ TWO, Y11, Y15 + + MOVQ DX, R9 + +chacha_loop_512: + VMOVDQA Y8, TMP_0 + CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y8, C16, C8) + CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y8, C16, C8) + VMOVDQA TMP_0, Y8 + VMOVDQA Y0, TMP_0 + CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y0, C16, C8) + CHACHA_QROUND_AVX(Y12, Y13, Y14, Y15, Y0, C16, C8) + CHACHA_SHUFFLE_AVX(Y1, Y2, Y3) + CHACHA_SHUFFLE_AVX(Y5, Y6, Y7) + CHACHA_SHUFFLE_AVX(Y9, Y10, Y11) + CHACHA_SHUFFLE_AVX(Y13, Y14, Y15) + + CHACHA_QROUND_AVX(Y12, Y13, Y14, Y15, Y0, C16, C8) + CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y0, C16, C8) + VMOVDQA TMP_0, Y0 + VMOVDQA Y8, TMP_0 + CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y8, C16, C8) + CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y8, C16, C8) + VMOVDQA TMP_0, Y8 + CHACHA_SHUFFLE_AVX(Y3, Y2, Y1) + CHACHA_SHUFFLE_AVX(Y7, Y6, Y5) + CHACHA_SHUFFLE_AVX(Y11, Y10, Y9) + CHACHA_SHUFFLE_AVX(Y15, Y14, Y13) + SUBQ $2, R9 + JA chacha_loop_512 + + VMOVDQA Y12, TMP_0 + VMOVDQA Y13, TMP_1 + VPADDD STATE_0, Y0, Y0 + VPADDD STATE_1, Y1, Y1 + VPADDD STATE_2, Y2, Y2 + VPADDD STATE_3, Y3, Y3 + XOR_AVX2(DI, SI, 0, Y0, Y1, Y2, Y3, Y12, Y13) + VMOVDQA STATE_0, Y0 + VMOVDQA STATE_1, Y1 + VMOVDQA STATE_2, Y2 + VMOVDQA STATE_3, Y3 + VPADDQ TWO, Y3, Y3 + + VPADDD Y0, Y4, Y4 + VPADDD Y1, Y5, Y5 + VPADDD Y2, Y6, Y6 + VPADDD Y3, Y7, Y7 + XOR_AVX2(DI, SI, 128, Y4, Y5, Y6, Y7, Y12, Y13) + VPADDQ TWO, Y3, Y3 + + VPADDD Y0, Y8, Y8 + VPADDD Y1, Y9, Y9 + VPADDD Y2, Y10, Y10 + VPADDD Y3, Y11, Y11 + XOR_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13) + VPADDQ TWO, Y3, Y3 + + VPADDD TMP_0, Y0, Y12 + VPADDD TMP_1, Y1, Y13 + VPADDD Y2, Y14, Y14 + VPADDD Y3, Y15, Y15 + VPADDQ TWO, Y3, Y3 + + CMPQ CX, $512 + JB less_than_512 + + XOR_AVX2(DI, SI, 384, Y12, Y13, Y14, Y15, Y4, Y5) + VMOVDQA Y3, STATE_3 + ADDQ $512, SI + ADDQ $512, DI + SUBQ $512, CX + CMPQ CX, $448 + JA at_least_512 + + TESTQ CX, CX + JZ done + + VMOVDQA C16, Y14 + VMOVDQA C8, Y15 + + CMPQ CX, $64 + 
JBE between_0_and_64 + CMPQ CX, $192 + JBE between_64_and_192 + CMPQ CX, $320 + JBE between_192_and_320 + JMP between_320_and_448 + +less_than_512: + XOR_UPPER_AVX2(DI, SI, 384, Y12, Y13, Y14, Y15, Y4, Y5) + EXTRACT_LOWER(BX, Y12, Y13, Y14, Y15, Y4) + ADDQ $448, SI + ADDQ $448, DI + SUBQ $448, CX + JMP finalize + +between_320_and_448: + VMOVDQA Y0, Y4 + VMOVDQA Y1, Y5 + VMOVDQA Y2, Y6 + VPADDQ TWO, Y3, Y7 + VMOVDQA Y0, Y8 + VMOVDQA Y1, Y9 + VMOVDQA Y2, Y10 + VPADDQ TWO, Y7, Y11 + + MOVQ DX, R9 + +chacha_loop_384: + CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y13, Y14, Y15) + CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) + CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15) + CHACHA_SHUFFLE_AVX(Y1, Y2, Y3) + CHACHA_SHUFFLE_AVX(Y5, Y6, Y7) + CHACHA_SHUFFLE_AVX(Y9, Y10, Y11) + CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y13, Y14, Y15) + CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) + CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15) + CHACHA_SHUFFLE_AVX(Y3, Y2, Y1) + CHACHA_SHUFFLE_AVX(Y7, Y6, Y5) + CHACHA_SHUFFLE_AVX(Y11, Y10, Y9) + SUBQ $2, R9 + JA chacha_loop_384 + + VPADDD STATE_0, Y0, Y0 + VPADDD STATE_1, Y1, Y1 + VPADDD STATE_2, Y2, Y2 + VPADDD STATE_3, Y3, Y3 + XOR_AVX2(DI, SI, 0, Y0, Y1, Y2, Y3, Y12, Y13) + VMOVDQA STATE_0, Y0 + VMOVDQA STATE_1, Y1 + VMOVDQA STATE_2, Y2 + VMOVDQA STATE_3, Y3 + VPADDQ TWO, Y3, Y3 + + VPADDD Y0, Y4, Y4 + VPADDD Y1, Y5, Y5 + VPADDD Y2, Y6, Y6 + VPADDD Y3, Y7, Y7 + XOR_AVX2(DI, SI, 128, Y4, Y5, Y6, Y7, Y12, Y13) + VPADDQ TWO, Y3, Y3 + + VPADDD Y0, Y8, Y8 + VPADDD Y1, Y9, Y9 + VPADDD Y2, Y10, Y10 + VPADDD Y3, Y11, Y11 + VPADDQ TWO, Y3, Y3 + + CMPQ CX, $384 + JB less_than_384 + + XOR_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13) + SUBQ $384, CX + TESTQ CX, CX + JE done + + ADDQ $384, SI + ADDQ $384, DI + JMP between_0_and_64 + +less_than_384: + XOR_UPPER_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13) + EXTRACT_LOWER(BX, Y8, Y9, Y10, Y11, Y12) + ADDQ $320, SI + ADDQ $320, DI + SUBQ $320, CX + JMP finalize + +between_192_and_320: + VMOVDQA Y0, Y4 + 
VMOVDQA Y1, Y5 + VMOVDQA Y2, Y6 + VMOVDQA Y3, Y7 + VMOVDQA Y0, Y8 + VMOVDQA Y1, Y9 + VMOVDQA Y2, Y10 + VPADDQ TWO, Y3, Y11 + + MOVQ DX, R9 + +chacha_loop_256: + CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) + CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15) + CHACHA_SHUFFLE_AVX(Y5, Y6, Y7) + CHACHA_SHUFFLE_AVX(Y9, Y10, Y11) + CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) + CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15) + CHACHA_SHUFFLE_AVX(Y7, Y6, Y5) + CHACHA_SHUFFLE_AVX(Y11, Y10, Y9) + SUBQ $2, R9 + JA chacha_loop_256 + + VPADDD Y0, Y4, Y4 + VPADDD Y1, Y5, Y5 + VPADDD Y2, Y6, Y6 + VPADDD Y3, Y7, Y7 + VPADDQ TWO, Y3, Y3 + XOR_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13) + VPADDD Y0, Y8, Y8 + VPADDD Y1, Y9, Y9 + VPADDD Y2, Y10, Y10 + VPADDD Y3, Y11, Y11 + VPADDQ TWO, Y3, Y3 + + CMPQ CX, $256 + JB less_than_256 + + XOR_AVX2(DI, SI, 128, Y8, Y9, Y10, Y11, Y12, Y13) + SUBQ $256, CX + TESTQ CX, CX + JE done + + ADDQ $256, SI + ADDQ $256, DI + JMP between_0_and_64 + +less_than_256: + XOR_UPPER_AVX2(DI, SI, 128, Y8, Y9, Y10, Y11, Y12, Y13) + EXTRACT_LOWER(BX, Y8, Y9, Y10, Y11, Y12) + ADDQ $192, SI + ADDQ $192, DI + SUBQ $192, CX + JMP finalize + +between_64_and_192: + VMOVDQA Y0, Y4 + VMOVDQA Y1, Y5 + VMOVDQA Y2, Y6 + VMOVDQA Y3, Y7 + + MOVQ DX, R9 + +chacha_loop_128: + CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) + CHACHA_SHUFFLE_AVX(Y5, Y6, Y7) + CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) + CHACHA_SHUFFLE_AVX(Y7, Y6, Y5) + SUBQ $2, R9 + JA chacha_loop_128 + + VPADDD Y0, Y4, Y4 + VPADDD Y1, Y5, Y5 + VPADDD Y2, Y6, Y6 + VPADDD Y3, Y7, Y7 + VPADDQ TWO, Y3, Y3 + + CMPQ CX, $128 + JB less_than_128 + + XOR_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13) + SUBQ $128, CX + TESTQ CX, CX + JE done + + ADDQ $128, SI + ADDQ $128, DI + JMP between_0_and_64 + +less_than_128: + XOR_UPPER_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13) + EXTRACT_LOWER(BX, Y4, Y5, Y6, Y7, Y13) + ADDQ $64, SI + ADDQ $64, DI + SUBQ $64, CX + JMP finalize + +between_0_and_64: + VMOVDQA X0, X4 + 
VMOVDQA X1, X5 + VMOVDQA X2, X6 + VMOVDQA X3, X7 + + MOVQ DX, R9 + +chacha_loop_64: + CHACHA_QROUND_AVX(X4, X5, X6, X7, X13, X14, X15) + CHACHA_SHUFFLE_AVX(X5, X6, X7) + CHACHA_QROUND_AVX(X4, X5, X6, X7, X13, X14, X15) + CHACHA_SHUFFLE_AVX(X7, X6, X5) + SUBQ $2, R9 + JA chacha_loop_64 + + VPADDD X0, X4, X4 + VPADDD X1, X5, X5 + VPADDD X2, X6, X6 + VPADDD X3, X7, X7 + VMOVDQU ·one<>(SB), X0 + VPADDQ X0, X3, X3 + + CMPQ CX, $64 + JB less_than_64 + + XOR_AVX(DI, SI, 0, X4, X5, X6, X7, X13) + SUBQ $64, CX + JMP done + +less_than_64: + VMOVDQU X4, 0(BX) + VMOVDQU X5, 16(BX) + VMOVDQU X6, 32(BX) + VMOVDQU X7, 48(BX) + +finalize: + XORQ R11, R11 + XORQ R12, R12 + MOVQ CX, BP + +xor_loop: + MOVB 0(SI), R11 + MOVB 0(BX), R12 + XORQ R11, R12 + MOVB R12, 0(DI) + INCQ SI + INCQ BX + INCQ DI + DECQ BP + JA xor_loop + +done: + VMOVDQU X3, 48(AX) + VZEROUPPER + MOVQ R8, SP + MOVQ CX, ret+72(FP) + RET + diff --git a/vendor/github.com/aead/chacha20/chacha/chacha_386.go b/vendor/github.com/aead/chacha20/chacha/chacha_386.go new file mode 100644 index 0000000..97e533d --- /dev/null +++ b/vendor/github.com/aead/chacha20/chacha/chacha_386.go @@ -0,0 +1,60 @@ +// Copyright (c) 2016 Andreas Auernhammer. All rights reserved. +// Use of this source code is governed by a license that can be +// found in the LICENSE file. 
+ +// +build 386,!gccgo,!appengine,!nacl + +package chacha + +import ( + "encoding/binary" + + "golang.org/x/sys/cpu" +) + +func init() { + useSSE2 = cpu.X86.HasSSE2 + useSSSE3 = cpu.X86.HasSSSE3 + useAVX = false + useAVX2 = false +} + +func initialize(state *[64]byte, key []byte, nonce *[16]byte) { + binary.LittleEndian.PutUint32(state[0:], sigma[0]) + binary.LittleEndian.PutUint32(state[4:], sigma[1]) + binary.LittleEndian.PutUint32(state[8:], sigma[2]) + binary.LittleEndian.PutUint32(state[12:], sigma[3]) + copy(state[16:], key[:]) + copy(state[48:], nonce[:]) +} + +// This function is implemented in chacha_386.s +//go:noescape +func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte) + +// This function is implemented in chacha_386.s +//go:noescape +func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte) + +// This function is implemented in chacha_386.s +//go:noescape +func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int + +func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) { + switch { + case useSSSE3: + hChaCha20SSSE3(out, nonce, key) + case useSSE2: + hChaCha20SSE2(out, nonce, key) + default: + hChaCha20Generic(out, nonce, key) + } +} + +func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int { + if useSSE2 { + return xorKeyStreamSSE2(dst, src, block, state, rounds) + } else { + return xorKeyStreamGeneric(dst, src, block, state, rounds) + } +} diff --git a/vendor/github.com/aead/chacha20/chacha/chacha_386.s b/vendor/github.com/aead/chacha20/chacha/chacha_386.s new file mode 100644 index 0000000..262fc86 --- /dev/null +++ b/vendor/github.com/aead/chacha20/chacha/chacha_386.s @@ -0,0 +1,163 @@ +// Copyright (c) 2016 Andreas Auernhammer. All rights reserved. +// Use of this source code is governed by a license that can be +// found in the LICENSE file. 
+ +// +build 386,!gccgo,!appengine,!nacl + +#include "const.s" +#include "macro.s" + +// FINALIZE xors len bytes from src and block using +// the temp. registers t0 and t1 and writes the result +// to dst. +#define FINALIZE(dst, src, block, len, t0, t1) \ + XORL t0, t0; \ + XORL t1, t1; \ + FINALIZE_LOOP:; \ + MOVB 0(src), t0; \ + MOVB 0(block), t1; \ + XORL t0, t1; \ + MOVB t1, 0(dst); \ + INCL src; \ + INCL block; \ + INCL dst; \ + DECL len; \ + JG FINALIZE_LOOP \ + +#define Dst DI +#define Nonce AX +#define Key BX +#define Rounds DX + +// func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte) +TEXT ·hChaCha20SSE2(SB), 4, $0-12 + MOVL out+0(FP), Dst + MOVL nonce+4(FP), Nonce + MOVL key+8(FP), Key + + MOVOU ·sigma<>(SB), X0 + MOVOU 0*16(Key), X1 + MOVOU 1*16(Key), X2 + MOVOU 0*16(Nonce), X3 + MOVL $20, Rounds + +chacha_loop: + CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4) + CHACHA_SHUFFLE_SSE(X1, X2, X3) + CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4) + CHACHA_SHUFFLE_SSE(X3, X2, X1) + SUBL $2, Rounds + JNZ chacha_loop + + MOVOU X0, 0*16(Dst) + MOVOU X3, 1*16(Dst) + RET + +// func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte) +TEXT ·hChaCha20SSSE3(SB), 4, $0-12 + MOVL out+0(FP), Dst + MOVL nonce+4(FP), Nonce + MOVL key+8(FP), Key + + MOVOU ·sigma<>(SB), X0 + MOVOU 0*16(Key), X1 + MOVOU 1*16(Key), X2 + MOVOU 0*16(Nonce), X3 + MOVL $20, Rounds + + MOVOU ·rol16<>(SB), X5 + MOVOU ·rol8<>(SB), X6 + +chacha_loop: + CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6) + CHACHA_SHUFFLE_SSE(X1, X2, X3) + CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6) + CHACHA_SHUFFLE_SSE(X3, X2, X1) + SUBL $2, Rounds + JNZ chacha_loop + + MOVOU X0, 0*16(Dst) + MOVOU X3, 1*16(Dst) + RET + +#undef Dst +#undef Nonce +#undef Key +#undef Rounds + +#define State AX +#define Dst DI +#define Src SI +#define Len DX +#define Tmp0 BX +#define Tmp1 BP + +// func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int +TEXT ·xorKeyStreamSSE2(SB), 4, $0-40 + MOVL 
dst_base+0(FP), Dst + MOVL src_base+12(FP), Src + MOVL state+28(FP), State + MOVL src_len+16(FP), Len + MOVL $0, ret+36(FP) // Number of bytes written to the keystream buffer - 0 iff len mod 64 == 0 + + MOVOU 0*16(State), X0 + MOVOU 1*16(State), X1 + MOVOU 2*16(State), X2 + MOVOU 3*16(State), X3 + TESTL Len, Len + JZ DONE + +GENERATE_KEYSTREAM: + MOVO X0, X4 + MOVO X1, X5 + MOVO X2, X6 + MOVO X3, X7 + MOVL rounds+32(FP), Tmp0 + +CHACHA_LOOP: + CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0) + CHACHA_SHUFFLE_SSE(X5, X6, X7) + CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0) + CHACHA_SHUFFLE_SSE(X7, X6, X5) + SUBL $2, Tmp0 + JA CHACHA_LOOP + + MOVOU 0*16(State), X0 // Restore X0 from state + PADDL X0, X4 + PADDL X1, X5 + PADDL X2, X6 + PADDL X3, X7 + MOVOU ·one<>(SB), X0 + PADDQ X0, X3 + + CMPL Len, $64 + JL BUFFER_KEYSTREAM + + XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X0) + MOVOU 0*16(State), X0 // Restore X0 from state + ADDL $64, Src + ADDL $64, Dst + SUBL $64, Len + JZ DONE + JMP GENERATE_KEYSTREAM // There is at least one more plaintext byte + +BUFFER_KEYSTREAM: + MOVL block+24(FP), State + MOVOU X4, 0(State) + MOVOU X5, 16(State) + MOVOU X6, 32(State) + MOVOU X7, 48(State) + MOVL Len, ret+36(FP) // Number of bytes written to the keystream buffer - 0 < Len < 64 + FINALIZE(Dst, Src, State, Len, Tmp0, Tmp1) + +DONE: + MOVL state+28(FP), State + MOVOU X3, 3*16(State) + RET + +#undef State +#undef Dst +#undef Src +#undef Len +#undef Tmp0 +#undef Tmp1 diff --git a/vendor/github.com/aead/chacha20/chacha/chacha_amd64.go b/vendor/github.com/aead/chacha20/chacha/chacha_amd64.go new file mode 100644 index 0000000..635f7de --- /dev/null +++ b/vendor/github.com/aead/chacha20/chacha/chacha_amd64.go @@ -0,0 +1,76 @@ +// Copyright (c) 2017 Andreas Auernhammer. All rights reserved. +// Use of this source code is governed by a license that can be +// found in the LICENSE file. 
+ +// +build go1.7,amd64,!gccgo,!appengine,!nacl + +package chacha + +import "golang.org/x/sys/cpu" + +func init() { + useSSE2 = cpu.X86.HasSSE2 + useSSSE3 = cpu.X86.HasSSSE3 + useAVX = cpu.X86.HasAVX + useAVX2 = cpu.X86.HasAVX2 +} + +// This function is implemented in chacha_amd64.s +//go:noescape +func initialize(state *[64]byte, key []byte, nonce *[16]byte) + +// This function is implemented in chacha_amd64.s +//go:noescape +func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte) + +// This function is implemented in chacha_amd64.s +//go:noescape +func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte) + +// This function is implemented in chacha_amd64.s +//go:noescape +func hChaCha20AVX(out *[32]byte, nonce *[16]byte, key *[32]byte) + +// This function is implemented in chacha_amd64.s +//go:noescape +func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int + +// This function is implemented in chacha_amd64.s +//go:noescape +func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int + +// This function is implemented in chacha_amd64.s +//go:noescape +func xorKeyStreamAVX(dst, src []byte, block, state *[64]byte, rounds int) int + +// This function is implemented in chachaAVX2_amd64.s +//go:noescape +func xorKeyStreamAVX2(dst, src []byte, block, state *[64]byte, rounds int) int + +func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) { + switch { + case useAVX: + hChaCha20AVX(out, nonce, key) + case useSSSE3: + hChaCha20SSSE3(out, nonce, key) + case useSSE2: + hChaCha20SSE2(out, nonce, key) + default: + hChaCha20Generic(out, nonce, key) + } +} + +func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int { + switch { + case useAVX2: + return xorKeyStreamAVX2(dst, src, block, state, rounds) + case useAVX: + return xorKeyStreamAVX(dst, src, block, state, rounds) + case useSSSE3: + return xorKeyStreamSSSE3(dst, src, block, state, rounds) + case useSSE2: + return
xorKeyStreamSSE2(dst, src, block, state, rounds) + default: + return xorKeyStreamGeneric(dst, src, block, state, rounds) + } +} diff --git a/vendor/github.com/aead/chacha20/chacha/chacha_amd64.s b/vendor/github.com/aead/chacha20/chacha/chacha_amd64.s new file mode 100644 index 0000000..26a2383 --- /dev/null +++ b/vendor/github.com/aead/chacha20/chacha/chacha_amd64.s @@ -0,0 +1,1072 @@ +// Copyright (c) 2016 Andreas Auernhammer. All rights reserved. +// Use of this source code is governed by a license that can be +// found in the LICENSE file. + +// +build amd64,!gccgo,!appengine,!nacl + +#include "const.s" +#include "macro.s" + +// FINALIZE xors len bytes from src and block using +// the temp. registers t0 and t1 and writes the result +// to dst. +#define FINALIZE(dst, src, block, len, t0, t1) \ + XORQ t0, t0; \ + XORQ t1, t1; \ + FINALIZE_LOOP:; \ + MOVB 0(src), t0; \ + MOVB 0(block), t1; \ + XORQ t0, t1; \ + MOVB t1, 0(dst); \ + INCQ src; \ + INCQ block; \ + INCQ dst; \ + DECQ len; \ + JG FINALIZE_LOOP \ + +#define Dst DI +#define Nonce AX +#define Key BX +#define Rounds DX + +// func initialize(state *[64]byte, key []byte, nonce *[16]byte) +TEXT ·initialize(SB), 4, $0-40 + MOVQ state+0(FP), Dst + MOVQ key+8(FP), Key + MOVQ nonce+32(FP), Nonce + + MOVOU ·sigma<>(SB), X0 + MOVOU 0*16(Key), X1 + MOVOU 1*16(Key), X2 + MOVOU 0*16(Nonce), X3 + + MOVOU X0, 0*16(Dst) + MOVOU X1, 1*16(Dst) + MOVOU X2, 2*16(Dst) + MOVOU X3, 3*16(Dst) + RET + +// func hChaCha20AVX(out *[32]byte, nonce *[16]byte, key *[32]byte) +TEXT ·hChaCha20AVX(SB), 4, $0-24 + MOVQ out+0(FP), Dst + MOVQ nonce+8(FP), Nonce + MOVQ key+16(FP), Key + + VMOVDQU ·sigma<>(SB), X0 + VMOVDQU 0*16(Key), X1 + VMOVDQU 1*16(Key), X2 + VMOVDQU 0*16(Nonce), X3 + VMOVDQU ·rol16_AVX2<>(SB), X5 + VMOVDQU ·rol8_AVX2<>(SB), X6 + MOVQ $20, Rounds + +CHACHA_LOOP: + CHACHA_QROUND_AVX(X0, X1, X2, X3, X4, X5, X6) + CHACHA_SHUFFLE_AVX(X1, X2, X3) + CHACHA_QROUND_AVX(X0, X1, X2, X3, X4, X5, X6) + CHACHA_SHUFFLE_AVX(X3, X2, X1) + 
SUBQ $2, Rounds + JNZ CHACHA_LOOP + + VMOVDQU X0, 0*16(Dst) + VMOVDQU X3, 1*16(Dst) + VZEROUPPER + RET + +// func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte) +TEXT ·hChaCha20SSE2(SB), 4, $0-24 + MOVQ out+0(FP), Dst + MOVQ nonce+8(FP), Nonce + MOVQ key+16(FP), Key + + MOVOU ·sigma<>(SB), X0 + MOVOU 0*16(Key), X1 + MOVOU 1*16(Key), X2 + MOVOU 0*16(Nonce), X3 + MOVQ $20, Rounds + +CHACHA_LOOP: + CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4) + CHACHA_SHUFFLE_SSE(X1, X2, X3) + CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4) + CHACHA_SHUFFLE_SSE(X3, X2, X1) + SUBQ $2, Rounds + JNZ CHACHA_LOOP + + MOVOU X0, 0*16(Dst) + MOVOU X3, 1*16(Dst) + RET + +// func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte) +TEXT ·hChaCha20SSSE3(SB), 4, $0-24 + MOVQ out+0(FP), Dst + MOVQ nonce+8(FP), Nonce + MOVQ key+16(FP), Key + + MOVOU ·sigma<>(SB), X0 + MOVOU 0*16(Key), X1 + MOVOU 1*16(Key), X2 + MOVOU 0*16(Nonce), X3 + MOVOU ·rol16<>(SB), X5 + MOVOU ·rol8<>(SB), X6 + MOVQ $20, Rounds + +chacha_loop: + CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6) + CHACHA_SHUFFLE_SSE(X1, X2, X3) + CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6) + CHACHA_SHUFFLE_SSE(X3, X2, X1) + SUBQ $2, Rounds + JNZ chacha_loop + + MOVOU X0, 0*16(Dst) + MOVOU X3, 1*16(Dst) + RET + +#undef Dst +#undef Nonce +#undef Key +#undef Rounds + +#define Dst DI +#define Src SI +#define Len R12 +#define Rounds DX +#define Buffer BX +#define State AX +#define Stack SP +#define SavedSP R8 +#define Tmp0 R9 +#define Tmp1 R10 +#define Tmp2 R11 + +// func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int +TEXT ·xorKeyStreamSSE2(SB), 4, $112-80 + MOVQ dst_base+0(FP), Dst + MOVQ src_base+24(FP), Src + MOVQ block+48(FP), Buffer + MOVQ state+56(FP), State + MOVQ rounds+64(FP), Rounds + MOVQ src_len+32(FP), Len + + MOVOU 0*16(State), X0 + MOVOU 1*16(State), X1 + MOVOU 2*16(State), X2 + MOVOU 3*16(State), X3 + + MOVQ Stack, SavedSP + ADDQ $16, Stack + ANDQ $-16, Stack + + TESTQ Len, Len + JZ DONE + + 
MOVOU ·one<>(SB), X4 + MOVO X0, 0*16(Stack) + MOVO X1, 1*16(Stack) + MOVO X2, 2*16(Stack) + MOVO X3, 3*16(Stack) + MOVO X4, 4*16(Stack) + + CMPQ Len, $64 + JLE GENERATE_KEYSTREAM_64 + CMPQ Len, $128 + JLE GENERATE_KEYSTREAM_128 + CMPQ Len, $192 + JLE GENERATE_KEYSTREAM_192 + +GENERATE_KEYSTREAM_256: + MOVO X0, X12 + MOVO X1, X13 + MOVO X2, X14 + MOVO X3, X15 + PADDQ 4*16(Stack), X15 + MOVO X0, X8 + MOVO X1, X9 + MOVO X2, X10 + MOVO X15, X11 + PADDQ 4*16(Stack), X11 + MOVO X0, X4 + MOVO X1, X5 + MOVO X2, X6 + MOVO X11, X7 + PADDQ 4*16(Stack), X7 + MOVQ Rounds, Tmp0 + + MOVO X3, 3*16(Stack) // Save X3 + +CHACHA_LOOP_256: + MOVO X4, 5*16(Stack) + CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4) + CHACHA_QROUND_SSE2(X12, X13, X14, X15, X4) + MOVO 5*16(Stack), X4 + MOVO X0, 5*16(Stack) + CHACHA_QROUND_SSE2(X8, X9, X10, X11, X0) + CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0) + MOVO 5*16(Stack), X0 + CHACHA_SHUFFLE_SSE(X1, X2, X3) + CHACHA_SHUFFLE_SSE(X13, X14, X15) + CHACHA_SHUFFLE_SSE(X9, X10, X11) + CHACHA_SHUFFLE_SSE(X5, X6, X7) + MOVO X4, 5*16(Stack) + CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4) + CHACHA_QROUND_SSE2(X12, X13, X14, X15, X4) + MOVO 5*16(Stack), X4 + MOVO X0, 5*16(Stack) + CHACHA_QROUND_SSE2(X8, X9, X10, X11, X0) + CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0) + MOVO 5*16(Stack), X0 + CHACHA_SHUFFLE_SSE(X3, X2, X1) + CHACHA_SHUFFLE_SSE(X15, X14, X13) + CHACHA_SHUFFLE_SSE(X11, X10, X9) + CHACHA_SHUFFLE_SSE(X7, X6, X5) + SUBQ $2, Tmp0 + JNZ CHACHA_LOOP_256 + + PADDL 0*16(Stack), X0 + PADDL 1*16(Stack), X1 + PADDL 2*16(Stack), X2 + PADDL 3*16(Stack), X3 + MOVO X4, 5*16(Stack) // Save X4 + XOR_SSE(Dst, Src, 0, X0, X1, X2, X3, X4) + MOVO 5*16(Stack), X4 // Restore X4 + + MOVO 0*16(Stack), X0 + MOVO 1*16(Stack), X1 + MOVO 2*16(Stack), X2 + MOVO 3*16(Stack), X3 + PADDQ 4*16(Stack), X3 + + PADDL X0, X12 + PADDL X1, X13 + PADDL X2, X14 + PADDL X3, X15 + PADDQ 4*16(Stack), X3 + PADDL X0, X8 + PADDL X1, X9 + PADDL X2, X10 + PADDL X3, X11 + PADDQ 4*16(Stack), X3 + PADDL X0, X4 + PADDL X1, X5 
+ PADDL X2, X6 + PADDL X3, X7 + PADDQ 4*16(Stack), X3 + + XOR_SSE(Dst, Src, 64, X12, X13, X14, X15, X0) + XOR_SSE(Dst, Src, 128, X8, X9, X10, X11, X0) + MOVO 0*16(Stack), X0 // Restore X0 + ADDQ $192, Dst + ADDQ $192, Src + SUBQ $192, Len + + CMPQ Len, $64 + JL BUFFER_KEYSTREAM + + XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X8) + ADDQ $64, Dst + ADDQ $64, Src + SUBQ $64, Len + JZ DONE + CMPQ Len, $64 // If Len <= 64 -> gen. only 64 byte keystream. + JLE GENERATE_KEYSTREAM_64 + CMPQ Len, $128 // If 64 < Len <= 128 -> gen. only 128 byte keystream. + JLE GENERATE_KEYSTREAM_128 + CMPQ Len, $192 // If Len > 192 -> repeat, otherwise Len > 128 && Len <= 192 -> gen. 192 byte keystream + JG GENERATE_KEYSTREAM_256 + +GENERATE_KEYSTREAM_192: + MOVO X0, X12 + MOVO X1, X13 + MOVO X2, X14 + MOVO X3, X15 + MOVO X0, X8 + MOVO X1, X9 + MOVO X2, X10 + MOVO X3, X11 + PADDQ 4*16(Stack), X11 + MOVO X0, X4 + MOVO X1, X5 + MOVO X2, X6 + MOVO X11, X7 + PADDQ 4*16(Stack), X7 + MOVQ Rounds, Tmp0 + +CHACHA_LOOP_192: + CHACHA_QROUND_SSE2(X12, X13, X14, X15, X0) + CHACHA_QROUND_SSE2(X8, X9, X10, X11, X0) + CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0) + CHACHA_SHUFFLE_SSE(X13, X14, X15) + CHACHA_SHUFFLE_SSE(X9, X10, X11) + CHACHA_SHUFFLE_SSE(X5, X6, X7) + CHACHA_QROUND_SSE2(X12, X13, X14, X15, X0) + CHACHA_QROUND_SSE2(X8, X9, X10, X11, X0) + CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0) + CHACHA_SHUFFLE_SSE(X15, X14, X13) + CHACHA_SHUFFLE_SSE(X11, X10, X9) + CHACHA_SHUFFLE_SSE(X7, X6, X5) + SUBQ $2, Tmp0 + JNZ CHACHA_LOOP_192 + + MOVO 0*16(Stack), X0 // Restore X0 + PADDL X0, X12 + PADDL X1, X13 + PADDL X2, X14 + PADDL X3, X15 + PADDQ 4*16(Stack), X3 + PADDL X0, X8 + PADDL X1, X9 + PADDL X2, X10 + PADDL X3, X11 + PADDQ 4*16(Stack), X3 + PADDL X0, X4 + PADDL X1, X5 + PADDL X2, X6 + PADDL X3, X7 + PADDQ 4*16(Stack), X3 + + XOR_SSE(Dst, Src, 0, X12, X13, X14, X15, X0) + XOR_SSE(Dst, Src, 64, X8, X9, X10, X11, X0) + MOVO 0*16(Stack), X0 // Restore X0 + ADDQ $128, Dst + ADDQ $128, Src + SUBQ $128, Len + + CMPQ Len, 
$64 + JL BUFFER_KEYSTREAM + + XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X8) + ADDQ $64, Dst + ADDQ $64, Src + SUBQ $64, Len + JZ DONE + CMPQ Len, $64 // If Len <= 64 -> gen. only 64 byte keystream. + JLE GENERATE_KEYSTREAM_64 + +GENERATE_KEYSTREAM_128: + MOVO X0, X8 + MOVO X1, X9 + MOVO X2, X10 + MOVO X3, X11 + MOVO X0, X4 + MOVO X1, X5 + MOVO X2, X6 + MOVO X3, X7 + PADDQ 4*16(Stack), X7 + MOVQ Rounds, Tmp0 + +CHACHA_LOOP_128: + CHACHA_QROUND_SSE2(X8, X9, X10, X11, X12) + CHACHA_QROUND_SSE2(X4, X5, X6, X7, X12) + CHACHA_SHUFFLE_SSE(X9, X10, X11) + CHACHA_SHUFFLE_SSE(X5, X6, X7) + CHACHA_QROUND_SSE2(X8, X9, X10, X11, X12) + CHACHA_QROUND_SSE2(X4, X5, X6, X7, X12) + CHACHA_SHUFFLE_SSE(X11, X10, X9) + CHACHA_SHUFFLE_SSE(X7, X6, X5) + SUBQ $2, Tmp0 + JNZ CHACHA_LOOP_128 + + PADDL X0, X8 + PADDL X1, X9 + PADDL X2, X10 + PADDL X3, X11 + PADDQ 4*16(Stack), X3 + PADDL X0, X4 + PADDL X1, X5 + PADDL X2, X6 + PADDL X3, X7 + PADDQ 4*16(Stack), X3 + + XOR_SSE(Dst, Src, 0, X8, X9, X10, X11, X12) + ADDQ $64, Dst + ADDQ $64, Src + SUBQ $64, Len + + CMPQ Len, $64 + JL BUFFER_KEYSTREAM + + XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X8) + ADDQ $64, Dst + ADDQ $64, Src + SUBQ $64, Len + JZ DONE // If Len == 0 -> DONE, otherwise Len <= 64 -> gen 64 byte keystream + +GENERATE_KEYSTREAM_64: + MOVO X0, X4 + MOVO X1, X5 + MOVO X2, X6 + MOVO X3, X7 + MOVQ Rounds, Tmp0 + +CHACHA_LOOP_64: + CHACHA_QROUND_SSE2(X4, X5, X6, X7, X8) + CHACHA_SHUFFLE_SSE(X5, X6, X7) + CHACHA_QROUND_SSE2(X4, X5, X6, X7, X8) + CHACHA_SHUFFLE_SSE(X7, X6, X5) + SUBQ $2, Tmp0 + JNZ CHACHA_LOOP_64 + + PADDL X0, X4 + PADDL X1, X5 + PADDL X2, X6 + PADDL X3, X7 + PADDQ 4*16(Stack), X3 + + CMPQ Len, $64 + JL BUFFER_KEYSTREAM + + XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X8) + ADDQ $64, Src + ADDQ $64, Dst + SUBQ $64, Len + JMP DONE // jump directly to DONE - there is no keystream to buffer, Len == 0 always true. 
+ +BUFFER_KEYSTREAM: + MOVOU X4, 0*16(Buffer) + MOVOU X5, 1*16(Buffer) + MOVOU X6, 2*16(Buffer) + MOVOU X7, 3*16(Buffer) + MOVQ Len, Tmp0 + FINALIZE(Dst, Src, Buffer, Tmp0, Tmp1, Tmp2) + +DONE: + MOVQ SavedSP, Stack // Restore stack pointer + MOVOU X3, 3*16(State) + MOVQ Len, ret+72(FP) + RET + +// func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int +TEXT ·xorKeyStreamSSSE3(SB), 4, $144-80 + MOVQ dst_base+0(FP), Dst + MOVQ src_base+24(FP), Src + MOVQ block+48(FP), Buffer + MOVQ state+56(FP), State + MOVQ rounds+64(FP), Rounds + MOVQ src_len+32(FP), Len + + MOVOU 0*16(State), X0 + MOVOU 1*16(State), X1 + MOVOU 2*16(State), X2 + MOVOU 3*16(State), X3 + + MOVQ Stack, SavedSP + ADDQ $16, Stack + ANDQ $-16, Stack + + TESTQ Len, Len + JZ DONE + + MOVOU ·one<>(SB), X4 + MOVOU ·rol16<>(SB), X5 + MOVOU ·rol8<>(SB), X6 + MOVO X0, 0*16(Stack) + MOVO X1, 1*16(Stack) + MOVO X2, 2*16(Stack) + MOVO X3, 3*16(Stack) + MOVO X4, 4*16(Stack) + MOVO X5, 6*16(Stack) + MOVO X6, 7*16(Stack) + + CMPQ Len, $64 + JLE GENERATE_KEYSTREAM_64 + CMPQ Len, $128 + JLE GENERATE_KEYSTREAM_128 + CMPQ Len, $192 + JLE GENERATE_KEYSTREAM_192 + +GENERATE_KEYSTREAM_256: + MOVO X0, X12 + MOVO X1, X13 + MOVO X2, X14 + MOVO X3, X15 + PADDQ 4*16(Stack), X15 + MOVO X0, X8 + MOVO X1, X9 + MOVO X2, X10 + MOVO X15, X11 + PADDQ 4*16(Stack), X11 + MOVO X0, X4 + MOVO X1, X5 + MOVO X2, X6 + MOVO X11, X7 + PADDQ 4*16(Stack), X7 + MOVQ Rounds, Tmp0 + + MOVO X3, 3*16(Stack) // Save X3 + +CHACHA_LOOP_256: + MOVO X4, 5*16(Stack) + CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, 6*16(Stack), 7*16(Stack)) + CHACHA_QROUND_SSSE3(X12, X13, X14, X15, X4, 6*16(Stack), 7*16(Stack)) + MOVO 5*16(Stack), X4 + MOVO X0, 5*16(Stack) + CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X0, 6*16(Stack), 7*16(Stack)) + CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X0, 6*16(Stack), 7*16(Stack)) + MOVO 5*16(Stack), X0 + CHACHA_SHUFFLE_SSE(X1, X2, X3) + CHACHA_SHUFFLE_SSE(X13, X14, X15) + CHACHA_SHUFFLE_SSE(X9, X10, X11) + CHACHA_SHUFFLE_SSE(X5, 
X6, X7) + MOVO X4, 5*16(Stack) + CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, 6*16(Stack), 7*16(Stack)) + CHACHA_QROUND_SSSE3(X12, X13, X14, X15, X4, 6*16(Stack), 7*16(Stack)) + MOVO 5*16(Stack), X4 + MOVO X0, 5*16(Stack) + CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X0, 6*16(Stack), 7*16(Stack)) + CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X0, 6*16(Stack), 7*16(Stack)) + MOVO 5*16(Stack), X0 + CHACHA_SHUFFLE_SSE(X3, X2, X1) + CHACHA_SHUFFLE_SSE(X15, X14, X13) + CHACHA_SHUFFLE_SSE(X11, X10, X9) + CHACHA_SHUFFLE_SSE(X7, X6, X5) + SUBQ $2, Tmp0 + JNZ CHACHA_LOOP_256 + + PADDL 0*16(Stack), X0 + PADDL 1*16(Stack), X1 + PADDL 2*16(Stack), X2 + PADDL 3*16(Stack), X3 + MOVO X4, 5*16(Stack) // Save X4 + XOR_SSE(Dst, Src, 0, X0, X1, X2, X3, X4) + MOVO 5*16(Stack), X4 // Restore X4 + + MOVO 0*16(Stack), X0 + MOVO 1*16(Stack), X1 + MOVO 2*16(Stack), X2 + MOVO 3*16(Stack), X3 + PADDQ 4*16(Stack), X3 + + PADDL X0, X12 + PADDL X1, X13 + PADDL X2, X14 + PADDL X3, X15 + PADDQ 4*16(Stack), X3 + PADDL X0, X8 + PADDL X1, X9 + PADDL X2, X10 + PADDL X3, X11 + PADDQ 4*16(Stack), X3 + PADDL X0, X4 + PADDL X1, X5 + PADDL X2, X6 + PADDL X3, X7 + PADDQ 4*16(Stack), X3 + + XOR_SSE(Dst, Src, 64, X12, X13, X14, X15, X0) + XOR_SSE(Dst, Src, 128, X8, X9, X10, X11, X0) + MOVO 0*16(Stack), X0 // Restore X0 + ADDQ $192, Dst + ADDQ $192, Src + SUBQ $192, Len + + CMPQ Len, $64 + JL BUFFER_KEYSTREAM + + XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X8) + ADDQ $64, Dst + ADDQ $64, Src + SUBQ $64, Len + JZ DONE + CMPQ Len, $64 // If Len <= 64 -> gen. only 64 byte keystream. + JLE GENERATE_KEYSTREAM_64 + CMPQ Len, $128 // If 64 < Len <= 128 -> gen. only 128 byte keystream. + JLE GENERATE_KEYSTREAM_128 + CMPQ Len, $192 // If Len > 192 -> repeat, otherwise Len > 128 && Len <= 192 -> gen. 
192 byte keystream + JG GENERATE_KEYSTREAM_256 + +GENERATE_KEYSTREAM_192: + MOVO X0, X12 + MOVO X1, X13 + MOVO X2, X14 + MOVO X3, X15 + MOVO X0, X8 + MOVO X1, X9 + MOVO X2, X10 + MOVO X3, X11 + PADDQ 4*16(Stack), X11 + MOVO X0, X4 + MOVO X1, X5 + MOVO X2, X6 + MOVO X11, X7 + PADDQ 4*16(Stack), X7 + MOVQ Rounds, Tmp0 + + MOVO 6*16(Stack), X1 // Load 16 bit rotate-left constant + MOVO 7*16(Stack), X2 // Load 8 bit rotate-left constant + +CHACHA_LOOP_192: + CHACHA_QROUND_SSSE3(X12, X13, X14, X15, X0, X1, X2) + CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X0, X1, X2) + CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X0, X1, X2) + CHACHA_SHUFFLE_SSE(X13, X14, X15) + CHACHA_SHUFFLE_SSE(X9, X10, X11) + CHACHA_SHUFFLE_SSE(X5, X6, X7) + CHACHA_QROUND_SSSE3(X12, X13, X14, X15, X0, X1, X2) + CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X0, X1, X2) + CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X0, X1, X2) + CHACHA_SHUFFLE_SSE(X15, X14, X13) + CHACHA_SHUFFLE_SSE(X11, X10, X9) + CHACHA_SHUFFLE_SSE(X7, X6, X5) + SUBQ $2, Tmp0 + JNZ CHACHA_LOOP_192 + + MOVO 0*16(Stack), X0 // Restore X0 + MOVO 1*16(Stack), X1 // Restore X1 + MOVO 2*16(Stack), X2 // Restore X2 + PADDL X0, X12 + PADDL X1, X13 + PADDL X2, X14 + PADDL X3, X15 + PADDQ 4*16(Stack), X3 + PADDL X0, X8 + PADDL X1, X9 + PADDL X2, X10 + PADDL X3, X11 + PADDQ 4*16(Stack), X3 + PADDL X0, X4 + PADDL X1, X5 + PADDL X2, X6 + PADDL X3, X7 + PADDQ 4*16(Stack), X3 + + XOR_SSE(Dst, Src, 0, X12, X13, X14, X15, X0) + XOR_SSE(Dst, Src, 64, X8, X9, X10, X11, X0) + MOVO 0*16(Stack), X0 // Restore X0 + ADDQ $128, Dst + ADDQ $128, Src + SUBQ $128, Len + + CMPQ Len, $64 + JL BUFFER_KEYSTREAM + + XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X8) + ADDQ $64, Dst + ADDQ $64, Src + SUBQ $64, Len + JZ DONE + CMPQ Len, $64 // If Len <= 64 -> gen. only 64 byte keystream. 
+ JLE GENERATE_KEYSTREAM_64 + +GENERATE_KEYSTREAM_128: + MOVO X0, X8 + MOVO X1, X9 + MOVO X2, X10 + MOVO X3, X11 + MOVO X0, X4 + MOVO X1, X5 + MOVO X2, X6 + MOVO X3, X7 + PADDQ 4*16(Stack), X7 + MOVQ Rounds, Tmp0 + + MOVO 6*16(Stack), X13 // Load 16 bit rotate-left constant + MOVO 7*16(Stack), X14 // Load 8 bit rotate-left constant + +CHACHA_LOOP_128: + CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X12, X13, X14) + CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X12, X13, X14) + CHACHA_SHUFFLE_SSE(X9, X10, X11) + CHACHA_SHUFFLE_SSE(X5, X6, X7) + CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X12, X13, X14) + CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X12, X13, X14) + CHACHA_SHUFFLE_SSE(X11, X10, X9) + CHACHA_SHUFFLE_SSE(X7, X6, X5) + SUBQ $2, Tmp0 + JNZ CHACHA_LOOP_128 + + PADDL X0, X8 + PADDL X1, X9 + PADDL X2, X10 + PADDL X3, X11 + PADDQ 4*16(Stack), X3 + PADDL X0, X4 + PADDL X1, X5 + PADDL X2, X6 + PADDL X3, X7 + PADDQ 4*16(Stack), X3 + + XOR_SSE(Dst, Src, 0, X8, X9, X10, X11, X12) + ADDQ $64, Dst + ADDQ $64, Src + SUBQ $64, Len + + CMPQ Len, $64 + JL BUFFER_KEYSTREAM + + XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X8) + ADDQ $64, Dst + ADDQ $64, Src + SUBQ $64, Len + JZ DONE // If Len == 0 -> DONE, otherwise Len <= 64 -> gen 64 byte keystream + +GENERATE_KEYSTREAM_64: + MOVO X0, X4 + MOVO X1, X5 + MOVO X2, X6 + MOVO X3, X7 + MOVQ Rounds, Tmp0 + + MOVO 6*16(Stack), X9 // Load 16 bit rotate-left constant + MOVO 7*16(Stack), X10 // Load 8 bit rotate-left constant + +CHACHA_LOOP_64: + CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10) + CHACHA_SHUFFLE_SSE(X5, X6, X7) + CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10) + CHACHA_SHUFFLE_SSE(X7, X6, X5) + SUBQ $2, Tmp0 + JNZ CHACHA_LOOP_64 + + PADDL X0, X4 + PADDL X1, X5 + PADDL X2, X6 + PADDL X3, X7 + PADDQ 4*16(Stack), X3 + + CMPQ Len, $64 + JL BUFFER_KEYSTREAM + + XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X8) + ADDQ $64, Src + ADDQ $64, Dst + SUBQ $64, Len + JMP DONE // jump directly to DONE - there is no keystream to buffer, Len == 0 always true. 
+ +BUFFER_KEYSTREAM: + MOVOU X4, 0*16(Buffer) + MOVOU X5, 1*16(Buffer) + MOVOU X6, 2*16(Buffer) + MOVOU X7, 3*16(Buffer) + MOVQ Len, Tmp0 + FINALIZE(Dst, Src, Buffer, Tmp0, Tmp1, Tmp2) + +DONE: + MOVQ SavedSP, Stack // Restore stack pointer + MOVOU X3, 3*16(State) + MOVQ Len, ret+72(FP) + RET + +// func xorKeyStreamAVX(dst, src []byte, block, state *[64]byte, rounds int) int +TEXT ·xorKeyStreamAVX(SB), 4, $144-80 + MOVQ dst_base+0(FP), Dst + MOVQ src_base+24(FP), Src + MOVQ block+48(FP), Buffer + MOVQ state+56(FP), State + MOVQ rounds+64(FP), Rounds + MOVQ src_len+32(FP), Len + + VMOVDQU 0*16(State), X0 + VMOVDQU 1*16(State), X1 + VMOVDQU 2*16(State), X2 + VMOVDQU 3*16(State), X3 + + MOVQ Stack, SavedSP + ADDQ $16, Stack + ANDQ $-16, Stack + + TESTQ Len, Len + JZ DONE + + VMOVDQU ·one<>(SB), X4 + VMOVDQU ·rol16<>(SB), X5 + VMOVDQU ·rol8<>(SB), X6 + VMOVDQA X0, 0*16(Stack) + VMOVDQA X1, 1*16(Stack) + VMOVDQA X2, 2*16(Stack) + VMOVDQA X3, 3*16(Stack) + VMOVDQA X4, 4*16(Stack) + VMOVDQA X5, 6*16(Stack) + VMOVDQA X6, 7*16(Stack) + + CMPQ Len, $64 + JLE GENERATE_KEYSTREAM_64 + CMPQ Len, $128 + JLE GENERATE_KEYSTREAM_128 + CMPQ Len, $192 + JLE GENERATE_KEYSTREAM_192 + +GENERATE_KEYSTREAM_256: + VMOVDQA X0, X12 + VMOVDQA X1, X13 + VMOVDQA X2, X14 + VMOVDQA X3, X15 + VPADDQ 4*16(Stack), X15, X15 + VMOVDQA X0, X8 + VMOVDQA X1, X9 + VMOVDQA X2, X10 + VMOVDQA X15, X11 + VPADDQ 4*16(Stack), X11, X11 + VMOVDQA X0, X4 + VMOVDQA X1, X5 + VMOVDQA X2, X6 + VMOVDQA X11, X7 + VPADDQ 4*16(Stack), X7, X7 + MOVQ Rounds, Tmp0 + + VMOVDQA X3, 3*16(Stack) // Save X3 + +CHACHA_LOOP_256: + VMOVDQA X4, 5*16(Stack) + CHACHA_QROUND_AVX(X0, X1, X2, X3, X4, 6*16(Stack), 7*16(Stack)) + CHACHA_QROUND_AVX(X12, X13, X14, X15, X4, 6*16(Stack), 7*16(Stack)) + VMOVDQA 5*16(Stack), X4 + VMOVDQA X0, 5*16(Stack) + CHACHA_QROUND_AVX(X8, X9, X10, X11, X0, 6*16(Stack), 7*16(Stack)) + CHACHA_QROUND_AVX(X4, X5, X6, X7, X0, 6*16(Stack), 7*16(Stack)) + VMOVDQA 5*16(Stack), X0 + CHACHA_SHUFFLE_AVX(X1, X2, X3) + 
CHACHA_SHUFFLE_AVX(X13, X14, X15) + CHACHA_SHUFFLE_AVX(X9, X10, X11) + CHACHA_SHUFFLE_AVX(X5, X6, X7) + VMOVDQA X4, 5*16(Stack) + CHACHA_QROUND_AVX(X0, X1, X2, X3, X4, 6*16(Stack), 7*16(Stack)) + CHACHA_QROUND_AVX(X12, X13, X14, X15, X4, 6*16(Stack), 7*16(Stack)) + VMOVDQA 5*16(Stack), X4 + VMOVDQA X0, 5*16(Stack) + CHACHA_QROUND_AVX(X8, X9, X10, X11, X0, 6*16(Stack), 7*16(Stack)) + CHACHA_QROUND_AVX(X4, X5, X6, X7, X0, 6*16(Stack), 7*16(Stack)) + VMOVDQA 5*16(Stack), X0 + CHACHA_SHUFFLE_AVX(X3, X2, X1) + CHACHA_SHUFFLE_AVX(X15, X14, X13) + CHACHA_SHUFFLE_AVX(X11, X10, X9) + CHACHA_SHUFFLE_AVX(X7, X6, X5) + SUBQ $2, Tmp0 + JNZ CHACHA_LOOP_256 + + VPADDD 0*16(Stack), X0, X0 + VPADDD 1*16(Stack), X1, X1 + VPADDD 2*16(Stack), X2, X2 + VPADDD 3*16(Stack), X3, X3 + VMOVDQA X4, 5*16(Stack) // Save X4 + XOR_AVX(Dst, Src, 0, X0, X1, X2, X3, X4) + VMOVDQA 5*16(Stack), X4 // Restore X4 + + VMOVDQA 0*16(Stack), X0 + VMOVDQA 1*16(Stack), X1 + VMOVDQA 2*16(Stack), X2 + VMOVDQA 3*16(Stack), X3 + VPADDQ 4*16(Stack), X3, X3 + + VPADDD X0, X12, X12 + VPADDD X1, X13, X13 + VPADDD X2, X14, X14 + VPADDD X3, X15, X15 + VPADDQ 4*16(Stack), X3, X3 + VPADDD X0, X8, X8 + VPADDD X1, X9, X9 + VPADDD X2, X10, X10 + VPADDD X3, X11, X11 + VPADDQ 4*16(Stack), X3, X3 + VPADDD X0, X4, X4 + VPADDD X1, X5, X5 + VPADDD X2, X6, X6 + VPADDD X3, X7, X7 + VPADDQ 4*16(Stack), X3, X3 + + XOR_AVX(Dst, Src, 64, X12, X13, X14, X15, X0) + XOR_AVX(Dst, Src, 128, X8, X9, X10, X11, X0) + VMOVDQA 0*16(Stack), X0 // Restore X0 + ADDQ $192, Dst + ADDQ $192, Src + SUBQ $192, Len + + CMPQ Len, $64 + JL BUFFER_KEYSTREAM + + XOR_AVX(Dst, Src, 0, X4, X5, X6, X7, X8) + ADDQ $64, Dst + ADDQ $64, Src + SUBQ $64, Len + JZ DONE + CMPQ Len, $64 // If Len <= 64 -> gen. only 64 byte keystream. + JLE GENERATE_KEYSTREAM_64 + CMPQ Len, $128 // If 64 < Len <= 128 -> gen. only 128 byte keystream. + JLE GENERATE_KEYSTREAM_128 + CMPQ Len, $192 // If Len > 192 -> repeat, otherwise Len > 128 && Len <= 192 -> gen. 
192 byte keystream + JG GENERATE_KEYSTREAM_256 + +GENERATE_KEYSTREAM_192: + VMOVDQA X0, X12 + VMOVDQA X1, X13 + VMOVDQA X2, X14 + VMOVDQA X3, X15 + VMOVDQA X0, X8 + VMOVDQA X1, X9 + VMOVDQA X2, X10 + VMOVDQA X3, X11 + VPADDQ 4*16(Stack), X11, X11 + VMOVDQA X0, X4 + VMOVDQA X1, X5 + VMOVDQA X2, X6 + VMOVDQA X11, X7 + VPADDQ 4*16(Stack), X7, X7 + MOVQ Rounds, Tmp0 + + VMOVDQA 6*16(Stack), X1 // Load 16 bit rotate-left constant + VMOVDQA 7*16(Stack), X2 // Load 8 bit rotate-left constant + +CHACHA_LOOP_192: + CHACHA_QROUND_AVX(X12, X13, X14, X15, X0, X1, X2) + CHACHA_QROUND_AVX(X8, X9, X10, X11, X0, X1, X2) + CHACHA_QROUND_AVX(X4, X5, X6, X7, X0, X1, X2) + CHACHA_SHUFFLE_AVX(X13, X14, X15) + CHACHA_SHUFFLE_AVX(X9, X10, X11) + CHACHA_SHUFFLE_AVX(X5, X6, X7) + CHACHA_QROUND_AVX(X12, X13, X14, X15, X0, X1, X2) + CHACHA_QROUND_AVX(X8, X9, X10, X11, X0, X1, X2) + CHACHA_QROUND_AVX(X4, X5, X6, X7, X0, X1, X2) + CHACHA_SHUFFLE_AVX(X15, X14, X13) + CHACHA_SHUFFLE_AVX(X11, X10, X9) + CHACHA_SHUFFLE_AVX(X7, X6, X5) + SUBQ $2, Tmp0 + JNZ CHACHA_LOOP_192 + + VMOVDQA 0*16(Stack), X0 // Restore X0 + VMOVDQA 1*16(Stack), X1 // Restore X1 + VMOVDQA 2*16(Stack), X2 // Restore X2 + VPADDD X0, X12, X12 + VPADDD X1, X13, X13 + VPADDD X2, X14, X14 + VPADDD X3, X15, X15 + VPADDQ 4*16(Stack), X3, X3 + VPADDD X0, X8, X8 + VPADDD X1, X9, X9 + VPADDD X2, X10, X10 + VPADDD X3, X11, X11 + VPADDQ 4*16(Stack), X3, X3 + VPADDD X0, X4, X4 + VPADDD X1, X5, X5 + VPADDD X2, X6, X6 + VPADDD X3, X7, X7 + VPADDQ 4*16(Stack), X3, X3 + + XOR_AVX(Dst, Src, 0, X12, X13, X14, X15, X0) + XOR_AVX(Dst, Src, 64, X8, X9, X10, X11, X0) + VMOVDQA 0*16(Stack), X0 // Restore X0 + ADDQ $128, Dst + ADDQ $128, Src + SUBQ $128, Len + + CMPQ Len, $64 + JL BUFFER_KEYSTREAM + + XOR_AVX(Dst, Src, 0, X4, X5, X6, X7, X8) + ADDQ $64, Dst + ADDQ $64, Src + SUBQ $64, Len + JZ DONE + CMPQ Len, $64 // If Len <= 64 -> gen. only 64 byte keystream. 
+ JLE GENERATE_KEYSTREAM_64 + +GENERATE_KEYSTREAM_128: + VMOVDQA X0, X8 + VMOVDQA X1, X9 + VMOVDQA X2, X10 + VMOVDQA X3, X11 + VMOVDQA X0, X4 + VMOVDQA X1, X5 + VMOVDQA X2, X6 + VMOVDQA X3, X7 + VPADDQ 4*16(Stack), X7, X7 + MOVQ Rounds, Tmp0 + + VMOVDQA 6*16(Stack), X13 // Load 16 bit rotate-left constant + VMOVDQA 7*16(Stack), X14 // Load 8 bit rotate-left constant + +CHACHA_LOOP_128: + CHACHA_QROUND_AVX(X8, X9, X10, X11, X12, X13, X14) + CHACHA_QROUND_AVX(X4, X5, X6, X7, X12, X13, X14) + CHACHA_SHUFFLE_AVX(X9, X10, X11) + CHACHA_SHUFFLE_AVX(X5, X6, X7) + CHACHA_QROUND_AVX(X8, X9, X10, X11, X12, X13, X14) + CHACHA_QROUND_AVX(X4, X5, X6, X7, X12, X13, X14) + CHACHA_SHUFFLE_AVX(X11, X10, X9) + CHACHA_SHUFFLE_AVX(X7, X6, X5) + SUBQ $2, Tmp0 + JNZ CHACHA_LOOP_128 + + VPADDD X0, X8, X8 + VPADDD X1, X9, X9 + VPADDD X2, X10, X10 + VPADDD X3, X11, X11 + VPADDQ 4*16(Stack), X3, X3 + VPADDD X0, X4, X4 + VPADDD X1, X5, X5 + VPADDD X2, X6, X6 + VPADDD X3, X7, X7 + VPADDQ 4*16(Stack), X3, X3 + + XOR_AVX(Dst, Src, 0, X8, X9, X10, X11, X12) + ADDQ $64, Dst + ADDQ $64, Src + SUBQ $64, Len + + CMPQ Len, $64 + JL BUFFER_KEYSTREAM + + XOR_AVX(Dst, Src, 0, X4, X5, X6, X7, X8) + ADDQ $64, Dst + ADDQ $64, Src + SUBQ $64, Len + JZ DONE // If Len == 0 -> DONE, otherwise Len <= 64 -> gen 64 byte keystream + +GENERATE_KEYSTREAM_64: + VMOVDQA X0, X4 + VMOVDQA X1, X5 + VMOVDQA X2, X6 + VMOVDQA X3, X7 + MOVQ Rounds, Tmp0 + + VMOVDQA 6*16(Stack), X9 // Load 16 bit rotate-left constant + VMOVDQA 7*16(Stack), X10 // Load 8 bit rotate-left constant + +CHACHA_LOOP_64: + CHACHA_QROUND_AVX(X4, X5, X6, X7, X8, X9, X10) + CHACHA_SHUFFLE_AVX(X5, X6, X7) + CHACHA_QROUND_AVX(X4, X5, X6, X7, X8, X9, X10) + CHACHA_SHUFFLE_AVX(X7, X6, X5) + SUBQ $2, Tmp0 + JNZ CHACHA_LOOP_64 + + VPADDD X0, X4, X4 + VPADDD X1, X5, X5 + VPADDD X2, X6, X6 + VPADDD X3, X7, X7 + VPADDQ 4*16(Stack), X3, X3 + + CMPQ Len, $64 + JL BUFFER_KEYSTREAM + + XOR_AVX(Dst, Src, 0, X4, X5, X6, X7, X8) + ADDQ $64, Src + ADDQ $64, Dst + SUBQ 
$64, Len + JMP DONE // jump directly to DONE - there is no keystream to buffer, Len == 0 always true. + +BUFFER_KEYSTREAM: + VMOVDQU X4, 0*16(Buffer) + VMOVDQU X5, 1*16(Buffer) + VMOVDQU X6, 2*16(Buffer) + VMOVDQU X7, 3*16(Buffer) + MOVQ Len, Tmp0 + FINALIZE(Dst, Src, Buffer, Tmp0, Tmp1, Tmp2) + +DONE: + MOVQ SavedSP, Stack // Restore stack pointer + VMOVDQU X3, 3*16(State) + VZEROUPPER + MOVQ Len, ret+72(FP) + RET + +#undef Dst +#undef Src +#undef Len +#undef Rounds +#undef Buffer +#undef State +#undef Stack +#undef SavedSP +#undef Tmp0 +#undef Tmp1 +#undef Tmp2 diff --git a/vendor/github.com/aead/chacha20/chacha/chacha_generic.go b/vendor/github.com/aead/chacha20/chacha/chacha_generic.go new file mode 100644 index 0000000..8832d5b --- /dev/null +++ b/vendor/github.com/aead/chacha20/chacha/chacha_generic.go @@ -0,0 +1,319 @@ +// Copyright (c) 2016 Andreas Auernhammer. All rights reserved. +// Use of this source code is governed by a license that can be +// found in the LICENSE file. + +package chacha + +import "encoding/binary" + +var sigma = [4]uint32{0x61707865, 0x3320646e, 0x79622d32, 0x6b206574} + +func xorKeyStreamGeneric(dst, src []byte, block, state *[64]byte, rounds int) int { + for len(src) >= 64 { + chachaGeneric(block, state, rounds) + + for i, v := range block { + dst[i] = src[i] ^ v + } + src = src[64:] + dst = dst[64:] + } + + n := len(src) + if n > 0 { + chachaGeneric(block, state, rounds) + for i, v := range src { + dst[i] = v ^ block[i] + } + } + return n +} + +func chachaGeneric(dst *[64]byte, state *[64]byte, rounds int) { + v00 := binary.LittleEndian.Uint32(state[0:]) + v01 := binary.LittleEndian.Uint32(state[4:]) + v02 := binary.LittleEndian.Uint32(state[8:]) + v03 := binary.LittleEndian.Uint32(state[12:]) + v04 := binary.LittleEndian.Uint32(state[16:]) + v05 := binary.LittleEndian.Uint32(state[20:]) + v06 := binary.LittleEndian.Uint32(state[24:]) + v07 := binary.LittleEndian.Uint32(state[28:]) + v08 := binary.LittleEndian.Uint32(state[32:]) + 
v09 := binary.LittleEndian.Uint32(state[36:]) + v10 := binary.LittleEndian.Uint32(state[40:]) + v11 := binary.LittleEndian.Uint32(state[44:]) + v12 := binary.LittleEndian.Uint32(state[48:]) + v13 := binary.LittleEndian.Uint32(state[52:]) + v14 := binary.LittleEndian.Uint32(state[56:]) + v15 := binary.LittleEndian.Uint32(state[60:]) + + s00, s01, s02, s03, s04, s05, s06, s07 := v00, v01, v02, v03, v04, v05, v06, v07 + s08, s09, s10, s11, s12, s13, s14, s15 := v08, v09, v10, v11, v12, v13, v14, v15 + + for i := 0; i < rounds; i += 2 { + v00 += v04 + v12 ^= v00 + v12 = (v12 << 16) | (v12 >> 16) + v08 += v12 + v04 ^= v08 + v04 = (v04 << 12) | (v04 >> 20) + v00 += v04 + v12 ^= v00 + v12 = (v12 << 8) | (v12 >> 24) + v08 += v12 + v04 ^= v08 + v04 = (v04 << 7) | (v04 >> 25) + v01 += v05 + v13 ^= v01 + v13 = (v13 << 16) | (v13 >> 16) + v09 += v13 + v05 ^= v09 + v05 = (v05 << 12) | (v05 >> 20) + v01 += v05 + v13 ^= v01 + v13 = (v13 << 8) | (v13 >> 24) + v09 += v13 + v05 ^= v09 + v05 = (v05 << 7) | (v05 >> 25) + v02 += v06 + v14 ^= v02 + v14 = (v14 << 16) | (v14 >> 16) + v10 += v14 + v06 ^= v10 + v06 = (v06 << 12) | (v06 >> 20) + v02 += v06 + v14 ^= v02 + v14 = (v14 << 8) | (v14 >> 24) + v10 += v14 + v06 ^= v10 + v06 = (v06 << 7) | (v06 >> 25) + v03 += v07 + v15 ^= v03 + v15 = (v15 << 16) | (v15 >> 16) + v11 += v15 + v07 ^= v11 + v07 = (v07 << 12) | (v07 >> 20) + v03 += v07 + v15 ^= v03 + v15 = (v15 << 8) | (v15 >> 24) + v11 += v15 + v07 ^= v11 + v07 = (v07 << 7) | (v07 >> 25) + v00 += v05 + v15 ^= v00 + v15 = (v15 << 16) | (v15 >> 16) + v10 += v15 + v05 ^= v10 + v05 = (v05 << 12) | (v05 >> 20) + v00 += v05 + v15 ^= v00 + v15 = (v15 << 8) | (v15 >> 24) + v10 += v15 + v05 ^= v10 + v05 = (v05 << 7) | (v05 >> 25) + v01 += v06 + v12 ^= v01 + v12 = (v12 << 16) | (v12 >> 16) + v11 += v12 + v06 ^= v11 + v06 = (v06 << 12) | (v06 >> 20) + v01 += v06 + v12 ^= v01 + v12 = (v12 << 8) | (v12 >> 24) + v11 += v12 + v06 ^= v11 + v06 = (v06 << 7) | (v06 >> 25) + v02 += v07 + v13 ^= v02 + v13 
= (v13 << 16) | (v13 >> 16) + v08 += v13 + v07 ^= v08 + v07 = (v07 << 12) | (v07 >> 20) + v02 += v07 + v13 ^= v02 + v13 = (v13 << 8) | (v13 >> 24) + v08 += v13 + v07 ^= v08 + v07 = (v07 << 7) | (v07 >> 25) + v03 += v04 + v14 ^= v03 + v14 = (v14 << 16) | (v14 >> 16) + v09 += v14 + v04 ^= v09 + v04 = (v04 << 12) | (v04 >> 20) + v03 += v04 + v14 ^= v03 + v14 = (v14 << 8) | (v14 >> 24) + v09 += v14 + v04 ^= v09 + v04 = (v04 << 7) | (v04 >> 25) + } + + v00 += s00 + v01 += s01 + v02 += s02 + v03 += s03 + v04 += s04 + v05 += s05 + v06 += s06 + v07 += s07 + v08 += s08 + v09 += s09 + v10 += s10 + v11 += s11 + v12 += s12 + v13 += s13 + v14 += s14 + v15 += s15 + + s12++ + binary.LittleEndian.PutUint32(state[48:], s12) + if s12 == 0 { // indicates overflow + s13++ + binary.LittleEndian.PutUint32(state[52:], s13) + } + + binary.LittleEndian.PutUint32(dst[0:], v00) + binary.LittleEndian.PutUint32(dst[4:], v01) + binary.LittleEndian.PutUint32(dst[8:], v02) + binary.LittleEndian.PutUint32(dst[12:], v03) + binary.LittleEndian.PutUint32(dst[16:], v04) + binary.LittleEndian.PutUint32(dst[20:], v05) + binary.LittleEndian.PutUint32(dst[24:], v06) + binary.LittleEndian.PutUint32(dst[28:], v07) + binary.LittleEndian.PutUint32(dst[32:], v08) + binary.LittleEndian.PutUint32(dst[36:], v09) + binary.LittleEndian.PutUint32(dst[40:], v10) + binary.LittleEndian.PutUint32(dst[44:], v11) + binary.LittleEndian.PutUint32(dst[48:], v12) + binary.LittleEndian.PutUint32(dst[52:], v13) + binary.LittleEndian.PutUint32(dst[56:], v14) + binary.LittleEndian.PutUint32(dst[60:], v15) +} + +func hChaCha20Generic(out *[32]byte, nonce *[16]byte, key *[32]byte) { + v00 := sigma[0] + v01 := sigma[1] + v02 := sigma[2] + v03 := sigma[3] + v04 := binary.LittleEndian.Uint32(key[0:]) + v05 := binary.LittleEndian.Uint32(key[4:]) + v06 := binary.LittleEndian.Uint32(key[8:]) + v07 := binary.LittleEndian.Uint32(key[12:]) + v08 := binary.LittleEndian.Uint32(key[16:]) + v09 := binary.LittleEndian.Uint32(key[20:]) + v10 := 
binary.LittleEndian.Uint32(key[24:]) + v11 := binary.LittleEndian.Uint32(key[28:]) + v12 := binary.LittleEndian.Uint32(nonce[0:]) + v13 := binary.LittleEndian.Uint32(nonce[4:]) + v14 := binary.LittleEndian.Uint32(nonce[8:]) + v15 := binary.LittleEndian.Uint32(nonce[12:]) + + for i := 0; i < 20; i += 2 { + v00 += v04 + v12 ^= v00 + v12 = (v12 << 16) | (v12 >> 16) + v08 += v12 + v04 ^= v08 + v04 = (v04 << 12) | (v04 >> 20) + v00 += v04 + v12 ^= v00 + v12 = (v12 << 8) | (v12 >> 24) + v08 += v12 + v04 ^= v08 + v04 = (v04 << 7) | (v04 >> 25) + v01 += v05 + v13 ^= v01 + v13 = (v13 << 16) | (v13 >> 16) + v09 += v13 + v05 ^= v09 + v05 = (v05 << 12) | (v05 >> 20) + v01 += v05 + v13 ^= v01 + v13 = (v13 << 8) | (v13 >> 24) + v09 += v13 + v05 ^= v09 + v05 = (v05 << 7) | (v05 >> 25) + v02 += v06 + v14 ^= v02 + v14 = (v14 << 16) | (v14 >> 16) + v10 += v14 + v06 ^= v10 + v06 = (v06 << 12) | (v06 >> 20) + v02 += v06 + v14 ^= v02 + v14 = (v14 << 8) | (v14 >> 24) + v10 += v14 + v06 ^= v10 + v06 = (v06 << 7) | (v06 >> 25) + v03 += v07 + v15 ^= v03 + v15 = (v15 << 16) | (v15 >> 16) + v11 += v15 + v07 ^= v11 + v07 = (v07 << 12) | (v07 >> 20) + v03 += v07 + v15 ^= v03 + v15 = (v15 << 8) | (v15 >> 24) + v11 += v15 + v07 ^= v11 + v07 = (v07 << 7) | (v07 >> 25) + v00 += v05 + v15 ^= v00 + v15 = (v15 << 16) | (v15 >> 16) + v10 += v15 + v05 ^= v10 + v05 = (v05 << 12) | (v05 >> 20) + v00 += v05 + v15 ^= v00 + v15 = (v15 << 8) | (v15 >> 24) + v10 += v15 + v05 ^= v10 + v05 = (v05 << 7) | (v05 >> 25) + v01 += v06 + v12 ^= v01 + v12 = (v12 << 16) | (v12 >> 16) + v11 += v12 + v06 ^= v11 + v06 = (v06 << 12) | (v06 >> 20) + v01 += v06 + v12 ^= v01 + v12 = (v12 << 8) | (v12 >> 24) + v11 += v12 + v06 ^= v11 + v06 = (v06 << 7) | (v06 >> 25) + v02 += v07 + v13 ^= v02 + v13 = (v13 << 16) | (v13 >> 16) + v08 += v13 + v07 ^= v08 + v07 = (v07 << 12) | (v07 >> 20) + v02 += v07 + v13 ^= v02 + v13 = (v13 << 8) | (v13 >> 24) + v08 += v13 + v07 ^= v08 + v07 = (v07 << 7) | (v07 >> 25) + v03 += v04 + v14 ^= v03 + 
v14 = (v14 << 16) | (v14 >> 16) + v09 += v14 + v04 ^= v09 + v04 = (v04 << 12) | (v04 >> 20) + v03 += v04 + v14 ^= v03 + v14 = (v14 << 8) | (v14 >> 24) + v09 += v14 + v04 ^= v09 + v04 = (v04 << 7) | (v04 >> 25) + } + + binary.LittleEndian.PutUint32(out[0:], v00) + binary.LittleEndian.PutUint32(out[4:], v01) + binary.LittleEndian.PutUint32(out[8:], v02) + binary.LittleEndian.PutUint32(out[12:], v03) + binary.LittleEndian.PutUint32(out[16:], v12) + binary.LittleEndian.PutUint32(out[20:], v13) + binary.LittleEndian.PutUint32(out[24:], v14) + binary.LittleEndian.PutUint32(out[28:], v15) +} diff --git a/vendor/github.com/aead/chacha20/chacha/chacha_ref.go b/vendor/github.com/aead/chacha20/chacha/chacha_ref.go new file mode 100644 index 0000000..526877c --- /dev/null +++ b/vendor/github.com/aead/chacha20/chacha/chacha_ref.go @@ -0,0 +1,33 @@ +// Copyright (c) 2016 Andreas Auernhammer. All rights reserved. +// Use of this source code is governed by a license that can be +// found in the LICENSE file. 
+ +// +build !amd64,!386 gccgo appengine nacl + +package chacha + +import "encoding/binary" + +func init() { + useSSE2 = false + useSSSE3 = false + useAVX = false + useAVX2 = false +} + +func initialize(state *[64]byte, key []byte, nonce *[16]byte) { + binary.LittleEndian.PutUint32(state[0:], sigma[0]) + binary.LittleEndian.PutUint32(state[4:], sigma[1]) + binary.LittleEndian.PutUint32(state[8:], sigma[2]) + binary.LittleEndian.PutUint32(state[12:], sigma[3]) + copy(state[16:], key[:]) + copy(state[48:], nonce[:]) +} + +func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int { + return xorKeyStreamGeneric(dst, src, block, state, rounds) +} + +func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) { + hChaCha20Generic(out, nonce, key) +} diff --git a/vendor/github.com/aead/chacha20/chacha/const.s b/vendor/github.com/aead/chacha20/chacha/const.s new file mode 100644 index 0000000..c7a94a4 --- /dev/null +++ b/vendor/github.com/aead/chacha20/chacha/const.s @@ -0,0 +1,53 @@ +// Copyright (c) 2018 Andreas Auernhammer. All rights reserved. +// Use of this source code is governed by a license that can be +// found in the LICENSE file. 
+ +// +build 386,!gccgo,!appengine,!nacl amd64,!gccgo,!appengine,!nacl + +#include "textflag.h" + +DATA ·sigma<>+0x00(SB)/4, $0x61707865 +DATA ·sigma<>+0x04(SB)/4, $0x3320646e +DATA ·sigma<>+0x08(SB)/4, $0x79622d32 +DATA ·sigma<>+0x0C(SB)/4, $0x6b206574 +GLOBL ·sigma<>(SB), (NOPTR+RODATA), $16 // The 4 ChaCha initialization constants + +// SSE2/SSE3/AVX constants + +DATA ·one<>+0x00(SB)/8, $1 +DATA ·one<>+0x08(SB)/8, $0 +GLOBL ·one<>(SB), (NOPTR+RODATA), $16 // The constant 1 as 128 bit value + +DATA ·rol16<>+0x00(SB)/8, $0x0504070601000302 +DATA ·rol16<>+0x08(SB)/8, $0x0D0C0F0E09080B0A +GLOBL ·rol16<>(SB), (NOPTR+RODATA), $16 // The PSHUFB 16 bit left rotate constant + +DATA ·rol8<>+0x00(SB)/8, $0x0605040702010003 +DATA ·rol8<>+0x08(SB)/8, $0x0E0D0C0F0A09080B +GLOBL ·rol8<>(SB), (NOPTR+RODATA), $16 // The PSHUFB 8 bit left rotate constant + +// AVX2 constants + +DATA ·one_AVX2<>+0x00(SB)/8, $0 +DATA ·one_AVX2<>+0x08(SB)/8, $0 +DATA ·one_AVX2<>+0x10(SB)/8, $1 +DATA ·one_AVX2<>+0x18(SB)/8, $0 +GLOBL ·one_AVX2<>(SB), (NOPTR+RODATA), $32 // The constant 1 as 256 bit value + +DATA ·two_AVX2<>+0x00(SB)/8, $2 +DATA ·two_AVX2<>+0x08(SB)/8, $0 +DATA ·two_AVX2<>+0x10(SB)/8, $2 +DATA ·two_AVX2<>+0x18(SB)/8, $0 +GLOBL ·two_AVX2<>(SB), (NOPTR+RODATA), $32 + +DATA ·rol16_AVX2<>+0x00(SB)/8, $0x0504070601000302 +DATA ·rol16_AVX2<>+0x08(SB)/8, $0x0D0C0F0E09080B0A +DATA ·rol16_AVX2<>+0x10(SB)/8, $0x0504070601000302 +DATA ·rol16_AVX2<>+0x18(SB)/8, $0x0D0C0F0E09080B0A +GLOBL ·rol16_AVX2<>(SB), (NOPTR+RODATA), $32 // The VPSHUFB 16 bit left rotate constant + +DATA ·rol8_AVX2<>+0x00(SB)/8, $0x0605040702010003 +DATA ·rol8_AVX2<>+0x08(SB)/8, $0x0E0D0C0F0A09080B +DATA ·rol8_AVX2<>+0x10(SB)/8, $0x0605040702010003 +DATA ·rol8_AVX2<>+0x18(SB)/8, $0x0E0D0C0F0A09080B +GLOBL ·rol8_AVX2<>(SB), (NOPTR+RODATA), $32 // The VPSHUFB 8 bit left rotate constant diff --git a/vendor/github.com/aead/chacha20/chacha/macro.s b/vendor/github.com/aead/chacha20/chacha/macro.s new file mode 100644 index 
0000000..780108f --- /dev/null +++ b/vendor/github.com/aead/chacha20/chacha/macro.s @@ -0,0 +1,163 @@ +// Copyright (c) 2018 Andreas Auernhammer. All rights reserved. +// Use of this source code is governed by a license that can be +// found in the LICENSE file. + +// +build 386,!gccgo,!appengine,!nacl amd64,!gccgo,!appengine,!nacl + +// ROTL_SSE rotates all 4 32 bit values of the XMM register v +// left by n bits using SSE2 instructions (0 <= n <= 32). +// The XMM register t is used as a temp. register. +#define ROTL_SSE(n, t, v) \ + MOVO v, t; \ + PSLLL $n, t; \ + PSRLL $(32-n), v; \ + PXOR t, v + +// ROTL_AVX rotates all 4/8 32 bit values of the AVX/AVX2 register v +// left by n bits using AVX/AVX2 instructions (0 <= n <= 32). +// The AVX/AVX2 register t is used as a temp. register. +#define ROTL_AVX(n, t, v) \ + VPSLLD $n, v, t; \ + VPSRLD $(32-n), v, v; \ + VPXOR v, t, v + +// CHACHA_QROUND_SSE2 performs a ChaCha quarter-round using the +// 4 XMM registers v0, v1, v2 and v3. It uses only ROTL_SSE2 for +// rotations. The XMM register t is used as a temp. register. +#define CHACHA_QROUND_SSE2(v0, v1, v2, v3, t) \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSE(16, t, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE(12, t, v1); \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSE(8, t, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE(7, t, v1) + +// CHACHA_QROUND_SSSE3 performs a ChaCha quarter-round using the +// 4 XMM registers v0, v1, v2 and v3. It uses PSHUFB for 8/16 bit +// rotations. The XMM register t is used as a temp. register. +// +// r16 holds the PSHUFB constant for a 16 bit left rotate. +// r8 holds the PSHUFB constant for a 8 bit left rotate. 
+#define CHACHA_QROUND_SSSE3(v0, v1, v2, v3, t, r16, r8) \ + PADDL v1, v0; \ + PXOR v0, v3; \ + PSHUFB r16, v3; \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE(12, t, v1); \ + PADDL v1, v0; \ + PXOR v0, v3; \ + PSHUFB r8, v3; \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE(7, t, v1) + +// CHACHA_QROUND_AVX performs a ChaCha quarter-round using the +// 4 AVX/AVX2 registers v0, v1, v2 and v3. It uses VPSHUFB for 8/16 bit +// rotations. The AVX/AVX2 register t is used as a temp. register. +// +// r16 holds the VPSHUFB constant for a 16 bit left rotate. +// r8 holds the VPSHUFB constant for a 8 bit left rotate. +#define CHACHA_QROUND_AVX(v0, v1, v2, v3, t, r16, r8) \ + VPADDD v0, v1, v0; \ + VPXOR v3, v0, v3; \ + VPSHUFB r16, v3, v3; \ + VPADDD v2, v3, v2; \ + VPXOR v1, v2, v1; \ + ROTL_AVX(12, t, v1); \ + VPADDD v0, v1, v0; \ + VPXOR v3, v0, v3; \ + VPSHUFB r8, v3, v3; \ + VPADDD v2, v3, v2; \ + VPXOR v1, v2, v1; \ + ROTL_AVX(7, t, v1) + +// CHACHA_SHUFFLE_SSE performs a ChaCha shuffle using the +// 3 XMM registers v1, v2 and v3. The inverse shuffle is +// performed by switching v1 and v3: CHACHA_SHUFFLE_SSE(v3, v2, v1). +#define CHACHA_SHUFFLE_SSE(v1, v2, v3) \ + PSHUFL $0x39, v1, v1; \ + PSHUFL $0x4E, v2, v2; \ + PSHUFL $0x93, v3, v3 + +// CHACHA_SHUFFLE_AVX performs a ChaCha shuffle using the +// 3 AVX/AVX2 registers v1, v2 and v3. The inverse shuffle is +// performed by switching v1 and v3: CHACHA_SHUFFLE_AVX(v3, v2, v1). +#define CHACHA_SHUFFLE_AVX(v1, v2, v3) \ + VPSHUFD $0x39, v1, v1; \ + VPSHUFD $0x4E, v2, v2; \ + VPSHUFD $0x93, v3, v3 + +// XOR_SSE extracts 4x16 byte vectors from src at +// off, xors all vectors with the corresponding XMM +// register (v0 - v3) and writes the result to dst +// at off. +// The XMM register t is used as a temp. register. 
+#define XOR_SSE(dst, src, off, v0, v1, v2, v3, t) \ + MOVOU 0+off(src), t; \ + PXOR v0, t; \ + MOVOU t, 0+off(dst); \ + MOVOU 16+off(src), t; \ + PXOR v1, t; \ + MOVOU t, 16+off(dst); \ + MOVOU 32+off(src), t; \ + PXOR v2, t; \ + MOVOU t, 32+off(dst); \ + MOVOU 48+off(src), t; \ + PXOR v3, t; \ + MOVOU t, 48+off(dst) + +// XOR_AVX extracts 4x16 byte vectors from src at +// off, xors all vectors with the corresponding AVX +// register (v0 - v3) and writes the result to dst +// at off. +// The XMM register t is used as a temp. register. +#define XOR_AVX(dst, src, off, v0, v1, v2, v3, t) \ + VPXOR 0+off(src), v0, t; \ + VMOVDQU t, 0+off(dst); \ + VPXOR 16+off(src), v1, t; \ + VMOVDQU t, 16+off(dst); \ + VPXOR 32+off(src), v2, t; \ + VMOVDQU t, 32+off(dst); \ + VPXOR 48+off(src), v3, t; \ + VMOVDQU t, 48+off(dst) + +#define XOR_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \ + VMOVDQU (0+off)(src), t0; \ + VPERM2I128 $32, v1, v0, t1; \ + VPXOR t0, t1, t0; \ + VMOVDQU t0, (0+off)(dst); \ + VMOVDQU (32+off)(src), t0; \ + VPERM2I128 $32, v3, v2, t1; \ + VPXOR t0, t1, t0; \ + VMOVDQU t0, (32+off)(dst); \ + VMOVDQU (64+off)(src), t0; \ + VPERM2I128 $49, v1, v0, t1; \ + VPXOR t0, t1, t0; \ + VMOVDQU t0, (64+off)(dst); \ + VMOVDQU (96+off)(src), t0; \ + VPERM2I128 $49, v3, v2, t1; \ + VPXOR t0, t1, t0; \ + VMOVDQU t0, (96+off)(dst) + +#define XOR_UPPER_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \ + VMOVDQU (0+off)(src), t0; \ + VPERM2I128 $32, v1, v0, t1; \ + VPXOR t0, t1, t0; \ + VMOVDQU t0, (0+off)(dst); \ + VMOVDQU (32+off)(src), t0; \ + VPERM2I128 $32, v3, v2, t1; \ + VPXOR t0, t1, t0; \ + VMOVDQU t0, (32+off)(dst); \ + +#define EXTRACT_LOWER(dst, v0, v1, v2, v3, t0) \ + VPERM2I128 $49, v1, v0, t0; \ + VMOVDQU t0, 0(dst); \ + VPERM2I128 $49, v3, v2, t0; \ + VMOVDQU t0, 32(dst) diff --git a/vendor/modules.txt b/vendor/modules.txt index 35598c6..3c7837b 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -3,9 +3,9 @@ blitter.com/go/chacha20 
blitter.com/go/chacha20/internal/api blitter.com/go/chacha20/internal/hardware blitter.com/go/chacha20/internal/ref -# blitter.com/go/cryptmt v1.0.0 +# blitter.com/go/cryptmt v1.0.2 blitter.com/go/cryptmt -# blitter.com/go/goutmp v1.0.1 +# blitter.com/go/goutmp v1.0.2 blitter.com/go/goutmp # blitter.com/go/herradurakex v1.0.0 blitter.com/go/herradurakex @@ -15,8 +15,8 @@ blitter.com/go/kyber blitter.com/go/mtwist # blitter.com/go/newhope v0.0.0-20200130200750-192fc08a8aae blitter.com/go/newhope -# blitter.com/go/wanderer v0.8.1 -blitter.com/go/wanderer +# github.com/aead/chacha20 v0.0.0-20180709150244-8b13a72661da +github.com/aead/chacha20/chacha # github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f github.com/jameskeane/bcrypt # github.com/klauspost/cpuid v1.2.2 diff --git a/xs/xs.go b/xs/xs.go index c670e25..ce924bc 100755 --- a/xs/xs.go +++ b/xs/xs.go @@ -420,7 +420,7 @@ func doShellMode(isInteractive bool, conn *xsnet.Conn, oldState *xs.State, rec * // exit with inerr == nil _, inerr := io.Copy(os.Stdout, conn) if inerr != nil { - _ = xs.Restore(int(os.Stdin.Fd()), oldState) // #nosec + restoreTermState(oldState) // Copy operations and user logging off will cause // a "use of closed network connection" so handle that // gracefully here @@ -435,7 +435,7 @@ func doShellMode(isInteractive bool, conn *xsnet.Conn, oldState *xs.State, rec * if isInteractive { log.Println("[* Got EOF *]") - _ = xs.Restore(int(os.Stdin.Fd()), oldState) // #nosec + restoreTermState(oldState) exitWithStatus(int(rec.Status())) } } @@ -463,7 +463,7 @@ func doShellMode(isInteractive bool, conn *xsnet.Conn, oldState *xs.State, rec * if outerr != nil { log.Println(outerr) fmt.Println(outerr) - _ = xs.Restore(int(os.Stdin.Fd()), oldState) // #nosec + restoreTermState(oldState) log.Println("[Hanging up]") exitWithStatus(0) } @@ -624,7 +624,7 @@ func main() { flag.BoolVar(&vopt, "v", false, "show version") flag.BoolVar(&dbg, "d", false, "debug logging") - flag.StringVar(&cipherAlg, 
"c", "C_AES_256", "session `cipher` [C_AES_256 | C_TWOFISH_128 | C_BLOWFISH_64 | C_CRYPTMT1]") + flag.StringVar(&cipherAlg, "c", "C_AES_256", "session `cipher` [C_AES_256 | C_TWOFISH_128 | C_BLOWFISH_64 | C_CRYPTMT1 | C_CHACHA20_12]") flag.StringVar(&hmacAlg, "m", "H_SHA256", "session `HMAC` [H_SHA256 | H_SHA512]") flag.StringVar(&kexAlg, "k", "KEX_HERRADURA512", "KEx `alg` [KEX_HERRADURA{256/512/1024/2048} | KEX_KYBER{512/768/1024} | KEX_NEWHOPE | KEX_NEWHOPE_SIMPLE]") flag.StringVar(&kcpMode, "K", "unused", "KCP `alg`, one of [KCP_NONE | KCP_AES | KCP_BLOWFISH | KCP_CAST5 | KCP_SM4 | KCP_SALSA20 | KCP_SIMPLEXOR | KCP_TEA | KCP_3DES | KCP_TWOFISH | KCP_XTEA] to use KCP (github.com/xtaci/kcp-go) reliable UDP instead of TCP") @@ -848,12 +848,17 @@ func main() { } // #gv:s/label=\"main\$1\"/label=\"deferRestore\"/ // TODO:.gv:main:1:deferRestore - defer func() { _ = xs.Restore(int(os.Stdin.Fd()), oldState) }() // nolint: errcheck,gosec + defer restoreTermState(oldState) } else { log.Println("NOT A TTY") } } + // Start login timeout here and disconnect if user/pass phase stalls + loginTimeout := time.AfterFunc(30*time.Second, func() { + fmt.Printf(" .. 
[login timeout]") + }) + if len(authCookie) == 0 { //No auth token, prompt for password fmt.Printf("Gimme cookie:") @@ -864,6 +869,8 @@ func main() { } authCookie = string(ab) } + + _ = loginTimeout.Stop() // Security scrub runtime.GC() @@ -871,9 +878,9 @@ func main() { rec := xs.NewSession(op, []byte(uname), []byte(remoteHost), []byte(os.Getenv("TERM")), []byte(cmdStr), []byte(authCookie), 0) sendErr := sendSessionParams(&conn, rec) if sendErr != nil { - _ = xs.Restore(int(os.Stdin.Fd()), oldState) // nolint: errcheck,gosec + restoreTermState(oldState) rec.SetStatus(254) - fmt.Fprintln(os.Stderr, "Error: server rejected secure proposal params") // nolint: errcheck + fmt.Fprintln(os.Stderr, "Error: server rejected secure proposal params or login timed out") // nolint: errcheck exitWithStatus(int(rec.Status())) //log.Fatal(sendErr) } @@ -930,19 +937,23 @@ func main() { } if rec.Status() != 0 { - _ = xs.Restore(int(os.Stdin.Fd()), oldState) // nolint: errcheck,gosec + restoreTermState(oldState) fmt.Fprintln(os.Stderr, "Session exited with status:", rec.Status()) // nolint: errcheck } } if oldState != nil { - _ = xs.Restore(int(os.Stdin.Fd()), oldState) // nolint: gosec + restoreTermState(oldState) oldState = nil } exitWithStatus(int(rec.Status())) } +func restoreTermState(oldState *xs.State) { + _ = xs.Restore(int(os.Stdin.Fd()), oldState) // nolint: errcheck,gosec +} + // exitWithStatus wraps os.Exit() plus does any required pprof housekeeping func exitWithStatus(status int) { if cpuprofile != "" { diff --git a/xsd/xsd.go b/xsd/xsd.go index 23d6f25..584f080 100755 --- a/xsd/xsd.go +++ b/xsd/xsd.go @@ -641,6 +641,13 @@ func main() { go func(hc *xsnet.Conn) (e error) { defer hc.Close() // nolint: errcheck + // Start login timeout here and disconnect if user/pass phase stalls + loginTimeout := time.AfterFunc(30*time.Second, func() { + logger.LogNotice(fmt.Sprintln("Login timed out")) // nolint: errcheck,gosec + hc.Write([]byte{0}) // nolint: gosec,errcheck + hc.Close() 
+ }) + //We use io.ReadFull() here to guarantee we consume //just the data we want for the xs.Session, and no more. //Otherwise data will be sitting in the channel that isn't @@ -709,17 +716,18 @@ func main() { var valid bool var allowedCmds string // Currently unused - if xs.AuthUserByToken(string(rec.Who()), string(rec.ConnHost()), string(rec.AuthCookie(true))) { + if xs.AuthUserByToken(xs.NewAuthCtx(), string(rec.Who()), string(rec.ConnHost()), string(rec.AuthCookie(true))) { valid = true } else { if useSystemPasswd { //var passErr error - valid, _ /*passErr*/ = xs.VerifyPass(string(rec.Who()), string(rec.AuthCookie(true))) + valid, _ /*passErr*/ = xs.VerifyPass(xs.NewAuthCtx(), string(rec.Who()), string(rec.AuthCookie(true))) } else { - valid, allowedCmds = xs.AuthUserByPasswd(string(rec.Who()), string(rec.AuthCookie(true)), "/etc/xs.passwd") + valid, allowedCmds = xs.AuthUserByPasswd(xs.NewAuthCtx(), string(rec.Who()), string(rec.AuthCookie(true)), "/etc/xs.passwd") } } + _ = loginTimeout.Stop() // Security scrub rec.ClearAuthCookie() diff --git a/xsnet/chan.go b/xsnet/chan.go index cfc3722..79dfd68 100644 --- a/xsnet/chan.go +++ b/xsnet/chan.go @@ -21,7 +21,7 @@ import ( "log" "blitter.com/go/cryptmt" - "blitter.com/go/wanderer" + "github.com/aead/chacha20/chacha" "golang.org/x/crypto/blowfish" "golang.org/x/crypto/twofish" @@ -103,11 +103,20 @@ func (hc Conn) getStream(keymat []byte) (rc cipher.Stream, mc hash.Hash, err err rc = cipher.NewOFB(block, iv) log.Printf("[cipher BLOWFISH_64 (%d)]\n", copts) case CAlgCryptMT1: - rc = cryptmt.NewCipher(keymat) + rc = cryptmt.New(nil, nil, keymat) log.Printf("[cipher CRYPTMT1 (%d)]\n", copts) - case CAlgWanderer: - rc = wanderer.NewCodec(nil, nil, 1, keymat, 3, 3) - log.Printf("[cipher WANDERER mode 1 (%d)]\n", copts) + case CAlgChaCha20_12: + keymat = expandKeyMat(keymat, chacha.KeySize) + key = keymat[0:chacha.KeySize] + ivlen = chacha.INonceSize + iv = keymat[chacha.KeySize : chacha.KeySize+ivlen] + rc, err = 
chacha.NewCipher(iv, key, 20) + if err != nil { + log.Printf("[ChaCha20 config error]\n") + fmt.Printf("[ChaCha20 config error]\n") + } + // TODO: SetCounter() to something derived from key or nonce or extra keymat? + log.Printf("[cipher CHACHA20_12 (%d)]\n", copts) default: log.Printf("[invalid cipher (%d)]\n", copts) fmt.Printf("DOOFUS SET A VALID CIPHER ALG (%d)\n", copts) diff --git a/xsnet/consts.go b/xsnet/consts.go index a32d501..840937a 100644 --- a/xsnet/consts.go +++ b/xsnet/consts.go @@ -99,7 +99,7 @@ const ( CAlgTwofish128 // golang.org/x/crypto/twofish CAlgBlowfish64 // golang.org/x/crypto/blowfish CAlgCryptMT1 //cryptmt using mtwist64 - CAlgWanderer // inhouse experimental crypto alg + CAlgChaCha20_12 CAlgNoneDisallowed ) diff --git a/xsnet/net.go b/xsnet/net.go index 421b552..1210613 100644 --- a/xsnet/net.go +++ b/xsnet/net.go @@ -41,9 +41,9 @@ import ( "time" hkex "blitter.com/go/herradurakex" - "blitter.com/go/xs/logger" "blitter.com/go/kyber" "blitter.com/go/newhope" + "blitter.com/go/xs/logger" ) /*---------------------------------------------------------------------*/ @@ -145,8 +145,8 @@ func (c *CSCipherAlg) String() string { return "C_BLOWFISH_64" case CAlgCryptMT1: return "C_CRYPTMT1" - case CAlgWanderer: - return "C_WANDERER" + case CAlgChaCha20_12: + return "C_CHACHA20_12" default: return "C_ERR_UNK" } @@ -282,6 +282,8 @@ func _new(kexAlg KEXAlg, conn *net.Conn) (hc *Conn, e error) { hc.kex = KEX_HERRADURA512 log.Printf("[KEx alg %d ?? defaults to %d]\n", kexAlg, hc.kex) } + + //hc.logCipherText = true // !!! DEBUGGING ONLY !!! NEVER DEPLOY this uncommented !!! 
return } @@ -300,7 +302,7 @@ func _new(kexAlg KEXAlg, conn *net.Conn) (hc *Conn, e error) { // // Session (symmetric) crypto // -// C_AES_256 C_TWOFISH_128 C_BLOWFISH_128 C_CRYPTMT1 +// C_AES_256 C_TWOFISH_128 C_BLOWFISH_128 C_CRYPTMT1 C_CHACHA20_12 // // Session HMACs // @@ -324,10 +326,10 @@ func (hc *Conn) applyConnExtensions(extensions ...string) { log.Println("[extension arg = C_CRYPTMT1]") hc.cipheropts &= (0xFFFFFF00) hc.cipheropts |= CAlgCryptMT1 - case "C_WANDERER": - log.Println("[extension arg = C_WANDERER]") + case "C_CHACHA20_12": + log.Println("[extension arg = C_CHACHA20_12]") hc.cipheropts &= (0xFFFFFF00) - hc.cipheropts |= CAlgWanderer + hc.cipheropts |= CAlgChaCha20_12 case "H_SHA256": log.Println("[extension arg = H_SHA256]") hc.cipheropts &= (0xFFFF00FF)