175 lines
4.6 KiB
Go
175 lines
4.6 KiB
Go
|
package origin
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
"net"
|
||
|
"time"
|
||
|
|
||
|
log "github.com/Sirupsen/logrus"
|
||
|
"golang.org/x/net/context"
|
||
|
)
|
||
|
|
||
|
const (
|
||
|
// Waiting time before retrying a failed tunnel connection
|
||
|
tunnelRetryDuration = time.Minute
|
||
|
// Limit on the exponential backoff time period. (2^5 = 32 minutes)
|
||
|
tunnelRetryLimit = 5
|
||
|
// SRV record resolution TTL
|
||
|
resolveTTL = time.Hour
|
||
|
)
|
||
|
|
||
|
type Supervisor struct {
|
||
|
config *TunnelConfig
|
||
|
edgeIPs []*net.TCPAddr
|
||
|
lastResolve time.Time
|
||
|
resolverC chan resolveResult
|
||
|
tunnelErrors chan tunnelError
|
||
|
tunnelsConnecting map[int]chan struct{}
|
||
|
nextConnectedIndex int
|
||
|
nextConnectedSignal chan struct{}
|
||
|
}
|
||
|
|
||
|
type resolveResult struct {
|
||
|
edgeIPs []*net.TCPAddr
|
||
|
err error
|
||
|
}
|
||
|
|
||
|
type tunnelError struct {
|
||
|
index int
|
||
|
err error
|
||
|
}
|
||
|
|
||
|
func NewSupervisor(config *TunnelConfig) *Supervisor {
|
||
|
return &Supervisor{
|
||
|
config: config,
|
||
|
tunnelErrors: make(chan tunnelError),
|
||
|
tunnelsConnecting: map[int]chan struct{}{},
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (s *Supervisor) Run(ctx context.Context, connectedSignal chan struct{}) error {
|
||
|
if err := s.initialize(ctx, connectedSignal); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
tunnelsActive := s.config.HAConnections
|
||
|
tunnelsWaiting := []int{}
|
||
|
backoff := BackoffHandler{MaxRetries: tunnelRetryLimit, BaseTime: tunnelRetryDuration, RetryForever: true}
|
||
|
var backoffTimer <-chan time.Time
|
||
|
for tunnelsActive > 0 {
|
||
|
select {
|
||
|
// Context cancelled
|
||
|
case <-ctx.Done():
|
||
|
for tunnelsActive > 0 {
|
||
|
<-s.tunnelErrors
|
||
|
tunnelsActive--
|
||
|
}
|
||
|
return nil
|
||
|
// startTunnel returned with error
|
||
|
// (note that this may also be caused by context cancellation)
|
||
|
case tunnelError := <-s.tunnelErrors:
|
||
|
tunnelsActive--
|
||
|
if tunnelError.err != nil {
|
||
|
log.WithError(tunnelError.err).Warn("Tunnel disconnected due to error")
|
||
|
tunnelsWaiting = append(tunnelsWaiting, tunnelError.index)
|
||
|
s.waitForNextTunnel(tunnelError.index)
|
||
|
if backoffTimer != nil {
|
||
|
backoffTimer = backoff.BackoffTimer()
|
||
|
}
|
||
|
s.refreshEdgeIPs()
|
||
|
}
|
||
|
// Backoff was set and its timer expired
|
||
|
case <-backoffTimer:
|
||
|
backoffTimer = nil
|
||
|
for _, index := range tunnelsWaiting {
|
||
|
go s.startTunnel(ctx, index, s.newConnectedTunnelSignal(index))
|
||
|
}
|
||
|
tunnelsActive += len(tunnelsWaiting)
|
||
|
tunnelsWaiting = nil
|
||
|
// Tunnel successfully connected
|
||
|
case <-s.nextConnectedSignal:
|
||
|
if !s.waitForNextTunnel(s.nextConnectedIndex) && len(tunnelsWaiting) == 0 {
|
||
|
// No more tunnels outstanding, clear backoff timer
|
||
|
backoff.SetGracePeriod()
|
||
|
}
|
||
|
// DNS resolution returned
|
||
|
case result := <-s.resolverC:
|
||
|
s.lastResolve = time.Now()
|
||
|
s.resolverC = nil
|
||
|
if result.err == nil {
|
||
|
log.Debug("Service discovery refresh complete")
|
||
|
s.edgeIPs = result.edgeIPs
|
||
|
} else {
|
||
|
log.WithError(result.err).Error("Service discovery error")
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return fmt.Errorf("All tunnels terminated")
|
||
|
}
|
||
|
|
||
|
func (s *Supervisor) initialize(ctx context.Context, connectedSignal chan struct{}) error {
|
||
|
edgeIPs, err := ResolveEdgeIPs(s.config.EdgeAddrs)
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
s.edgeIPs = edgeIPs
|
||
|
s.lastResolve = time.Now()
|
||
|
go s.startTunnel(ctx, 0, connectedSignal)
|
||
|
select {
|
||
|
case <-ctx.Done():
|
||
|
<-s.tunnelErrors
|
||
|
return nil
|
||
|
case tunnelError := <-s.tunnelErrors:
|
||
|
return tunnelError.err
|
||
|
case <-connectedSignal:
|
||
|
}
|
||
|
// At least one successful connection, so start the rest
|
||
|
for i := 1; i < s.config.HAConnections; i++ {
|
||
|
go s.startTunnel(ctx, i, make(chan struct{}))
|
||
|
}
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// startTunnel starts a new tunnel connection. The resulting error will be sent on
|
||
|
// s.tunnelErrors.
|
||
|
func (s *Supervisor) startTunnel(ctx context.Context, index int, connectedSignal chan struct{}) {
|
||
|
err := ServeTunnelLoop(ctx, s.config, s.getEdgeIP(index), uint8(index), connectedSignal)
|
||
|
s.tunnelErrors <- tunnelError{index: index, err: err}
|
||
|
}
|
||
|
|
||
|
func (s *Supervisor) newConnectedTunnelSignal(index int) chan struct{} {
|
||
|
signal := make(chan struct{})
|
||
|
s.tunnelsConnecting[index] = signal
|
||
|
s.nextConnectedSignal = signal
|
||
|
s.nextConnectedIndex = index
|
||
|
return signal
|
||
|
}
|
||
|
|
||
|
func (s *Supervisor) waitForNextTunnel(index int) bool {
|
||
|
delete(s.tunnelsConnecting, index)
|
||
|
s.nextConnectedSignal = nil
|
||
|
for k, v := range s.tunnelsConnecting {
|
||
|
s.nextConnectedIndex = k
|
||
|
s.nextConnectedSignal = v
|
||
|
return true
|
||
|
}
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
func (s *Supervisor) getEdgeIP(index int) *net.TCPAddr {
|
||
|
return s.edgeIPs[index%len(s.edgeIPs)]
|
||
|
}
|
||
|
|
||
|
func (s *Supervisor) refreshEdgeIPs() {
|
||
|
if s.resolverC != nil {
|
||
|
return
|
||
|
}
|
||
|
if time.Since(s.lastResolve) < resolveTTL {
|
||
|
return
|
||
|
}
|
||
|
s.resolverC = make(chan resolveResult)
|
||
|
go func() {
|
||
|
edgeIPs, err := ResolveEdgeIPs(s.config.EdgeAddrs)
|
||
|
s.resolverC <- resolveResult{edgeIPs: edgeIPs, err: err}
|
||
|
}()
|
||
|
}
|