TUN-7965: Remove legacy incident status page check
This commit is contained in:
parent
c1d8c5e960
commit
e0a55f9c0e
|
@ -228,7 +228,6 @@ func prepareTunnelConfig(
|
|||
EdgeIPVersion: edgeIPVersion,
|
||||
EdgeBindAddr: edgeBindAddr,
|
||||
HAConnections: c.Int(haConnectionsFlag),
|
||||
IncidentLookup: supervisor.NewIncidentLookup(),
|
||||
IsAutoupdated: c.Bool("is-autoupdated"),
|
||||
LBPool: c.String("lb-pool"),
|
||||
Tags: tags,
|
||||
|
|
|
@ -1,117 +0,0 @@
|
|||
package supervisor
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/cloudflare/golibs/lrucache"
|
||||
)
|
||||
|
||||
// StatusPage.io API docs:
// https://www.cloudflarestatus.com/api/v2/#incidents-unresolved
const (
	// activeIncidentsURL is the endpoint requested to list unresolved incidents.
	activeIncidentsURL = "https://yh6f0r4529hb.statuspage.io/api/v2/incidents/unresolved.json"
	// argoTunnelKeyword is the lowercase phrase looked for in incident names
	// and update bodies to decide whether an incident is tunnel-related.
	argoTunnelKeyword = "argo tunnel"
	// incidentDetailsPrefix is prepended to an incident ID to build its URL.
	incidentDetailsPrefix = "https://www.cloudflarestatus.com/incidents/"
)
|
||||
|
||||
// IncidentLookup is an object that checks for active incidents in
// the Cloudflare infrastructure.
type IncidentLookup interface {
	// ActiveIncidents returns the incidents that are currently active.
	ActiveIncidents() []Incident
}
|
||||
|
||||
// NewIncidentLookup returns a new IncidentLookup instance that caches its
|
||||
// results with a 1-minute TTL.
|
||||
func NewIncidentLookup() IncidentLookup {
|
||||
return newCachedIncidentLookup(fetchActiveIncidents)
|
||||
}
|
||||
|
||||
// IncidentUpdate is a single update posted on an incident. Only the update's
// text is decoded from the status page response.
type IncidentUpdate struct {
	// Body is the text of the update. The explicit tag pins the JSON key
	// instead of relying on encoding/json's case-insensitive name matching.
	Body string `json:"body"`
}
|
||||
|
||||
type Incident struct {
|
||||
Name string
|
||||
ID string `json:"id"`
|
||||
Updates []IncidentUpdate `json:"incident_updates"`
|
||||
}
|
||||
|
||||
type StatusPage struct {
|
||||
Incidents []Incident
|
||||
}
|
||||
|
||||
func (i Incident) URL() string {
|
||||
return incidentDetailsPrefix + i.ID
|
||||
}
|
||||
|
||||
func parseStatusPage(data []byte) (*StatusPage, error) {
|
||||
var result StatusPage
|
||||
err := json.Unmarshal(data, &result)
|
||||
return &result, err
|
||||
}
|
||||
|
||||
func isArgoTunnelIncident(i Incident) bool {
|
||||
if strings.Contains(strings.ToLower(i.Name), argoTunnelKeyword) {
|
||||
return true
|
||||
}
|
||||
for _, u := range i.Updates {
|
||||
if strings.Contains(strings.ToLower(u.Body), argoTunnelKeyword) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func fetchActiveIncidents() (incidents []Incident) {
|
||||
resp, err := http.Get(activeIncidentsURL)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
statusPage, err := parseStatusPage(body)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for _, i := range statusPage.Incidents {
|
||||
if isArgoTunnelIncident(i) {
|
||||
incidents = append(incidents, i)
|
||||
}
|
||||
}
|
||||
return incidents
|
||||
}
|
||||
|
||||
// cachedIncidentLookup wraps an incident lookup function with a cache.
type cachedIncidentLookup struct {
	// cache stores the most recent lookup result under cacheKey.
	cache *lrucache.LRUCache
	// ttl is added to the current time to compute when a stored result expires.
	ttl time.Duration
	// uncachedLookup is called for fresh results when the cache has no usable entry.
	uncachedLookup func() []Incident
}
|
||||
|
||||
func newCachedIncidentLookup(uncachedLookup func() []Incident) *cachedIncidentLookup {
|
||||
return &cachedIncidentLookup{
|
||||
cache: lrucache.NewLRUCache(1),
|
||||
ttl: time.Minute,
|
||||
uncachedLookup: uncachedLookup,
|
||||
}
|
||||
}
|
||||
|
||||
// cacheKey is the key under which lookup results are cached. Only one cache
// entry is needed, so the empty string is always used as its key.
const cacheKey = ""
|
||||
|
||||
func (c *cachedIncidentLookup) ActiveIncidents() []Incident {
|
||||
if cached, ok := c.cache.GetNotStale(cacheKey); ok {
|
||||
if incidents, ok := cached.([]Incident); ok {
|
||||
return incidents
|
||||
}
|
||||
}
|
||||
incidents := c.uncachedLookup()
|
||||
c.cache.Set(cacheKey, incidents, time.Now().Add(c.ttl))
|
||||
return incidents
|
||||
}
|
|
@ -1,384 +0,0 @@
|
|||
package supervisor
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/cloudflare/golibs/lrucache"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestParseStatusPage(t *testing.T) {
|
||||
testCases := []struct {
|
||||
input []byte
|
||||
output *StatusPage
|
||||
fail bool
|
||||
}{
|
||||
{
|
||||
input: []byte(`<html>
|
||||
<head><title>504 Gateway Time-out</title></head>
|
||||
<body><center><h1>504 Gateway Time-out</h1></center></body>
|
||||
</html>`),
|
||||
output: nil,
|
||||
fail: true,
|
||||
},
|
||||
{
|
||||
input: []byte(`{
|
||||
"page": {
|
||||
"id": "yh6f0r4529hb",
|
||||
"name": "Cloudflare",
|
||||
"url": "https://www.cloudflarestatus.com",
|
||||
"time_zone": "Etc/UTC",
|
||||
"updated_at": "2019-01-10T20:11:38.750Z"
|
||||
},
|
||||
"incidents": [
|
||||
{
|
||||
"name": "Cloudflare API service issues",
|
||||
"status": "resolved",
|
||||
"created_at": "2018-09-17T19:29:21.132Z",
|
||||
"updated_at": "2018-09-18T07:45:41.313Z",
|
||||
"monitoring_at": "2018-09-17T21:35:06.492Z",
|
||||
"resolved_at": "2018-09-18T07:45:41.290Z",
|
||||
"shortlink": "http://stspg.io/7f079791e",
|
||||
"id": "q746ybtyb6q0",
|
||||
"page_id": "yh6f0r4529hb",
|
||||
"incident_updates": [
|
||||
{
|
||||
"status": "resolved",
|
||||
"body": "Cloudflare has resolved the issue and the service have resumed normal operation.",
|
||||
"created_at": "2018-09-18T07:45:41.290Z",
|
||||
"updated_at": "2018-09-18T07:45:41.290Z",
|
||||
"display_at": "2018-09-18T07:45:41.290Z",
|
||||
"affected_components": [
|
||||
{
|
||||
"code": "g4tb35rs9yw7",
|
||||
"name": "Cloudflare customer dashboard and APIs - Cloudflare APIs",
|
||||
"old_status": "operational",
|
||||
"new_status": "operational"
|
||||
}
|
||||
],
|
||||
"deliver_notifications": true,
|
||||
"tweet_id": null,
|
||||
"id": "zl5g2pl5zhfs",
|
||||
"incident_id": "q746ybtyb6q0",
|
||||
"custom_tweet": null
|
||||
},
|
||||
{
|
||||
"status": "monitoring",
|
||||
"body": "Cloudflare has implemented a fix for this issue and is currently monitoring the results.\r\n\r\nWe will update the status once the issue is resolved.",
|
||||
"created_at": "2018-09-17T21:35:06.492Z",
|
||||
"updated_at": "2018-09-17T21:35:06.492Z",
|
||||
"display_at": "2018-09-17T21:35:06.492Z",
|
||||
"affected_components": [
|
||||
{
|
||||
"code": "g4tb35rs9yw7",
|
||||
"name": "Cloudflare customer dashboard and APIs - Cloudflare APIs",
|
||||
"old_status": "degraded_performance",
|
||||
"new_status": "operational"
|
||||
}
|
||||
],
|
||||
"deliver_notifications": false,
|
||||
"tweet_id": null,
|
||||
"id": "0001sv3chdnx",
|
||||
"incident_id": "q746ybtyb6q0",
|
||||
"custom_tweet": null
|
||||
},
|
||||
{
|
||||
"status": "investigating",
|
||||
"body": "We are continuing to investigate this issue.",
|
||||
"created_at": "2018-09-17T19:30:08.049Z",
|
||||
"updated_at": "2018-09-17T19:30:08.049Z",
|
||||
"display_at": "2018-09-17T19:30:08.049Z",
|
||||
"affected_components": [
|
||||
{
|
||||
"code": "g4tb35rs9yw7",
|
||||
"name": "Cloudflare customer dashboard and APIs - Cloudflare APIs",
|
||||
"old_status": "operational",
|
||||
"new_status": "degraded_performance"
|
||||
}
|
||||
],
|
||||
"deliver_notifications": false,
|
||||
"tweet_id": null,
|
||||
"id": "qdr164tfpq7m",
|
||||
"incident_id": "q746ybtyb6q0",
|
||||
"custom_tweet": null
|
||||
},
|
||||
{
|
||||
"status": "investigating",
|
||||
"body": "Cloudflare is investigating issues with APIs and Page Rule delays for Page Rule updates. Cloudflare Page Rule service delivery is unaffected and is operating normally. Also, these issues do not affect the Cloudflare CDN and therefore, do not impact customer websites.",
|
||||
"created_at": "2018-09-17T19:29:21.201Z",
|
||||
"updated_at": "2018-09-17T19:29:21.201Z",
|
||||
"display_at": "2018-09-17T19:29:21.201Z",
|
||||
"affected_components": [
|
||||
{
|
||||
"code": "g4tb35rs9yw7",
|
||||
"name": "Cloudflare customer dashboard and APIs - Cloudflare APIs",
|
||||
"old_status": "operational",
|
||||
"new_status": "operational"
|
||||
}
|
||||
],
|
||||
"deliver_notifications": false,
|
||||
"tweet_id": null,
|
||||
"id": "qzl2n0q8tskg",
|
||||
"incident_id": "q746ybtyb6q0",
|
||||
"custom_tweet": null
|
||||
}
|
||||
],
|
||||
"components": [
|
||||
{
|
||||
"status": "operational",
|
||||
"name": "Cloudflare APIs",
|
||||
"created_at": "2014-10-09T03:32:07.158Z",
|
||||
"updated_at": "2019-01-01T22:58:30.846Z",
|
||||
"position": 2,
|
||||
"description": null,
|
||||
"showcase": false,
|
||||
"id": "g4tb35rs9yw7",
|
||||
"page_id": "yh6f0r4529hb",
|
||||
"group_id": "1km35smx8p41",
|
||||
"group": false,
|
||||
"only_show_if_degraded": false
|
||||
}
|
||||
],
|
||||
"impact": "minor"
|
||||
},
|
||||
{
|
||||
"name": "Web Analytics Delays",
|
||||
"status": "resolved",
|
||||
"created_at": "2018-09-17T18:05:39.907Z",
|
||||
"updated_at": "2018-09-17T22:53:05.078Z",
|
||||
"monitoring_at": null,
|
||||
"resolved_at": "2018-09-17T22:53:05.057Z",
|
||||
"shortlink": "http://stspg.io/cb208928c",
|
||||
"id": "wqfk9mzs5qt1",
|
||||
"page_id": "yh6f0r4529hb",
|
||||
"incident_updates": [
|
||||
{
|
||||
"status": "resolved",
|
||||
"body": "Cloudflare has resolved the issue and Web Analytics have resumed normal operation.",
|
||||
"created_at": "2018-09-17T22:53:05.057Z",
|
||||
"updated_at": "2018-09-17T22:53:05.057Z",
|
||||
"display_at": "2018-09-17T22:53:05.057Z",
|
||||
"affected_components": [
|
||||
{
|
||||
"code": "4c231tkdlpcl",
|
||||
"name": "Cloudflare customer dashboard and APIs - Analytics",
|
||||
"old_status": "degraded_performance",
|
||||
"new_status": "operational"
|
||||
}
|
||||
],
|
||||
"deliver_notifications": false,
|
||||
"tweet_id": null,
|
||||
"id": "93y1w00yqzk4",
|
||||
"incident_id": "wqfk9mzs5qt1",
|
||||
"custom_tweet": null
|
||||
},
|
||||
{
|
||||
"status": "investigating",
|
||||
"body": "There is a delay in processing Cloudflare Web Analytics. This affects timely delivery of customer data.\n\nThese delays do not impact analytics for DNS and Rate Limiting.",
|
||||
"created_at": "2018-09-17T18:05:40.033Z",
|
||||
"updated_at": "2018-09-17T18:05:40.033Z",
|
||||
"display_at": "2018-09-17T18:05:40.033Z",
|
||||
"affected_components": [
|
||||
{
|
||||
"code": "4c231tkdlpcl",
|
||||
"name": "Cloudflare customer dashboard and APIs - Analytics",
|
||||
"old_status": "operational",
|
||||
"new_status": "degraded_performance"
|
||||
}
|
||||
],
|
||||
"deliver_notifications": false,
|
||||
"tweet_id": null,
|
||||
"id": "362t6lv0vrpk",
|
||||
"incident_id": "wqfk9mzs5qt1",
|
||||
"custom_tweet": null
|
||||
}
|
||||
],
|
||||
"components": [
|
||||
{
|
||||
"status": "operational",
|
||||
"name": "Analytics",
|
||||
"created_at": "2014-11-13T11:54:10.191Z",
|
||||
"updated_at": "2018-12-31T08:20:52.349Z",
|
||||
"position": 3,
|
||||
"description": "Customer data",
|
||||
"showcase": false,
|
||||
"id": "4c231tkdlpcl",
|
||||
"page_id": "yh6f0r4529hb",
|
||||
"group_id": "1km35smx8p41",
|
||||
"group": false,
|
||||
"only_show_if_degraded": false
|
||||
}
|
||||
],
|
||||
"impact": "minor"
|
||||
}
|
||||
]
|
||||
}`),
|
||||
output: &StatusPage{
|
||||
Incidents: []Incident{
|
||||
Incident{
|
||||
Name: "Cloudflare API service issues",
|
||||
ID: "q746ybtyb6q0",
|
||||
Updates: []IncidentUpdate{
|
||||
IncidentUpdate{
|
||||
Body: "Cloudflare has resolved the issue and the service have resumed normal operation.",
|
||||
},
|
||||
IncidentUpdate{
|
||||
Body: "Cloudflare has implemented a fix for this issue and is currently monitoring the results.\r\n\r\nWe will update the status once the issue is resolved.",
|
||||
},
|
||||
IncidentUpdate{
|
||||
Body: "We are continuing to investigate this issue.",
|
||||
},
|
||||
IncidentUpdate{
|
||||
Body: "Cloudflare is investigating issues with APIs and Page Rule delays for Page Rule updates. Cloudflare Page Rule service delivery is unaffected and is operating normally. Also, these issues do not affect the Cloudflare CDN and therefore, do not impact customer websites.",
|
||||
},
|
||||
},
|
||||
},
|
||||
Incident{
|
||||
Name: "Web Analytics Delays",
|
||||
ID: "wqfk9mzs5qt1",
|
||||
Updates: []IncidentUpdate{
|
||||
IncidentUpdate{
|
||||
Body: "Cloudflare has resolved the issue and Web Analytics have resumed normal operation.",
|
||||
},
|
||||
IncidentUpdate{
|
||||
Body: "There is a delay in processing Cloudflare Web Analytics. This affects timely delivery of customer data.\n\nThese delays do not impact analytics for DNS and Rate Limiting.",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
fail: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
output, err := parseStatusPage(testCase.input)
|
||||
if testCase.fail {
|
||||
assert.Error(t, err)
|
||||
} else {
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, testCase.output, output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsArgoTunnelIncident(t *testing.T) {
|
||||
testCases := []struct {
|
||||
input Incident
|
||||
output bool
|
||||
}{
|
||||
{
|
||||
input: Incident{},
|
||||
output: false,
|
||||
},
|
||||
{
|
||||
input: Incident{Name: "An Argo Tunnel incident"},
|
||||
output: true,
|
||||
},
|
||||
{
|
||||
input: Incident{Name: "an argo tunnel incident"},
|
||||
output: true,
|
||||
},
|
||||
{
|
||||
input: Incident{Name: "an aRgO TuNnEl incident"},
|
||||
output: true,
|
||||
},
|
||||
{
|
||||
input: Incident{Name: "an argotunnel incident"},
|
||||
output: false,
|
||||
},
|
||||
{
|
||||
input: Incident{Name: "irrelevant"},
|
||||
output: false,
|
||||
},
|
||||
{
|
||||
input: Incident{
|
||||
Name: "irrelevant",
|
||||
Updates: []IncidentUpdate{
|
||||
IncidentUpdate{Body: "irrelevant"},
|
||||
IncidentUpdate{Body: "an Argo Tunnel incident"},
|
||||
IncidentUpdate{Body: "irrelevant"},
|
||||
},
|
||||
},
|
||||
output: true,
|
||||
},
|
||||
{
|
||||
input: Incident{
|
||||
Name: "an Argo Tunnel incident",
|
||||
Updates: []IncidentUpdate{
|
||||
IncidentUpdate{Body: "irrelevant"},
|
||||
IncidentUpdate{Body: "irrelevant"},
|
||||
IncidentUpdate{Body: "irrelevant"},
|
||||
},
|
||||
},
|
||||
output: true,
|
||||
},
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
actual := isArgoTunnelIncident(testCase.input)
|
||||
assert.Equal(t, testCase.output, actual, "Test case failed: %v", testCase.input)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIncidentURL(t *testing.T) {
|
||||
incident := Incident{
|
||||
ID: "s6k0dnn5347b",
|
||||
}
|
||||
assert.Equal(t, "https://www.cloudflarestatus.com/incidents/s6k0dnn5347b", incident.URL())
|
||||
}
|
||||
|
||||
func TestNewCachedIncidentLookup(t *testing.T) {
|
||||
c := newCachedIncidentLookup(func() []Incident { return nil })
|
||||
assert.Equal(t, time.Minute, c.ttl)
|
||||
assert.Equal(t, 1, c.cache.Capacity())
|
||||
}
|
||||
|
||||
func TestCachedIncidentLookup(t *testing.T) {
|
||||
expected := []Incident{
|
||||
Incident{
|
||||
Name: "An incident",
|
||||
ID: "incidentID",
|
||||
},
|
||||
}
|
||||
|
||||
var shouldCallUncachedLookup bool
|
||||
c := &cachedIncidentLookup{
|
||||
cache: lrucache.NewLRUCache(1),
|
||||
ttl: 50 * time.Millisecond,
|
||||
uncachedLookup: func() []Incident {
|
||||
if !shouldCallUncachedLookup {
|
||||
t.Fatal("uncachedLookup shouldn't have been called")
|
||||
}
|
||||
return expected
|
||||
},
|
||||
}
|
||||
|
||||
shouldCallUncachedLookup = true
|
||||
assert.Equal(t, expected, c.ActiveIncidents())
|
||||
|
||||
shouldCallUncachedLookup = false
|
||||
assert.Equal(t, expected, c.ActiveIncidents())
|
||||
assert.Equal(t, expected, c.ActiveIncidents())
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
shouldCallUncachedLookup = true
|
||||
assert.Equal(t, expected, c.ActiveIncidents())
|
||||
}
|
||||
|
||||
func TestCachedIncidentLookupDoesntPanic(t *testing.T) {
|
||||
expected := []Incident{
|
||||
Incident{
|
||||
Name: "An incident",
|
||||
ID: "incidentID",
|
||||
},
|
||||
}
|
||||
c := &cachedIncidentLookup{
|
||||
cache: lrucache.NewLRUCache(1),
|
||||
ttl: 50 * time.Millisecond,
|
||||
uncachedLookup: func() []Incident { return expected },
|
||||
}
|
||||
c.cache.Set(cacheKey, 42, time.Now().Add(30*time.Minute))
|
||||
actual := c.ActiveIncidents()
|
||||
assert.Equal(t, expected, actual)
|
||||
}
|
|
@ -47,7 +47,6 @@ type TunnelConfig struct {
|
|||
EdgeIPVersion allregions.ConfigIPVersion
|
||||
EdgeBindAddr net.IP
|
||||
HAConnections int
|
||||
IncidentLookup IncidentLookup
|
||||
IsAutoupdated bool
|
||||
LBPool string
|
||||
Tags []tunnelpogs.Tag
|
||||
|
@ -436,9 +435,6 @@ func (e *EdgeTunnelServer) serveTunnel(
|
|||
connLog.ConnAwareLogger().Err(err).Msg("Register tunnel error from server side")
|
||||
// Don't send registration error return from server to Sentry. They are
|
||||
// logged on server side
|
||||
if incidents := e.config.IncidentLookup.ActiveIncidents(); len(incidents) > 0 {
|
||||
connLog.ConnAwareLogger().Msg(activeIncidentsMsg(incidents))
|
||||
}
|
||||
return err.Cause, !err.Permanent
|
||||
case *connection.EdgeQuicDialError:
|
||||
return err, false
|
||||
|
@ -675,16 +671,3 @@ func (cf *connectedFuse) Connected() {
|
|||
func (cf *connectedFuse) IsConnected() bool {
|
||||
return cf.fuse.Value()
|
||||
}
|
||||
|
||||
func activeIncidentsMsg(incidents []Incident) string {
|
||||
preamble := "There is an active Cloudflare incident that may be related:"
|
||||
if len(incidents) > 1 {
|
||||
preamble = "There are active Cloudflare incidents that may be related:"
|
||||
}
|
||||
incidentStrings := []string{}
|
||||
for _, incident := range incidents {
|
||||
incidentString := fmt.Sprintf("%s (%s)", incident.Name, incident.URL())
|
||||
incidentStrings = append(incidentStrings, incidentString)
|
||||
}
|
||||
return preamble + " " + strings.Join(incidentStrings, "; ")
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue