Skip to content

Commit 5311d54

Browse files
committed
ToSquash: Use cache to avoid overloading Prometheus
1 parent b516511 commit 5311d54

1 file changed

Lines changed: 34 additions & 6 deletions

File tree

pkg/clusterconditions/promql/alerts.go

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,15 @@ package promql
33
import (
44
"context"
55
"fmt"
6+
"sync"
7+
"time"
68

79
"github.com/prometheus/client_golang/api"
810
prometheusv1 "github.com/prometheus/client_golang/api/prometheus/v1"
911
"github.com/prometheus/common/config"
1012

13+
"k8s.io/klog/v2"
14+
1115
"github.com/openshift/cluster-version-operator/pkg/clusterconditions"
1216
)
1317

@@ -22,32 +26,49 @@ func NewAlertGetter(promQLTarget clusterconditions.PromQLTarget) Getter {
2226
if !ok {
2327
panic("invalid condition type")
2428
}
25-
return &ocAlertGetter{promQL: v}
29+
return &ocAlertGetter{promQL: v, expiration: 1 * time.Minute}
2630
}
2731

2832
// ocAlertGetter serves alerts queried through the Prometheus v1 API and keeps
// the last successful result in memory so that repeated Get calls within the
// expiration window do not issue a new query.
type ocAlertGetter struct {
2933
// promQL supplies the target host, URL and HTTP client configuration used
// by refresh to build the API client.
promQL *PromQL
34+
35+
// mutex is held by refresh for its whole duration, including the writes to
// cached and lastRefresh.
mutex sync.Mutex
36+
// cached is the result of the last successful Alerts query; it is the zero
// value until the first refresh succeeds.
cached prometheusv1.AlertsResult
37+
// expiration is how long a cached result is served before Get attempts a
// refresh. NewAlertGetter sets it to one minute.
expiration time.Duration
38+
// lastRefresh is the time of the last successful refresh; the zero value
// makes the first Get attempt a refresh.
lastRefresh time.Time
3039
}
3140

3241
func (o *ocAlertGetter) Get(ctx context.Context) (prometheusv1.AlertsResult, error) {
33-
var ret prometheusv1.AlertsResult
42+
if time.Now().After(o.lastRefresh.Add(o.expiration)) {
43+
if err := o.refresh(ctx); err != nil {
44+
klog.Errorf("Failed to refresh alerts, using stale cache instead: %v", err)
45+
}
46+
}
47+
return o.cached, nil
48+
}
49+
50+
func (o *ocAlertGetter) refresh(ctx context.Context) error {
51+
o.mutex.Lock()
52+
defer o.mutex.Unlock()
53+
54+
klog.Errorf("refresh alerts ...")
3455
p := o.promQL
3556
host, err := p.Host(ctx)
3657
if err != nil {
37-
return ret, fmt.Errorf("failure determine thanos IP: %w", err)
58+
return fmt.Errorf("failure determine thanos IP: %w", err)
3859
}
3960
p.url.Host = host
4061
clientConfig := api.Config{Address: p.url.String()}
4162

4263
if roundTripper, err := config.NewRoundTripperFromConfig(p.HTTPClientConfig, "cluster-conditions"); err == nil {
4364
clientConfig.RoundTripper = roundTripper
4465
} else {
45-
return ret, fmt.Errorf("creating PromQL round-tripper: %w", err)
66+
return fmt.Errorf("creating PromQL round-tripper: %w", err)
4667
}
4768

4869
promqlClient, err := api.NewClient(clientConfig)
4970
if err != nil {
50-
return ret, fmt.Errorf("creating PromQL client: %w", err)
71+
return fmt.Errorf("creating PromQL client: %w", err)
5172
}
5273

5374
client := &statusCodeNotImplementedForPostClient{
@@ -63,5 +84,12 @@ func (o *ocAlertGetter) Get(ctx context.Context) (prometheusv1.AlertsResult, err
6384
defer cancel()
6485
}
6586

66-
return v1api.Alerts(queryContext)
87+
r, err := v1api.Alerts(queryContext)
88+
if err != nil {
89+
return fmt.Errorf("failed to get alerts: %w", err)
90+
}
91+
o.cached = r
92+
o.lastRefresh = time.Now()
93+
klog.Errorf("refreshed alerts")
94+
return nil
6795
}

0 commit comments

Comments
 (0)