@@ -3,11 +3,15 @@ package promql
33import (
44 "context"
55 "fmt"
6+ "sync"
7+ "time"
68
79 "github.com/prometheus/client_golang/api"
810 prometheusv1 "github.com/prometheus/client_golang/api/prometheus/v1"
911 "github.com/prometheus/common/config"
1012
13+ "k8s.io/klog/v2"
14+
1115 "github.com/openshift/cluster-version-operator/pkg/clusterconditions"
1216)
1317
@@ -22,32 +26,49 @@ func NewAlertGetter(promQLTarget clusterconditions.PromQLTarget) Getter {
2226 if ! ok {
2327 panic ("invalid condition type" )
2428 }
25- return & ocAlertGetter {promQL : v }
29+ return & ocAlertGetter {promQL : v , expiration : 1 * time . Minute }
2630}
2731
2832type ocAlertGetter struct {
2933 promQL * PromQL
34+
35+ mutex sync.Mutex
36+ cached prometheusv1.AlertsResult
37+ expiration time.Duration
38+ lastRefresh time.Time
3039}
3140
3241func (o * ocAlertGetter ) Get (ctx context.Context ) (prometheusv1.AlertsResult , error ) {
33- var ret prometheusv1.AlertsResult
42+ if time .Now ().After (o .lastRefresh .Add (o .expiration )) {
43+ if err := o .refresh (ctx ); err != nil {
44+ klog .Errorf ("Failed to refresh alerts, using stale cache instead: %v" , err )
45+ }
46+ }
47+ return o .cached , nil
48+ }
49+
50+ func (o * ocAlertGetter ) refresh (ctx context.Context ) error {
51+ o .mutex .Lock ()
52+ defer o .mutex .Unlock ()
53+
54+ klog .Errorf ("refresh alerts ..." )
3455 p := o .promQL
3556 host , err := p .Host (ctx )
3657 if err != nil {
37- return ret , fmt .Errorf ("failure determine thanos IP: %w" , err )
58+ return fmt .Errorf ("failure determine thanos IP: %w" , err )
3859 }
3960 p .url .Host = host
4061 clientConfig := api.Config {Address : p .url .String ()}
4162
4263 if roundTripper , err := config .NewRoundTripperFromConfig (p .HTTPClientConfig , "cluster-conditions" ); err == nil {
4364 clientConfig .RoundTripper = roundTripper
4465 } else {
45- return ret , fmt .Errorf ("creating PromQL round-tripper: %w" , err )
66+ return fmt .Errorf ("creating PromQL round-tripper: %w" , err )
4667 }
4768
4869 promqlClient , err := api .NewClient (clientConfig )
4970 if err != nil {
50- return ret , fmt .Errorf ("creating PromQL client: %w" , err )
71+ return fmt .Errorf ("creating PromQL client: %w" , err )
5172 }
5273
5374 client := & statusCodeNotImplementedForPostClient {
@@ -63,5 +84,12 @@ func (o *ocAlertGetter) Get(ctx context.Context) (prometheusv1.AlertsResult, err
6384 defer cancel ()
6485 }
6586
66- return v1api .Alerts (queryContext )
87+ r , err := v1api .Alerts (queryContext )
88+ if err != nil {
89+ return fmt .Errorf ("failed to get alerts: %w" , err )
90+ }
91+ o .cached = r
92+ o .lastRefresh = time .Now ()
93+ klog .Errorf ("refreshed alerts" )
94+ return nil
6795}
0 commit comments