Skip to content

Commit b883d13

Browse files
committed
ToSquash: Add more logs for debugging
1 parent 5a5f125 commit b883d13

5 files changed

Lines changed: 194 additions & 3 deletions

File tree

pkg/clusterconditions/promql/alerts.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,6 @@ func (o *ocAlertGetter) refresh(ctx context.Context) error {
9090
}
9191
o.cached = r
9292
o.lastRefresh = time.Now()
93-
klog.Errorf("refreshed alerts")
93+
klog.Errorf("refreshed: %d alerts", len(o.cached.Alerts))
9494
return nil
9595
}

pkg/cvo/availableupdates.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,7 @@ func (u *availableUpdates) evaluateAlertRisks(ctx context.Context) error {
545545
}
546546

547547
func alertsToRisks(alerts []prometheusv1.Alert) []configv1.ConditionalUpdateRisk {
548+
klog.V(2).Infof("Found %d alerts", len(alerts))
548549
risks := map[string]configv1.ConditionalUpdateRisk{}
549550
for _, alert := range alerts {
550551
var alertName string
@@ -638,6 +639,7 @@ func alertsToRisks(alerts []prometheusv1.Alert) []configv1.ConditionalUpdateRisk
638639
}
639640
}
640641

642+
klog.V(2).Infof("Got %d risks", len(risks))
641643
if len(risks) == 0 {
642644
return nil
643645
}

pkg/cvo/availableupdates_test.go

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@ package cvo
22

33
import (
44
"context"
5+
"encoding/json"
56
"errors"
67
"fmt"
78
"net/http"
89
"net/http/httptest"
910
"net/url"
11+
"os"
12+
"path/filepath"
1013
"runtime"
1114
"testing"
1215
"time"
@@ -966,16 +969,33 @@ func Test_loadRiskConditions(t *testing.T) {
966969
}
967970

968971
type mockAlertGetter struct {
969-
ret prometheusv1.AlertsResult
972+
ret prometheusv1.AlertsResult
973+
jsonFile string
970974
}
971975

972-
func (m *mockAlertGetter) Get(ctx context.Context) (prometheusv1.AlertsResult, error) {
976+
func (m *mockAlertGetter) Get(_ context.Context) (prometheusv1.AlertsResult, error) {
977+
var ret prometheusv1.AlertsResult
978+
if m.jsonFile != "" {
979+
data, err := os.ReadFile(m.jsonFile)
980+
if err != nil {
981+
return ret, err
982+
}
983+
err = json.Unmarshal(data, &ret)
984+
if err != nil {
985+
return ret, err
986+
}
987+
return ret, nil
988+
}
973989
return m.ret, nil
974990
}
975991

976992
func Test_evaluateAlertConditions(t *testing.T) {
977993
t1 := time.Now()
978994
t2 := time.Now().Add(-3 * time.Minute)
995+
t3, err := time.Parse(time.RFC3339, "2026-03-04T00:38:19.02109776Z")
996+
if err != nil {
997+
t.Fatalf("failed to parse time: %v", err)
998+
}
979999
tests := []struct {
9801000
name string
9811001
u *availableUpdates
@@ -1089,6 +1109,36 @@ func Test_evaluateAlertConditions(t *testing.T) {
10891109
},
10901110
},
10911111
},
1112+
{
1113+
name: "from file",
1114+
u: &availableUpdates{
1115+
AlertGetter: &mockAlertGetter{
1116+
jsonFile: filepath.Join("testdata", "alerts.json"),
1117+
},
1118+
},
1119+
expectedAlertRisks: []configv1.ConditionalUpdateRisk{
1120+
{
1121+
Name: "TestAlert",
1122+
Message: "Test summary.",
1123+
URL: "todo-url",
1124+
MatchingRules: []configv1.ClusterCondition{
1125+
{
1126+
Type: "PromQL",
1127+
PromQL: &configv1.PromQLClusterCondition{
1128+
PromQL: "todo-expression",
1129+
},
1130+
},
1131+
},
1132+
Conditions: []metav1.Condition{{
1133+
Type: "Applies",
1134+
Status: "True",
1135+
Reason: "Alert:firing",
1136+
Message: "critical alert TestAlert firing, suggesting significant cluster issues worth investigating. Test summary. The alert description is: Test description. <alert does not have a runbook_url annotation>",
1137+
LastTransitionTime: metav1.NewTime(t3),
1138+
}},
1139+
},
1140+
},
1141+
},
10921142
}
10931143
for _, tt := range tests {
10941144
t.Run(tt.name, func(t *testing.T) {

pkg/cvo/status.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,7 @@ func conditionalUpdateWithRiskNamesAndRiskConditions(conditionalUpdates []config
477477
}
478478

479479
func conditionalUpdateRisks(conditionalUpdates []configv1.ConditionalUpdate, alertRisks []configv1.ConditionalUpdateRisk) []configv1.ConditionalUpdateRisk {
480+
klog.V(2).Infof("Got %d alert risks", len(alertRisks))
480481
var result []configv1.ConditionalUpdateRisk
481482
riskNames := sets.New[string]()
482483
for _, conditionalUpdate := range conditionalUpdates {
@@ -492,6 +493,7 @@ func conditionalUpdateRisks(conditionalUpdates []configv1.ConditionalUpdate, ale
492493
sort.Slice(result, func(i, j int) bool {
493494
return result[i].Name < result[j].Name
494495
})
496+
klog.V(2).Infof("Got %d conditional update risks", len(result))
495497
return result
496498
}
497499

pkg/cvo/testdata/alerts.json

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
{
2+
"alerts": [
3+
{
4+
"labels": {
5+
"alertname": "ClusterNotUpgradeable",
6+
"condition": "Upgradeable",
7+
"endpoint": "metrics",
8+
"name": "version",
9+
"namespace": "openshift-cluster-version",
10+
"severity": "info"
11+
},
12+
"annotations": {
13+
"description": "In most cases, you will still be able to apply patch releases. Reason MultipleReasons. For more information refer to 'oc adm upgrade' or https://console-openshift-console.apps.ci-ln-rwc3xi2-76ef8.aws-2.ci.openshift.org/settings/cluster/.",
14+
"summary": "One or more cluster operators have been blocking minor or major version cluster updates for at least an hour."
15+
},
16+
"state": "pending",
17+
"activeAt": "2026-03-04T00:06:31.900150556Z",
18+
"value": "0e+00",
19+
"partialResponseStrategy": "WARN"
20+
},
21+
{
22+
"labels": {
23+
"alertname": "UpdateAvailable",
24+
"channel": "simple",
25+
"namespace": "openshift-cluster-version",
26+
"severity": "info",
27+
"upstream": "https://fauxinnati-fauxinnati.apps.ota-stage.q2z4.p1.openshiftapps.com/api/upgrades_info/graph"
28+
},
29+
"annotations": {
30+
"description": "For more information refer to 'oc adm upgrade' or https://console-openshift-console.apps.ci-ln-rwc3xi2-76ef8.aws-2.ci.openshift.org/settings/cluster/.",
31+
"summary": "Your upstream update recommendation service recommends you update your cluster."
32+
},
33+
"state": "firing",
34+
"activeAt": "2026-03-04T00:36:33.844767047Z",
35+
"value": "2e+00",
36+
"partialResponseStrategy": "WARN"
37+
},
38+
{
39+
"labels": {
40+
"alertname": "TestAlert",
41+
"container": "cluster-version-operator",
42+
"endpoint": "metrics",
43+
"instance": "10.0.61.171:9099",
44+
"job": "cluster-version-operator",
45+
"namespace": "openshift-cluster-version",
46+
"openShiftUpdatePrecheck": "true",
47+
"pod": "cluster-version-operator-dcb5d56cc-5jc94",
48+
"service": "cluster-version-operator",
49+
"severity": "critical"
50+
},
51+
"annotations": {
52+
"description": "Test description.",
53+
"summary": "Test summary."
54+
},
55+
"state": "firing",
56+
"activeAt": "2026-03-04T00:38:19.02109776Z",
57+
"value": "1e+00",
58+
"partialResponseStrategy": "WARN"
59+
},
60+
{
61+
"labels": {
62+
"alertname": "InsightsRecommendationActive",
63+
"container": "insights-operator",
64+
"description": "Enabling the **TechPreviewNoUpgrade** feature set on your cluster\ncan not be undone and prevents minor version updates. Please do\nnot enable this feature set on production clusters.\n",
65+
"endpoint": "https",
66+
"info_link": "https://console.redhat.com/openshift/insights/advisor/clusters/efe476a4-97ad-4c07-bf46-d3da03a5ff6a?first=ccx_rules_ocp.external.rules.upgrade_is_blocked_due_to_tpfg%7CTECH_PREVIEW_NO_UPGRADE_FEATURE_SET_IS_ENABLED",
67+
"instance": "10.129.0.36:8443",
68+
"job": "metrics",
69+
"namespace": "openshift-insights",
70+
"pod": "insights-operator-65f69d8d84-l6f69",
71+
"service": "metrics",
72+
"severity": "info",
73+
"total_risk": "Important"
74+
},
75+
"annotations": {
76+
"description": "Insights recommendation \"Enabling the **TechPreviewNoUpgrade** feature set on your cluster\ncan not be undone and prevents minor version updates. Please do\nnot enable this feature set on production clusters.\n\" with total risk \"Important\" was detected on the cluster. More information is available at https://console.redhat.com/openshift/insights/advisor/clusters/efe476a4-97ad-4c07-bf46-d3da03a5ff6a?first=ccx_rules_ocp.external.rules.upgrade_is_blocked_due_to_tpfg%7CTECH_PREVIEW_NO_UPGRADE_FEATURE_SET_IS_ENABLED.",
77+
"summary": "An Insights recommendation is active for this cluster."
78+
},
79+
"state": "firing",
80+
"activeAt": "2026-03-04T00:07:08.820367488Z",
81+
"value": "1e+00",
82+
"partialResponseStrategy": "WARN"
83+
},
84+
{
85+
"labels": {
86+
"alertname": "TechPreviewNoUpgrade",
87+
"container": "kube-apiserver-operator",
88+
"endpoint": "https",
89+
"instance": "10.129.0.11:8443",
90+
"job": "kube-apiserver-operator",
91+
"name": "TechPreviewNoUpgrade",
92+
"namespace": "openshift-kube-apiserver-operator",
93+
"pod": "kube-apiserver-operator-75c85dd77-zg8fc",
94+
"service": "metrics",
95+
"severity": "warning"
96+
},
97+
"annotations": {
98+
"description": "Cluster has enabled Technology Preview features that cannot be undone and will prevent upgrades. The TechPreviewNoUpgrade feature set is not recommended on production clusters.",
99+
"summary": "Cluster has enabled tech preview features that will prevent upgrades."
100+
},
101+
"state": "firing",
102+
"activeAt": "2026-03-04T00:06:25.787986891Z",
103+
"value": "0e+00",
104+
"partialResponseStrategy": "WARN"
105+
},
106+
{
107+
"labels": {
108+
"alertname": "Watchdog",
109+
"namespace": "openshift-monitoring",
110+
"severity": "none"
111+
},
112+
"annotations": {
113+
"description": "This is an alert meant to ensure that the entire alerting pipeline is functional.\nThis alert is always firing, therefore it should always be firing in Alertmanager\nand always fire against a receiver. There are integrations with various notification\nmechanisms that send a notification when this alert is not firing. For example the\n\"DeadMansSnitch\" integration in PagerDuty.\n",
114+
"summary": "An alert that should always be firing to certify that Alertmanager is working properly."
115+
},
116+
"state": "firing",
117+
"activeAt": "2026-03-04T00:06:23.165306226Z",
118+
"value": "1e+00",
119+
"partialResponseStrategy": "WARN"
120+
},
121+
{
122+
"labels": {
123+
"alertname": "AlertmanagerReceiversNotConfigured",
124+
"namespace": "openshift-monitoring",
125+
"severity": "warning"
126+
},
127+
"annotations": {
128+
"description": "Alerts are not configured to be sent to a notification system, meaning that you may not be notified in a timely fashion when important failures occur. Check the OpenShift documentation to learn how to configure notifications with Alertmanager.",
129+
"summary": "Receivers (notification integrations) are not configured on Alertmanager"
130+
},
131+
"state": "firing",
132+
"activeAt": "2026-03-04T00:06:46.778977652Z",
133+
"value": "0e+00",
134+
"partialResponseStrategy": "WARN"
135+
}
136+
]
137+
}

0 commit comments

Comments
 (0)