diff --git a/CLAUDE.md b/CLAUDE.md index b8ca18f..b61225e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -138,8 +138,9 @@ Use `ginkgo.By()` for major steps ONLY. Do NOT use inside `Eventually` closures: ```go // CORRECT -ginkgo.By("waiting for cluster to become Reconciled") -err := h.WaitForClusterCondition(ctx, clusterID, client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue, timeout) +ginkgo.By("waiting for cluster to become Ready") +Eventually(h.PollCluster(ctx, clusterID), timeout, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReady, openapi.ResourceConditionStatusTrue)) // INCORRECT - never do this Eventually(func() { @@ -148,18 +149,55 @@ Eventually(func() { }).Should(Succeed()) ``` -### Async Operations +### Async Operations — Pollers + Custom Matchers -Use `Eventually` with `g.Expect()` (not `Expect()`): +Use **pollers** (thin functions returning current state) with **custom matchers** (reusable assertions). This keeps `Eventually` visible at the call site and avoids combinatorial helper function explosion. +**Wait for a resource condition** (cluster or nodepool): +```go +Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Ready, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + +Eventually(h.PollNodePool(ctx, clusterID, npID), h.Cfg.Timeouts.NodePool.Ready, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReady, openapi.ResourceConditionStatusTrue)) +``` + +**Wait for adapter conditions** (works for both cluster and nodepool adapters): +```go +Eventually(h.PollClusterAdapterStatuses(ctx, clusterID), timeout, h.Cfg.Polling.Interval). + Should(helper.HaveAllAdaptersWithCondition(h.Cfg.Adapters.Cluster, client.ConditionTypeFinalized, openapi.AdapterConditionStatusTrue)) + +Eventually(h.PollNodePoolAdapterStatuses(ctx, clusterID, npID), timeout, h.Cfg.Polling.Interval). + Should(helper.HaveAllAdaptersAtGeneration(h.Cfg.Adapters.NodePool, expectedGen)) +``` + +**Wait for hard-delete** (resource returns 404): +```go +Eventually(h.PollClusterHTTPStatus(ctx, clusterID), timeout, h.Cfg.Polling.Interval). + Should(Equal(http.StatusNotFound)) +``` + +**Wait for namespace cleanup**: +```go +Eventually(h.PollNamespacesByPrefix(ctx, clusterID), timeout, h.Cfg.Polling.Interval). + Should(BeEmpty()) +``` + +**For one-off complex assertions**, use `Eventually` with `func(g Gomega)` and `g.Expect()` (not `Expect()`): ```go Eventually(func(g Gomega) { - cluster, err := h.Client.GetCluster(ctx, clusterID) + statuses, err := h.Client.GetClusterStatuses(ctx, clusterID) g.Expect(err).NotTo(HaveOccurred()) - g.Expect(h.HasResourceCondition(cluster.Status.Conditions, client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)).To(BeTrue()) -}, timeout, pollInterval).Should(Succeed()) + // complex multi-field validation... +}, timeout, h.Cfg.Polling.Interval).Should(Succeed()) ``` +Available pollers: `PollCluster`, `PollNodePool`, `PollClusterAdapterStatuses`, `PollNodePoolAdapterStatuses`, `PollClusterHTTPStatus`, `PollNodePoolHTTPStatus`, `PollNamespacesByPrefix` — see `pkg/helper/pollers.go`. + +Available matchers: `HaveResourceCondition`, `HaveAllAdaptersWithCondition`, `HaveAllAdaptersAtGeneration` — see `pkg/helper/matchers.go`. + +**Do NOT** create `WaitFor*` wrapper functions that hide `Eventually` inside helpers. 
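+
+A poller itself is just a thin closure over a single GET call. A minimal sketch of the assumed shape (the real implementations live in `pkg/helper/pollers.go`):
+
+```go
+// Hypothetical sketch for illustration only; see pkg/helper/pollers.go for the actual code.
+func (h *Helper) PollCluster(ctx context.Context, clusterID string) func() (*openapi.Cluster, error) {
+	return func() (*openapi.Cluster, error) {
+		// Eventually calls the returned closure on every attempt; a non-nil error
+		// fails that attempt and polling continues until the timeout elapses.
+		return h.Client.GetCluster(ctx, clusterID)
+	}
+}
+```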
+ ### Resource Cleanup ALWAYS implement cleanup in `AfterEach`: @@ -202,10 +240,11 @@ Available variables: `.Random`, `.Timestamp`. See `pkg/client/payload.go`. - **Use `ginkgo.By()` in `Eventually`**: Only use at top-level test steps - **Import test packages**: Do NOT import `e2e/*` packages in production code - **Edit OpenAPI schema**: Schema is maintained in hyperfleet-api repo +- **Create `WaitFor*` wrapper functions**: Use pollers + custom matchers instead (see Async Operations) ### DO -- **Use helper functions**: Prefer `h.WaitForClusterCondition()` over manual polling +- **Use pollers + matchers**: Prefer `Eventually(h.PollCluster(...)).Should(helper.HaveResourceCondition(...))` over raw `Eventually` with inline closures - **Use config values**: `h.Cfg.Timeouts.*` for timeouts, `h.Cfg.Polling.*` for intervals - **Store resource IDs**: Save IDs in variables for cleanup - **Check errors**: Use `Expect(err).NotTo(HaveOccurred())` @@ -271,20 +310,22 @@ clusterID = *cluster.Id ### Wait for Condition ```go -err = h.WaitForClusterCondition(ctx, clusterID, client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue, h.Cfg.Timeouts.Cluster.Reconciled) -Expect(err).NotTo(HaveOccurred()) +Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Ready, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) ``` -### Verify Conditions +### Wait for All Adapters ```go -statuses, err := h.Client.GetClusterStatuses(ctx, clusterID) -Expect(err).NotTo(HaveOccurred()) +Eventually(h.PollClusterAdapterStatuses(ctx, clusterID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). + Should(helper.HaveAllAdaptersAtGeneration(h.Cfg.Adapters.Cluster, expectedGen)) +``` -for _, adapter := range statuses.Items { - hasApplied := h.HasCondition(adapter.Conditions, client.ConditionTypeApplied, openapi.True) - Expect(hasApplied).To(BeTrue()) -} +### Verify Conditions (synchronous) + +```go +hasReconciled := h.HasResourceCondition(cluster.Status.Conditions, client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue) +Expect(hasReconciled).To(BeTrue()) ``` ## Documentation diff --git a/docs/architecture.md b/docs/architecture.md index 50ebd92..fe79b3b 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -13,13 +13,15 @@ HyperFleet E2E is a Ginkgo-based black-box testing framework for validating Hype ```text pkg/ -├── api/ - OpenAPI generated client -├── client/ - HyperFleet API client wrapper -├── config/ - Configuration loading and validation -├── e2e/ - Test execution engine (Ginkgo) -├── helper/ - Test helper utilities (waits, assertions) -├── labels/ - Test label definitions -└── logger/ - Structured logging (slog) +├── api/ - OpenAPI generated client +├── client/ - HyperFleet API client wrapper +│ ├── kubernetes/ - Kubernetes client (client-go) +│ └── maestro/ - Maestro resource bundle client +├── config/ - Configuration loading and validation +├── e2e/ - Test execution engine (Ginkgo) +├── helper/ - Test helpers (pollers, matchers, resource management) +├── labels/ - Test label definitions +└── logger/ - Structured logging (slog) ``` ## Resource Management @@ -35,11 +37,11 @@ HyperFleet E2E creates ephemeral resources per test for complete isolation. 
**Workflow**: ```text Test starts - → Create new Helper instance + → Create new Helper instance (helper.New()) → GetTestCluster() creates cluster via API - → Wait for cluster Reconciled condition + → Poll for cluster Reconciled condition (pollers + matchers) → Execute test assertions - → CleanupTestCluster() deletes cluster + → CleanupTestCluster() deletes cluster and namespaces Test ends ``` @@ -54,8 +56,6 @@ timeouts: reconciled: 5m ``` -## Core Packages - ### pkg/config **Purpose**: Configuration loading, validation, and management @@ -100,38 +100,74 @@ Built-in Defaults (lowest priority) - Wraps generated OpenAPI `Client` from `pkg/api/openapi` **Key Methods**: + +*Clusters*: +- `CreateCluster(ctx, req)` / `CreateClusterFromPayload(ctx, path)` - Create cluster - `GetCluster(ctx, clusterID)` - Fetch cluster details -- `CreateCluster(ctx, payload)` - Create new cluster -- `DeleteCluster(ctx, clusterID)` - Delete cluster -- `GetNodePool(ctx, clusterID, nodePoolID)` - Fetch nodepool details -- Similar methods for all HyperFleet resources +- `ListClusters(ctx)` - List all clusters +- `DeleteCluster(ctx, clusterID)` - Soft-delete cluster +- `PatchCluster(ctx, clusterID, req)` / `PatchClusterFromPayload(ctx, clusterID, path)` - Update cluster +- `GetClusterStatuses(ctx, clusterID)` - Fetch adapter statuses + +*NodePools*: +- `CreateNodePool(ctx, clusterID, req)` / `CreateNodePoolFromPayload(ctx, clusterID, path)` - Create nodepool +- `GetNodePool(ctx, clusterID, npID)` - Fetch nodepool details +- `ListNodePools(ctx, clusterID)` - List nodepools for a cluster +- `DeleteNodePool(ctx, clusterID, npID)` - Soft-delete nodepool +- `PatchNodePool(ctx, clusterID, npID, req)` / `PatchNodePoolFromPayload(ctx, clusterID, npID, path)` - Update nodepool +- `GetNodePoolStatuses(ctx, clusterID, npID)` - Fetch adapter statuses ### pkg/helper -**Purpose**: Test helper utilities for resource management +**Purpose**: Test helper utilities — resource management, pollers, matchers, K8s verification **Key Features**: -- Resource lifecycle management (create, wait, cleanup) -- Condition polling and validation -- Per-test helper instance creation +- Per-test helper instance creation (`New()`) +- Resource lifecycle management (create, cleanup) +- Pollers for async assertions with `Eventually` +- Custom Gomega matchers for resource and adapter conditions +- Kubernetes resource verification (namespaces, deployments, jobs, configmaps) +- Adapter deployment/uninstall via Helm **Key Types**: -- `Helper` - Main helper struct with resource management methods +- `Helper` - Main struct with `Cfg`, `Client`, `K8sClient`, `MaestroClient` **Key Methods**: -**Resource Management**: +*Resource Management* (`helper.go`): - `GetTestCluster(ctx, payloadPath)` - Create temporary test cluster -- `CleanupTestCluster(ctx, clusterID)` - Delete test cluster +- `CleanupTestCluster(ctx, clusterID)` - Delete cluster, Maestro bundles, and namespaces - `GetTestNodePool(ctx, clusterID, payloadPath)` - Create nodepool -- `CleanupTestNodePool(ctx, clusterID, nodePoolID)` - Delete nodepool - -**Wait Operations**: -- `WaitForClusterCondition(ctx, clusterID, conditionType, expectedStatus, timeout)` - Poll until cluster condition matches -- `WaitForAllAdapterConditions(ctx, clusterID, conditions)` - Wait for adapter conditions -**Condition Validation**: -- `ValidateAdapterConditions(ctx, clusterID, expectedConditions)` - Check adapter status +*Pollers* (`pollers.go`) — thin functions returning current state for use with `Eventually`: +- 
`PollCluster(ctx, id)` - Returns `(*Cluster, error)` +- `PollNodePool(ctx, clusterID, npID)` - Returns `(*NodePool, error)` +- `PollClusterAdapterStatuses(ctx, clusterID)` - Returns `(*AdapterStatusList, error)` +- `PollNodePoolAdapterStatuses(ctx, clusterID, npID)` - Returns `(*AdapterStatusList, error)` +- `PollClusterHTTPStatus(ctx, id)` - Returns HTTP status code (200/404) +- `PollNodePoolHTTPStatus(ctx, clusterID, npID)` - Returns HTTP status code (200/404) +- `PollNamespacesByPrefix(ctx, prefix)` - Returns `([]string, error)` + +*Custom Matchers* (`matchers.go`) — reusable Gomega matchers: +- `HaveResourceCondition(condType, status)` - Matches `*Cluster` or `*NodePool` with given condition +- `HaveAllAdaptersWithCondition(adapters, condType, status)` - All required adapters have condition +- `HaveAllAdaptersAtGeneration(adapters, gen)` - All adapters at generation with Applied/Available/Health=True + +*Condition Validation* (`validation.go`): +- `HasResourceCondition(conditions, condType, status)` - Synchronous condition check +- `HasAdapterCondition(conditions, condType, status)` - Synchronous adapter condition check +- `AllConditionsTrue(conditions, condTypes)` - All specified conditions are True +- `AdapterNameToConditionType(adapterName)` - Convert adapter name to condition type string + +*Kubernetes Verification* (`k8s.go`): +- `VerifyNamespaceActive(ctx, name, labels, annotations)` - Namespace exists and Active +- `VerifyDeploymentAvailable(ctx, ns, labels, annotations)` - Deployment is Available +- `VerifyJobComplete(ctx, ns, labels, annotations)` - Job has completed +- `VerifyConfigMap(ctx, ns, labels, annotations)` - ConfigMap exists with expected metadata + +*Adapter Operations* (`adapter.go`): +- `DeployAdapter(ctx, opts)` - Deploy adapter via Helm upgrade --install +- `UninstallAdapter(ctx, releaseName, namespace)` - Uninstall adapter via Helm ### pkg/logger @@ -226,7 +262,7 @@ CLI Invoked (hyperfleet-e2e test) ↓ ┌─────────────────────────────────────┐ │ Run Test Suites │ -│ • Discover all e2e/*_test.go │ +│ • Discover all e2e/*/*.go │ │ • Execute matched tests │ │ • Collect results │ └─────────────────────────────────────┘ @@ -265,7 +301,7 @@ apiClient := openapi.NewClient(...) resp, httpResp, err := apiClient.ClustersAPI.GetCluster(ctx, clusterID).Execute() // Wrapped client (test-friendly) -client := client.NewHyperFleetClient(apiURL) +client, _ := client.NewHyperFleetClient(apiURL, nil) cluster, err := client.GetCluster(ctx, clusterID) ``` diff --git a/docs/development.md b/docs/development.md index 73d935e..a0e2fb1 100644 --- a/docs/development.md +++ b/docs/development.md @@ -72,7 +72,7 @@ import ( . 
"github.com/onsi/gomega" "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" - "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" ) @@ -91,13 +91,13 @@ var _ = ginkgo.Describe(testName, ginkgo.It("should create cluster successfully", func(ctx context.Context) { ginkgo.By("submitting cluster creation request") - cluster, err := h.Client.CreateClusterFromPayload(ctx, "testdata/payloads/clusters/cluster-request.json") + cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) Expect(err).NotTo(HaveOccurred()) clusterID = *cluster.Id ginkgo.By("waiting for cluster to become Reconciled") - err = h.WaitForClusterCondition(ctx, clusterID, client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue, h.Cfg.Timeouts.Cluster.Reconciled) - Expect(err).NotTo(HaveOccurred()) + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Ready, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) }) ginkgo.AfterEach(func(ctx context.Context) { @@ -210,40 +210,70 @@ ginkgo.AfterEach(func(ctx context.Context) { ```go // Basic assertions Expect(err).NotTo(HaveOccurred()) -Expect(cluster.ID).NotTo(BeEmpty()) -Expect(h.HasResourceCondition(cluster.Status.Conditions, client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)).To(BeTrue()) +Expect(cluster.Id).NotTo(BeNil()) +Expect(cluster.Generation).To(Equal(int32(1))) + +// Async: use pollers + custom matchers (preferred) +Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Ready, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) -// Eventually for async operations +// Async: use func(g Gomega) for complex one-off assertions Eventually(func(g Gomega) { - cluster, err := h.Client.GetCluster(ctx, clusterID) + statuses, err := h.Client.GetClusterStatuses(ctx, clusterID) g.Expect(err).NotTo(HaveOccurred()) - g.Expect(h.HasResourceCondition(cluster.Status.Conditions, client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)).To(BeTrue()) -}, h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval).Should(Succeed()) + // multi-field validation... +}, timeout, h.Cfg.Polling.Interval).Should(Succeed()) ``` **Important**: Inside `Eventually` closures, use `g.Expect()` instead of `Expect()` -## Using Helper Functions +## Using Pollers and Matchers -### Wait for Cluster Reconciled +The framework uses **pollers** (functions that fetch current state) and **custom matchers** (reusable Gomega assertions) to compose async checks. This avoids a combinatorial explosion of `WaitFor*` helper functions. + +### Wait for Resource Condition ```go -err = h.WaitForClusterCondition(ctx, clusterID, client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue, h.Cfg.Timeouts.Cluster.Reconciled) -Expect(err).NotTo(HaveOccurred()) +// Cluster +Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Ready, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + +// NodePool (same matcher, different poller) +Eventually(h.PollNodePool(ctx, clusterID, npID), h.Cfg.Timeouts.NodePool.Ready, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReady, openapi.ResourceConditionStatusTrue)) ``` -### Check Adapter Conditions +### Wait for Adapter Conditions ```go -statuses, err := h.Client.GetClusterStatuses(ctx, clusterID) -Expect(err).NotTo(HaveOccurred()) +// All adapters finalized +Eventually(h.PollClusterAdapterStatuses(ctx, clusterID), timeout, h.Cfg.Polling.Interval). + Should(helper.HaveAllAdaptersWithCondition(h.Cfg.Adapters.Cluster, client.ConditionTypeFinalized, openapi.AdapterConditionStatusTrue)) -for _, adapter := range statuses.Items { - hasApplied := h.HasCondition(adapter.Conditions, client.ConditionTypeApplied, openapi.True) - Expect(hasApplied).To(BeTrue()) -} +// All adapters at a specific generation with Applied+Available+Health=True +Eventually(h.PollClusterAdapterStatuses(ctx, clusterID), timeout, h.Cfg.Polling.Interval). + Should(helper.HaveAllAdaptersAtGeneration(h.Cfg.Adapters.Cluster, expectedGen)) ``` +### Wait for Hard-Delete + +```go +Eventually(h.PollClusterHTTPStatus(ctx, clusterID), timeout, h.Cfg.Polling.Interval). + Should(Equal(http.StatusNotFound)) +``` + +### Check Conditions Synchronously + +```go +hasReconciled := h.HasResourceCondition(cluster.Status.Conditions, client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue) +Expect(hasReconciled).To(BeTrue()) + +hasApplied := h.HasAdapterCondition(adapter.Conditions, client.ConditionTypeApplied, openapi.AdapterConditionStatusTrue) +Expect(hasApplied).To(BeTrue()) +``` + +Available pollers: see `pkg/helper/pollers.go`. Available matchers: see `pkg/helper/matchers.go`. + ## Best Practices ### DO ✅ @@ -254,7 +284,7 @@ for _, adapter := range statuses.Items { - Clean up resources in `AfterEach` - Use timeout values from config - Store resource IDs for cleanup -- Use helper functions when available +- Use pollers + custom matchers for async waits (see `pkg/helper/pollers.go`, `pkg/helper/matchers.go`) ### DON'T ❌ @@ -263,6 +293,7 @@ for _, adapter := range statuses.Items { - Don't hardcode timeouts (use config values) - Don't skip cleanup (unless debugging) - Don't ignore errors +- Don't create `WaitFor*` wrapper functions that hide `Eventually` — use pollers + matchers instead ## Adding New Tests @@ -322,31 +353,31 @@ cluster, err := h.Client.CreateClusterFromPayload(ctx, "testdata/payloads/cluste Expect(err).NotTo(HaveOccurred()) ``` -### Wait for Condition Transition +### Wait for Condition ```go -Eventually(func(g Gomega) { - cluster, err := h.Client.GetCluster(ctx, clusterID) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(h.HasResourceCondition(cluster.Status.Conditions, client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)).To(BeTrue()) -}, timeout, pollInterval).Should(Succeed()) +Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Ready, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) ``` -### Verify All Adapter Conditions +### Wait for All Adapters at Generation + +```go +Eventually(h.PollClusterAdapterStatuses(ctx, clusterID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). 
+ Should(helper.HaveAllAdaptersAtGeneration(h.Cfg.Adapters.Cluster, expectedGen)) +``` + +### Verify Adapter Conditions Synchronously ```go statuses, err := h.Client.GetClusterStatuses(ctx, clusterID) Expect(err).NotTo(HaveOccurred()) for _, adapter := range statuses.Items { - adapterName := adapter.Adapter - ginkgo.By(fmt.Sprintf("verifying adapter %s conditions", adapterName)) - - hasApplied := h.HasCondition(adapter.Conditions, client.ConditionTypeApplied, openapi.True) - Expect(hasApplied).To(BeTrue(), "adapter %s should have Applied=True", adapterName) - - hasAvailable := h.HasCondition(adapter.Conditions, client.ConditionTypeAvailable, openapi.True) - Expect(hasAvailable).To(BeTrue(), "adapter %s should have Available=True", adapterName) + Expect(h.HasAdapterCondition(adapter.Conditions, client.ConditionTypeApplied, openapi.AdapterConditionStatusTrue)).To(BeTrue(), + "adapter %s should have Applied=True", adapter.Adapter) + Expect(h.HasAdapterCondition(adapter.Conditions, client.ConditionTypeAvailable, openapi.AdapterConditionStatusTrue)).To(BeTrue(), + "adapter %s should have Available=True", adapter.Adapter) } ``` diff --git a/e2e/cluster/concurrent_creation.go b/e2e/cluster/concurrent_creation.go index bb91ecf..119a04e 100644 --- a/e2e/cluster/concurrent_creation.go +++ b/e2e/cluster/concurrent_creation.go @@ -87,14 +87,8 @@ var _ = ginkgo.Describe("[Suite: cluster][concurrent] System can process concurr ginkgo.By("Wait for all clusters to reach Reconciled=True and Available=True") for i, clusterID := range clusterIDs { ginkgo.GinkgoWriter.Printf("Waiting for cluster %d (%s) to become Reconciled...\n", i, clusterID) - err := h.WaitForClusterCondition( - ctx, - clusterID, - client.ConditionTypeReconciled, - openapi.ResourceConditionStatusTrue, - h.Cfg.Timeouts.Cluster.Reconciled, - ) - Expect(err).NotTo(HaveOccurred(), "cluster %d (%s) should reach Reconciled=True", i, clusterID) + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) cluster, err := h.Client.GetCluster(ctx, clusterID) Expect(err).NotTo(HaveOccurred(), "failed to get cluster %d (%s)", i, clusterID) diff --git a/e2e/cluster/creation.go b/e2e/cluster/creation.go index cefa6e7..d5519f6 100644 --- a/e2e/cluster/creation.go +++ b/e2e/cluster/creation.go @@ -130,14 +130,8 @@ var _ = ginkgo.Describe("[Suite: cluster][baseline] Cluster Resource Type Lifecy ginkgo.By("Verify final cluster state") // Wait for cluster Reconciled condition and verify both Reconciled and Available conditions are True // This confirms the cluster has reached the desired end state - err = h.WaitForClusterCondition( - ctx, - clusterID, - client.ConditionTypeReconciled, - openapi.ResourceConditionStatusTrue, - h.Cfg.Timeouts.Cluster.Reconciled, - ) - Expect(err).NotTo(HaveOccurred(), "cluster Reconciled condition should transition to True") + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) finalCluster, err := h.Client.GetCluster(ctx, clusterID) Expect(err).NotTo(HaveOccurred(), "failed to get final cluster state") @@ -238,14 +232,8 @@ var _ = ginkgo.Describe("[Suite: cluster][baseline] Cluster Resource Type Lifecy ginkgo.By("Verify final cluster state to ensure Reconciled before cleanup") // Wait for cluster Reconciled condition to prevent namespace deletion conflicts // Without this, adapters may still be creating resources during cleanup - err := h.WaitForClusterCondition( - ctx, - clusterID, - client.ConditionTypeReconciled, - openapi.ResourceConditionStatusTrue, - h.Cfg.Timeouts.Cluster.Reconciled, - ) - Expect(err).NotTo(HaveOccurred(), "cluster Reconciled condition should transition to True before cleanup") + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) }) }) diff --git a/e2e/cluster/delete.go b/e2e/cluster/delete.go new file mode 100644 index 0000000..4517171 --- /dev/null +++ b/e2e/cluster/delete.go @@ -0,0 +1,211 @@ +package cluster + +import ( + "context" + "errors" + "net/http" + + "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: cluster][delete] Cluster Deletion Lifecycle", + ginkgo.Label(labels.Tier0), + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster and waiting for Reconciled") + var err error + clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should complete full deletion lifecycle from soft-delete through hard-delete", func(ctx context.Context) { + clusterBefore, err := h.Client.GetCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + + ginkgo.By("soft-deleting the cluster") + deletedCluster, err := h.Client.DeleteCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred(), "DELETE request should succeed with 202") + Expect(deletedCluster.DeletedTime).NotTo(BeNil(), "soft-deleted cluster should have deleted_time set") + Expect(deletedCluster.Generation).To(Equal(clusterBefore.Generation+1), "generation should increment after soft-delete") + + ginkgo.By("waiting for cluster adapters to finalize and cluster to be hard-deleted") + // Hard-delete executes atomically within the POST /adapter_statuses request that + // computes Reconciled=True, so there is no observable window to see Finalized=True + // on the statuses endpoint. Accept either Finalized=True OR 404 (already hard-deleted). 
+ Eventually(func(g Gomega) { + var httpErr *client.HTTPError + _, err := h.Client.GetCluster(ctx, clusterID) + if errors.As(err, &httpErr) && httpErr.StatusCode == http.StatusNotFound { + return + } + g.Expect(err).NotTo(HaveOccurred()) + statuses, err := h.Client.GetClusterStatuses(ctx, clusterID) + if errors.As(err, &httpErr) && httpErr.StatusCode == http.StatusNotFound { + return + } + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(statuses).To(helper.HaveAllAdaptersWithCondition( + h.Cfg.Adapters.Cluster, client.ConditionTypeFinalized, openapi.AdapterConditionStatusTrue)) + }, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed()) + + ginkgo.By("confirming cluster is hard-deleted") + Eventually(h.PollClusterHTTPStatus(ctx, clusterID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). + Should(Equal(http.StatusNotFound)) + + ginkgo.By("verifying downstream K8s namespace is cleaned up") + Eventually(h.PollNamespacesByPrefix(ctx, clusterID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). + Should(BeEmpty()) + }) + + ginkgo.It("should return 409 Conflict when PATCHing a soft-deleted cluster", ginkgo.Label(labels.Negative), func(ctx context.Context) { + ginkgo.By("soft-deleting the cluster") + deletedCluster, err := h.Client.DeleteCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred(), "DELETE request should succeed with 202") + Expect(deletedCluster.DeletedTime).NotTo(BeNil(), "soft-deleted cluster should have deleted_time set") + deletedGeneration := deletedCluster.Generation + + ginkgo.By("attempting PATCH on the soft-deleted cluster") + patchReq := openapi.ClusterPatchRequest{ + Spec: &openapi.ClusterSpec{"updated-key": "should-not-work"}, + } + resp, err := h.Client.PatchClusterRaw(ctx, clusterID, patchReq) + Expect(err).NotTo(HaveOccurred(), "raw PATCH request should not fail at transport level") + defer func() { _ = resp.Body.Close() }() + Expect(resp.StatusCode).To(Equal(http.StatusConflict), + "PATCH on soft-deleted cluster should return 409 Conflict") + + ginkgo.By("verifying cluster state is unchanged after rejected PATCH") + cluster, err := h.Client.GetCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + Expect(cluster.Generation).To(Equal(deletedGeneration), "generation should not change after rejected PATCH") + Expect(cluster.DeletedTime).NotTo(BeNil(), "cluster should still be marked as deleted") + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if cluster, err := h.Client.GetCluster(ctx, clusterID); err == nil && cluster.DeletedTime == nil { + if _, err := h.Client.DeleteCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: API delete failed for cluster %s: %v\n", clusterID, err) + } + } + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) + +var _ = ginkgo.Describe("[Suite: cluster][delete] Cluster Cascade Deletion", + ginkgo.Label(labels.Tier0), + func() { + var h *helper.Helper + var clusterID string + var nodepoolID1 string + var nodepoolID2 string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster and waiting for Reconciled") + var err error + clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + + 
Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + ginkgo.By("creating two nodepools") + np1, err := h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create first nodepool") + Expect(np1.Id).NotTo(BeNil()) + nodepoolID1 = *np1.Id + + np2, err := h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create second nodepool") + Expect(np2.Id).NotTo(BeNil()) + nodepoolID2 = *np2.Id + + ginkgo.By("waiting for both nodepools to reach Reconciled") + Eventually(h.PollNodePool(ctx, clusterID, nodepoolID1), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + Eventually(h.PollNodePool(ctx, clusterID, nodepoolID2), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should cascade deletion to child nodepools and hard-delete all resources", func(ctx context.Context) { + ginkgo.By("soft-deleting the cluster") + deletedCluster, err := h.Client.DeleteCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred(), "DELETE request should succeed with 202") + Expect(deletedCluster.DeletedTime).NotTo(BeNil(), "cluster should have deleted_time set") + + ginkgo.By("verifying cascade: both child nodepools are soft-deleted or already hard-deleted") + Eventually(func(g Gomega) { + np1, err := h.Client.GetNodePool(ctx, clusterID, nodepoolID1) + var httpErr *client.HTTPError + if errors.As(err, &httpErr) && httpErr.StatusCode == http.StatusNotFound { + return + } + g.Expect(err).NotTo(HaveOccurred(), "first nodepool should be accessible or 404") + g.Expect(np1.DeletedTime).NotTo(BeNil(), "first nodepool should have deleted_time set via cascade") + }, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed()) + + Eventually(func(g Gomega) { + np2, err := h.Client.GetNodePool(ctx, clusterID, nodepoolID2) + var httpErr *client.HTTPError + if errors.As(err, &httpErr) && httpErr.StatusCode == http.StatusNotFound { + return + } + g.Expect(err).NotTo(HaveOccurred(), "second nodepool should be accessible or 404") + g.Expect(np2.DeletedTime).NotTo(BeNil(), "second nodepool should have deleted_time set via cascade") + }, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed()) + + ginkgo.By("waiting for both nodepools to be hard-deleted") + Eventually(h.PollNodePoolHTTPStatus(ctx, clusterID, nodepoolID1), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). + Should(Equal(http.StatusNotFound)) + + Eventually(h.PollNodePoolHTTPStatus(ctx, clusterID, nodepoolID2), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). + Should(Equal(http.StatusNotFound)) + + ginkgo.By("waiting for cluster to be hard-deleted after all nodepools removed") + Eventually(h.PollClusterHTTPStatus(ctx, clusterID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). 
+ Should(Equal(http.StatusNotFound)) + + ginkgo.By("verifying downstream K8s namespace is cleaned up") + Eventually(h.PollNamespacesByPrefix(ctx, clusterID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). + Should(BeEmpty()) + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if cluster, err := h.Client.GetCluster(ctx, clusterID); err == nil && cluster.DeletedTime == nil { + if _, err := h.Client.DeleteCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: API delete failed for cluster %s: %v\n", clusterID, err) + } + } + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) diff --git a/e2e/cluster/delete_edge_cases.go b/e2e/cluster/delete_edge_cases.go new file mode 100644 index 0000000..c91468d --- /dev/null +++ b/e2e/cluster/delete_edge_cases.go @@ -0,0 +1,339 @@ +package cluster + +import ( + "context" + "errors" + "net/http" + "sync" + + "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: cluster][delete] Re-DELETE Idempotency and API Boundary Tests", + ginkgo.Label(labels.Tier1), + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster and waiting for Reconciled") + var err error + clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should handle re-DELETE idempotently without changing deleted_time or generation", func(ctx context.Context) { + ginkgo.By("sending first DELETE request") + firstDelete, err := h.Client.DeleteCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred(), "first DELETE should succeed with 202") + Expect(firstDelete.DeletedTime).NotTo(BeNil(), "first DELETE should set deleted_time") + originalDeletedTime := *firstDelete.DeletedTime + originalGeneration := firstDelete.Generation + + ginkgo.By("sending second DELETE request") + secondDelete, err := h.Client.DeleteCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred(), "second DELETE should succeed with 202") + Expect(secondDelete.DeletedTime).NotTo(BeNil(), "second DELETE should still have deleted_time") + Expect(*secondDelete.DeletedTime).To(Equal(originalDeletedTime), "deleted_time should not change on re-DELETE") + Expect(secondDelete.Generation).To(Equal(originalGeneration), "generation should not increment on re-DELETE") + }) + + ginkgo.It("should return 409 Conflict when creating nodepool under soft-deleted cluster", + ginkgo.Label(labels.Negative), + func(ctx context.Context) { + ginkgo.By("soft-deleting the cluster") + deletedCluster, err := h.Client.DeleteCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred(), "DELETE should succeed with 202") + Expect(deletedCluster.DeletedTime).NotTo(BeNil()) + + ginkgo.By("attempting to create a nodepool under the soft-deleted cluster") + _, err = h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json")) + var httpErr *client.HTTPError + Expect(errors.As(err, &httpErr)).To(BeTrue(), "error should be HTTPError") + Expect(httpErr.StatusCode).To(Equal(http.StatusConflict), + "creating nodepool under soft-deleted cluster should return 409") + + ginkgo.By("verifying no nodepool was created") + npList, err := h.Client.ListNodePools(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + Expect(npList.Items).To(BeEmpty(), "no nodepools should exist under soft-deleted cluster") + }, + ) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if cluster, err := h.Client.GetCluster(ctx, clusterID); err == nil && cluster.DeletedTime == nil { + if _, err := h.Client.DeleteCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: API delete failed for cluster %s: %v\n", clusterID, err) + } + } + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) + +var _ = ginkgo.Describe("[Suite: cluster][delete] DELETE Non-Existent Cluster", + ginkgo.Label(labels.Tier1, labels.Negative), + func() { + var h *helper.Helper + + ginkgo.BeforeEach(func() { + h = helper.New() + }) + + ginkgo.It("should return 404 when deleting a non-existent cluster", func(ctx context.Context) { + ginkgo.By("sending DELETE for a non-existent cluster ID") + _, err := h.Client.DeleteCluster(ctx, "non-existent-cluster-id-12345") + var httpErr *client.HTTPError + Expect(errors.As(err, &httpErr)).To(BeTrue(), "error should be HTTPError") + Expect(httpErr.StatusCode).To(Equal(http.StatusNotFound), + "DELETE on non-existent cluster should return 404") + }) + }, +) + +var _ = ginkgo.Describe("[Suite: cluster][delete] Concurrent Deletion", + 
ginkgo.Label(labels.Tier1), + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster and waiting for Reconciled") + var err error + clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should produce a single soft-delete record from simultaneous DELETE requests", func(ctx context.Context) { + ginkgo.By("capturing generation before deletion") + clusterBefore, err := h.Client.GetCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + genBefore := clusterBefore.Generation + + ginkgo.By("firing 5 concurrent DELETE requests") + const concurrency = 5 + type deleteResult struct { + cluster *openapi.Cluster + err error + } + results := make([]deleteResult, concurrency) + var wg sync.WaitGroup + wg.Add(concurrency) + for i := range concurrency { + go func(idx int) { + defer wg.Done() + defer ginkgo.GinkgoRecover() + c, e := h.Client.DeleteCluster(ctx, clusterID) + results[idx] = deleteResult{cluster: c, err: e} + }(i) + } + wg.Wait() + + ginkgo.By("verifying all requests succeeded with consistent state") + for i, r := range results { + Expect(r.err).NotTo(HaveOccurred(), "DELETE request %d should succeed", i) + Expect(r.cluster.DeletedTime).NotTo(BeNil(), "DELETE request %d should have deleted_time", i) + } + + // All responses should carry identical deleted_time and generation + referenceTime := *results[0].cluster.DeletedTime + referenceGen := results[0].cluster.Generation + for i := 1; i < concurrency; i++ { + Expect(*results[i].cluster.DeletedTime).To(Equal(referenceTime), + "all DELETE responses should have the same deleted_time") + Expect(results[i].cluster.Generation).To(Equal(referenceGen), + "all DELETE responses should have the same generation") + } + + ginkgo.By("verifying generation incremented exactly once") + Expect(referenceGen).To(Equal(genBefore+1), + "generation should increment by exactly 1, not by the number of concurrent requests") + + ginkgo.By("verifying cluster completes deletion lifecycle") + Eventually(h.PollClusterHTTPStatus(ctx, clusterID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). 
+ Should(Equal(http.StatusNotFound)) + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if cluster, err := h.Client.GetCluster(ctx, clusterID); err == nil && cluster.DeletedTime == nil { + if _, err := h.Client.DeleteCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: API delete failed for cluster %s: %v\n", clusterID, err) + } + } + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) + +var _ = ginkgo.Describe("[Suite: cluster][delete] DELETE During Update Reconciliation", + ginkgo.Label(labels.Tier1), + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster and waiting for Reconciled at generation 1") + cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + Expect(cluster.Id).NotTo(BeNil()) + clusterID = *cluster.Id + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should complete deletion when DELETE is sent during update reconciliation", func(ctx context.Context) { + ginkgo.By("sending PATCH to trigger generation 2 (do NOT wait for reconciliation)") + patchedCluster, err := h.Client.PatchCluster(ctx, clusterID, openapi.ClusterPatchRequest{ + Spec: &openapi.ClusterSpec{"trigger-update": "true"}, + }) + Expect(err).NotTo(HaveOccurred(), "PATCH should succeed") + Expect(patchedCluster.Generation).To(Equal(int32(2))) + + ginkgo.By("immediately sending DELETE before update reconciliation completes") + deletedCluster, err := h.Client.DeleteCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred(), "DELETE should succeed with 202") + Expect(deletedCluster.DeletedTime).NotTo(BeNil()) + Expect(deletedCluster.Generation).To(Equal(int32(3)), + "generation should be 3: create(1) + PATCH(2) + DELETE(3)") + + ginkgo.By("verifying cluster is hard-deleted") + Eventually(h.PollClusterHTTPStatus(ctx, clusterID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). + Should(Equal(http.StatusNotFound)) + + ginkgo.By("verifying downstream K8s namespace is cleaned up") + Eventually(h.PollNamespacesByPrefix(ctx, clusterID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). 
+ Should(BeEmpty()) + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if cluster, err := h.Client.GetCluster(ctx, clusterID); err == nil && cluster.DeletedTime == nil { + if _, err := h.Client.DeleteCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: API delete failed for cluster %s: %v\n", clusterID, err) + } + } + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) + +var _ = ginkgo.Describe("[Suite: cluster][delete] Recreate Cluster After Hard-Delete", + ginkgo.Label(labels.Tier1), + func() { + var h *helper.Helper + var firstClusterID string + var secondClusterID string + var originalCluster *openapi.Cluster + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating first cluster and waiting for Reconciled") + cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + Expect(cluster.Id).NotTo(BeNil()) + firstClusterID = *cluster.Id + originalCluster = cluster + + Eventually(h.PollCluster(ctx, firstClusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should create a new cluster with the same name after the original is hard-deleted", func(ctx context.Context) { + ginkgo.By("deleting the first cluster and waiting for hard-delete") + _, err := h.Client.DeleteCluster(ctx, firstClusterID) + Expect(err).NotTo(HaveOccurred()) + + Eventually(h.PollClusterHTTPStatus(ctx, firstClusterID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). + Should(Equal(http.StatusNotFound)) + + ginkgo.By("waiting for namespace cleanup from first cluster") + Eventually(h.PollNamespacesByPrefix(ctx, firstClusterID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). + Should(BeEmpty()) + + ginkgo.By("creating a new cluster with the same name") + kind := "Cluster" + newCluster, err := h.Client.CreateCluster(ctx, openapi.ClusterCreateRequest{ + Kind: &kind, + Name: originalCluster.Name, + Labels: originalCluster.Labels, + Spec: originalCluster.Spec, + }) + Expect(err).NotTo(HaveOccurred(), "creating cluster with reused name should succeed") + Expect(newCluster.Id).NotTo(BeNil()) + secondClusterID = *newCluster.Id + + Expect(secondClusterID).NotTo(Equal(firstClusterID), + "new cluster should have a different ID than the deleted one") + Expect(newCluster.Generation).To(Equal(int32(1)), + "new cluster should start at generation 1") + + ginkgo.By("waiting for the new cluster to reach Reconciled") + Eventually(h.PollCluster(ctx, secondClusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + ginkgo.By("verifying the old cluster is still gone") + _, err = h.Client.GetCluster(ctx, firstClusterID) + var httpErr *client.HTTPError + Expect(errors.As(err, &httpErr)).To(BeTrue()) + Expect(httpErr.StatusCode).To(Equal(http.StatusNotFound), + "old cluster should remain 404 after recreate") + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil { + return + } + for _, id := range []string{firstClusterID, secondClusterID} { + if id == "" { + continue + } + ginkgo.By("cleaning up cluster " + id) + if cluster, err := h.Client.GetCluster(ctx, id); err == nil && cluster.DeletedTime == nil { + if _, err := h.Client.DeleteCluster(ctx, id); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: API delete failed for cluster %s: %v\n", id, err) + } + } + if err := h.CleanupTestCluster(ctx, id); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", id, err) + } + } + }) + }, +) diff --git a/e2e/cluster/delete_external.go b/e2e/cluster/delete_external.go new file mode 100644 index 0000000..926d23d --- /dev/null +++ b/e2e/cluster/delete_external.go @@ -0,0 +1,93 @@ +package cluster + +import ( + "context" + "errors" + "net/http" + + "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: cluster][delete] External K8s Resource Deletion", + ginkgo.Label(labels.Tier1), + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster and waiting for Reconciled") + var err error + clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + ginkgo.By("confirming managed K8s namespaces exist") + namespaces, err := h.K8sClient.FindNamespacesByPrefix(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + Expect(namespaces).NotTo(BeEmpty(), "managed namespaces should exist after Reconciled") + }) + + ginkgo.It("should treat externally-deleted K8s resources as finalized and complete hard-delete", func(ctx context.Context) { + ginkgo.By("externally deleting all managed K8s namespaces (bypass the API)") + namespaces, err := h.K8sClient.FindNamespacesByPrefix(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + + for _, ns := range namespaces { + err := h.K8sClient.DeleteNamespaceAndWait(ctx, ns) + Expect(err).NotTo(HaveOccurred(), "failed to delete namespace %s", ns) + } + + ginkgo.By("verifying all namespaces are gone") + Eventually(h.PollNamespacesByPrefix(ctx, clusterID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). 
+ Should(BeEmpty()) + + ginkgo.By("sending DELETE through the API") + deletedCluster, err := h.Client.DeleteCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + Expect(deletedCluster.DeletedTime).NotTo(BeNil()) + + ginkgo.By("verifying adapters report Finalized=True with Health=True") + Eventually(func(g Gomega) { + var httpErr *client.HTTPError + statuses, err := h.Client.GetClusterStatuses(ctx, clusterID) + if errors.As(err, &httpErr) && httpErr.StatusCode == http.StatusNotFound { + return + } + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(statuses).To(helper.HaveAllAdaptersWithCondition( + h.Cfg.Adapters.Cluster, client.ConditionTypeFinalized, openapi.AdapterConditionStatusTrue)) + g.Expect(statuses).To(helper.HaveAllAdaptersWithCondition( + h.Cfg.Adapters.Cluster, client.ConditionTypeHealth, openapi.AdapterConditionStatusTrue)) + }, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed()) + + ginkgo.By("verifying cluster is hard-deleted") + Eventually(h.PollClusterHTTPStatus(ctx, clusterID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). + Should(Equal(http.StatusNotFound)) + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if cluster, err := h.Client.GetCluster(ctx, clusterID); err == nil && cluster.DeletedTime == nil { + if _, err := h.Client.DeleteCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: API delete failed for cluster %s: %v\n", clusterID, err) + } + } + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) diff --git a/e2e/cluster/delete_visibility.go b/e2e/cluster/delete_visibility.go new file mode 100644 index 0000000..ba0fdb7 --- /dev/null +++ b/e2e/cluster/delete_visibility.go @@ -0,0 +1,186 @@ +package cluster + +import ( + "context" + + "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: cluster][delete] Soft-Deleted Cluster Visibility", + ginkgo.Label(labels.Tier1, labels.Disruptive), + ginkgo.Serial, + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster and waiting for Reconciled") + var err error + clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should remain visible via GET and LIST before hard-delete", func(ctx context.Context) { + ginkgo.By("pausing sentinel to freeze reconciliation before soft-delete") + err := h.ScaleDeployment(ctx, h.Cfg.Namespace, helper.SentinelClustersDeployment, 0) + Expect(err).NotTo(HaveOccurred(), "failed to scale sentinel to 0") + ginkgo.DeferCleanup(func(ctx context.Context) { + ginkgo.By("restoring sentinel-clusters to 1 replica") + if err := h.ScaleDeployment(ctx, h.Cfg.Namespace, helper.SentinelClustersDeployment, 1); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: failed to restore sentinel: %v\n", err) + } + }) + + ginkgo.By("soft-deleting the cluster") + deletedCluster, err := h.Client.DeleteCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + Expect(deletedCluster.DeletedTime).NotTo(BeNil()) + + ginkgo.By("verifying GET returns the soft-deleted cluster with deleted_time") + Eventually(func(g Gomega) { + cluster, err := h.Client.GetCluster(ctx, clusterID) + g.Expect(err).NotTo(HaveOccurred(), "GET should return 200, not 404") + g.Expect(cluster.DeletedTime).NotTo(BeNil(), "cluster should have deleted_time set") + }, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed()) + + ginkgo.By("verifying LIST includes the soft-deleted cluster") + Eventually(func(g Gomega) { + clusterList, err := h.Client.ListClusters(ctx) + g.Expect(err).NotTo(HaveOccurred()) + + found := false + for _, c := range clusterList.Items { + if c.Id != nil && *c.Id == clusterID { + g.Expect(c.DeletedTime).NotTo(BeNil(), "cluster in LIST should have deleted_time") + found = true + } + } + g.Expect(found).To(BeTrue(), "soft-deleted cluster should appear in LIST") + }, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed()) + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if cluster, err := h.Client.GetCluster(ctx, clusterID); err == nil && cluster.DeletedTime == nil { + if _, err := h.Client.DeleteCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: API delete failed for cluster %s: %v\n", clusterID, err) + } + } + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) + +var _ = ginkgo.Describe("[Suite: cluster][delete] LIST Shows Active and Soft-Deleted Clusters", + ginkgo.Label(labels.Tier1, labels.Disruptive), + ginkgo.Serial, + func() { + var h *helper.Helper + var activeClusterID string + var deletedClusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating two clusters and waiting for Reconciled") + var err error + activeClusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create active cluster") + + deletedClusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster to delete") + + Eventually(h.PollCluster(ctx, activeClusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + Eventually(h.PollCluster(ctx, deletedClusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should return both active and soft-deleted clusters in LIST", func(ctx context.Context) { + ginkgo.By("pausing sentinel to freeze reconciliation before soft-delete") + err := h.ScaleDeployment(ctx, h.Cfg.Namespace, helper.SentinelClustersDeployment, 0) + Expect(err).NotTo(HaveOccurred(), "failed to scale sentinel to 0") + ginkgo.DeferCleanup(func(ctx context.Context) { + ginkgo.By("restoring sentinel-clusters to 1 replica") + if err := h.ScaleDeployment(ctx, h.Cfg.Namespace, helper.SentinelClustersDeployment, 1); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: failed to restore sentinel: %v\n", err) + } + }) + + ginkgo.By("soft-deleting one cluster") + _, err = h.Client.DeleteCluster(ctx, deletedClusterID) + Expect(err).NotTo(HaveOccurred()) + + ginkgo.By("verifying LIST returns both clusters simultaneously") + Eventually(func(g Gomega) { + clusterList, err := h.Client.ListClusters(ctx) + g.Expect(err).NotTo(HaveOccurred()) + + var foundActive, foundDeleted bool + for _, c := range clusterList.Items { + if c.Id == nil { + continue + } + if *c.Id == activeClusterID { + g.Expect(c.DeletedTime).To(BeNil(), "active cluster should not have deleted_time") + foundActive = true + } + if *c.Id == deletedClusterID { + g.Expect(c.DeletedTime).NotTo(BeNil(), "deleted cluster should have deleted_time") + foundDeleted = true + } + } + g.Expect(foundActive).To(BeTrue(), "active cluster should appear in LIST") + g.Expect(foundDeleted).To(BeTrue(), "soft-deleted cluster should appear in LIST") + }, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed()) + + ginkgo.By("verifying GET returns correct state for each cluster") + activeCluster, err := h.Client.GetCluster(ctx, activeClusterID) + Expect(err).NotTo(HaveOccurred()) + Expect(activeCluster.DeletedTime).To(BeNil()) + + Eventually(func(g Gomega) { + deletedCluster, err := h.Client.GetCluster(ctx, deletedClusterID) + g.Expect(err).NotTo(HaveOccurred(), "GET on soft-deleted cluster should return 200") + g.Expect(deletedCluster.DeletedTime).NotTo(BeNil()) + }, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed()) + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil { + return + } + for _, id := range []string{activeClusterID, deletedClusterID} { + if id == "" { + continue + } + ginkgo.By("cleaning up cluster " + id) + if cluster, err := h.Client.GetCluster(ctx, id); err == nil && cluster.DeletedTime == nil { + if _, err := h.Client.DeleteCluster(ctx, id); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: API delete failed for cluster %s: %v\n", id, err) + } + } + if err := h.CleanupTestCluster(ctx, id); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", id, err) + } + } + }) + }, +) diff --git a/e2e/cluster/update.go b/e2e/cluster/update.go new file mode 100644 index 0000000..e8282e6 --- /dev/null +++ b/e2e/cluster/update.go @@ -0,0 +1,77 @@ +package cluster + +import ( + "context" + + "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: cluster][update] Cluster Update Lifecycle", + ginkgo.Label(labels.Tier0), + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster and waiting for Reconciled at generation 1") + cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + Expect(cluster.Id).NotTo(BeNil(), "cluster ID should be generated") + clusterID = *cluster.Id + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should update cluster via PATCH, trigger reconciliation, and reach Reconciled at new generation", func(ctx context.Context) { + ginkgo.By("verifying cluster is at generation 1 before PATCH") + clusterBefore, err := h.Client.GetCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + Expect(clusterBefore.Generation).To(Equal(int32(1)), "cluster should be at generation 1 before update") + + ginkgo.By("sending PATCH to update cluster spec") + patchedCluster, err := h.Client.PatchClusterFromPayload(ctx, clusterID, h.TestDataPath("payloads/clusters/cluster-patch.json")) + Expect(err).NotTo(HaveOccurred(), "PATCH request should succeed") + expectedGen := clusterBefore.Generation + 1 + Expect(patchedCluster.Generation).To(Equal(expectedGen), "generation should increment after PATCH") + + ginkgo.By("waiting for all adapters to reconcile at new generation") + Eventually(h.PollClusterAdapterStatuses(ctx, clusterID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). 
+ Should(helper.HaveAllAdaptersAtGeneration(h.Cfg.Adapters.Cluster, expectedGen)) + + ginkgo.By("verifying cluster reaches Reconciled=True at new generation") + Eventually(func(g Gomega) { + finalCluster, err := h.Client.GetCluster(ctx, clusterID) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(finalCluster.Generation).To(Equal(expectedGen), "final cluster generation should match expected") + + found := false + for _, cond := range finalCluster.Status.Conditions { + if cond.Type == client.ConditionTypeReconciled && cond.Status == openapi.ResourceConditionStatusTrue { + found = true + g.Expect(cond.ObservedGeneration).To(Equal(expectedGen), "Reconciled condition observed_generation should match expected") + } + } + g.Expect(found).To(BeTrue(), "cluster should have Reconciled=True") + }, h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval).Should(Succeed()) + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) diff --git a/e2e/cluster/update_edge_cases.go b/e2e/cluster/update_edge_cases.go new file mode 100644 index 0000000..285465c --- /dev/null +++ b/e2e/cluster/update_edge_cases.go @@ -0,0 +1,201 @@ +package cluster + +import ( + "context" + + "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: cluster][update] Rapid Update Coalescing", + ginkgo.Label(labels.Tier1), + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster and waiting for Reconciled at generation 1") + cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + Expect(cluster.Id).NotTo(BeNil()) + clusterID = *cluster.Id + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should coalesce multiple rapid updates and reconcile to the latest generation", func(ctx context.Context) { + ginkgo.By("sending three PATCH requests in rapid succession") + patch1, err := h.Client.PatchCluster(ctx, clusterID, openapi.ClusterPatchRequest{ + Spec: &openapi.ClusterSpec{"update": "first"}, + }) + Expect(err).NotTo(HaveOccurred()) + Expect(patch1.Generation).To(Equal(int32(2))) + + patch2, err := h.Client.PatchCluster(ctx, clusterID, openapi.ClusterPatchRequest{ + Spec: &openapi.ClusterSpec{"update": "second"}, + }) + Expect(err).NotTo(HaveOccurred()) + Expect(patch2.Generation).To(Equal(int32(3))) + + patch3, err := h.Client.PatchCluster(ctx, clusterID, openapi.ClusterPatchRequest{ + Spec: &openapi.ClusterSpec{"update": "third"}, + }) + Expect(err).NotTo(HaveOccurred()) + Expect(patch3.Generation).To(Equal(int32(4))) + + ginkgo.By("waiting for all adapters to reconcile at the final generation") + Eventually(h.PollClusterAdapterStatuses(ctx, clusterID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). + Should(helper.HaveAllAdaptersAtGeneration(h.Cfg.Adapters.Cluster, int32(4))) + + ginkgo.By("verifying cluster reaches Reconciled=True at final generation") + Eventually(func(g Gomega) { + finalCluster, err := h.Client.GetCluster(ctx, clusterID) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(finalCluster.Generation).To(Equal(int32(4))) + + found := false + for _, cond := range finalCluster.Status.Conditions { + if cond.Type == client.ConditionTypeReconciled && cond.Status == openapi.ResourceConditionStatusTrue { + found = true + g.Expect(cond.ObservedGeneration).To(Equal(int32(4))) + } + } + g.Expect(found).To(BeTrue(), "cluster should have Reconciled=True") + }, h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval).Should(Succeed()) + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) + +var _ = ginkgo.Describe("[Suite: cluster][update] Labels-Only PATCH", + ginkgo.Label(labels.Tier1), + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster and waiting for Reconciled at generation 1") + cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + Expect(cluster.Id).NotTo(BeNil()) + clusterID = *cluster.Id + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should bump generation and trigger reconciliation from a labels-only PATCH", func(ctx context.Context) { + ginkgo.By("capturing spec before labels-only PATCH") + clusterBefore, err := h.Client.GetCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + specBefore := clusterBefore.Spec + + ginkgo.By("sending labels-only PATCH (preserving existing labels)") + newLabels := make(map[string]string) + if clusterBefore.Labels != nil { + for k, v := range *clusterBefore.Labels { + newLabels[k] = v + } + } + newLabels["env"] = "staging" + newLabels["team"] = "fleet-management" + patchedCluster, err := h.Client.PatchCluster(ctx, clusterID, openapi.ClusterPatchRequest{ + Labels: &newLabels, + }) + Expect(err).NotTo(HaveOccurred(), "labels-only PATCH should succeed") + Expect(patchedCluster.Generation).To(Equal(int32(2)), + "generation should increment after labels-only PATCH") + + ginkgo.By("waiting for all adapters to reconcile at generation 2") + Eventually(h.PollClusterAdapterStatuses(ctx, clusterID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). + Should(helper.HaveAllAdaptersAtGeneration(h.Cfg.Adapters.Cluster, int32(2))) + + ginkgo.By("verifying cluster reaches Reconciled=True with updated labels and unchanged spec") + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + finalCluster, err := h.Client.GetCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + Expect(finalCluster.Labels).NotTo(BeNil(), "cluster should have labels") + Expect((*finalCluster.Labels)["env"]).To(Equal("staging")) + Expect((*finalCluster.Labels)["team"]).To(Equal("fleet-management")) + Expect(finalCluster.Spec).To(Equal(specBefore), + "spec should be unchanged after labels-only PATCH") + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) + +var _ = ginkgo.Describe("[Suite: cluster][update] No-Op PATCH", + ginkgo.Label(labels.Tier1), + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster and waiting for Reconciled at generation 1") + cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + Expect(cluster.Id).NotTo(BeNil()) + clusterID = *cluster.Id + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should not increment generation when PATCHing with identical spec", func(ctx context.Context) { + ginkgo.By("capturing canonical spec from current cluster state") + clusterBefore, err := h.Client.GetCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + canonicalSpec := clusterBefore.Spec + + ginkgo.By("replaying the same spec via PATCH") + patchedCluster, err := h.Client.PatchCluster(ctx, clusterID, openapi.ClusterPatchRequest{ + Spec: &canonicalSpec, + }) + Expect(err).NotTo(HaveOccurred(), "no-op PATCH should succeed") + Expect(patchedCluster.Generation).To(Equal(clusterBefore.Generation), + "generation should not increment for identical spec PATCH") + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) diff --git a/e2e/nodepool/concurrent_creation.go b/e2e/nodepool/concurrent_creation.go index 85be514..016345f 100644 --- a/e2e/nodepool/concurrent_creation.go +++ b/e2e/nodepool/concurrent_creation.go @@ -32,6 +32,7 @@ var _ = ginkgo.Describe("[Suite: nodepool][concurrent] Multiple nodepools can co clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) Expect(err).NotTo(HaveOccurred(), "failed to get test cluster") ginkgo.GinkgoWriter.Printf("Using cluster ID: %s\n", clusterID) + }) ginkgo.It("should create multiple nodepools under the same cluster and all reach Reconciled state with isolated resources", @@ -104,15 +105,8 @@ var _ = ginkgo.Describe("[Suite: nodepool][concurrent] Multiple nodepools can co ginkgo.By("Wait for all nodepools to reach Reconciled=True and Available=True") for i, npID := range nodepoolIDs { ginkgo.GinkgoWriter.Printf("Waiting for nodepool %d (%s) to become Reconciled...\n", i, npID) - err := h.WaitForNodePoolCondition( - ctx, - clusterID, - npID, - client.ConditionTypeReconciled, - openapi.ResourceConditionStatusTrue, - h.Cfg.Timeouts.NodePool.Reconciled, - ) - Expect(err).NotTo(HaveOccurred(), "nodepool %d (%s) should reach Reconciled=True", i, npID) + Eventually(h.PollNodePool(ctx, clusterID, npID), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) np, err := h.Client.GetNodePool(ctx, clusterID, npID) Expect(err).NotTo(HaveOccurred(), "failed to get nodepool %d (%s)", i, npID) @@ -180,22 +174,10 @@ var _ = ginkgo.Describe("[Suite: nodepool][concurrent] Multiple nodepools can co if h == nil || clusterID == "" { return } - - ginkgo.By("Verify final cluster state to ensure Reconciled before cleanup") - err := h.WaitForClusterCondition( - ctx, - clusterID, - client.ConditionTypeReconciled, - openapi.ResourceConditionStatusTrue, - h.Cfg.Timeouts.Cluster.Reconciled, - ) - if err != nil { - ginkgo.GinkgoWriter.Printf("WARNING: cluster %s did not reach Reconciled state before cleanup: %v\n", clusterID, err) + ginkgo.By("cleaning up cluster " + clusterID) + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) } - - ginkgo.By("cleaning up test cluster " + clusterID) - err = h.CleanupTestCluster(ctx, clusterID) - Expect(err).NotTo(HaveOccurred(), "failed to cleanup cluster %s", clusterID) }) }, ) diff --git a/e2e/nodepool/creation.go b/e2e/nodepool/creation.go index cdcabba..c77ae00 100644 --- a/e2e/nodepool/creation.go +++ b/e2e/nodepool/creation.go @@ -147,15 +147,8 @@ var _ = ginkgo.Describe("[Suite: nodepool][baseline] NodePool Resource Type Life ginkgo.By("Verify final nodepool state") // Wait for nodepool Reconciled condition and verify both Reconciled and Available conditions are True // This confirms the nodepool has reached the desired end state - err = h.WaitForNodePoolCondition( - ctx, - clusterID, - nodepoolID, - client.ConditionTypeReconciled, - openapi.ResourceConditionStatusTrue, - h.Cfg.Timeouts.NodePool.Reconciled, - ) - Expect(err).NotTo(HaveOccurred(), "nodepool Reconciled condition should transition to True") + Eventually(h.PollNodePool(ctx, clusterID, nodepoolID), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) finalNodePool, err := h.Client.GetNodePool(ctx, clusterID, nodepoolID) Expect(err).NotTo(HaveOccurred(), "failed to get final nodepool state") @@ -239,43 +232,19 @@ var _ = ginkgo.Describe("[Suite: nodepool][baseline] NodePool Resource Type Life // Wait for nodepool Reconciled condition and verify both Reconciled and Available conditions are True // This confirms the nodepool workflow completed successfully and all K8s resources were created // Without this, adapters may still be creating resources during cleanup - err := h.WaitForNodePoolCondition( - ctx, - clusterID, - nodepoolID, - client.ConditionTypeReconciled, - openapi.ResourceConditionStatusTrue, - h.Cfg.Timeouts.NodePool.Reconciled, - ) - Expect(err).NotTo(HaveOccurred(), "nodepool Reconciled condition should transition to True") + Eventually(h.PollNodePool(ctx, clusterID, nodepoolID), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) }) }) ginkgo.AfterEach(func(ctx context.Context) { - // Skip cleanup if helper not initialized or no cluster created - // Note: Deleting cluster will cascade delete nodepool automatically if h == nil || clusterID == "" { return } - - ginkgo.By("Verify final cluster state to ensure Reconciled before cleanup") - // Wait for cluster Reconciled condition to prevent namespace deletion conflicts - // Without this, adapters may still be creating resources during cleanup - // TODO Replace this workaround with clusters and nodepools API DELETE once HyperFleet API supports - err := h.WaitForClusterCondition( - ctx, - clusterID, - client.ConditionTypeReconciled, - openapi.ResourceConditionStatusTrue, - h.Cfg.Timeouts.Cluster.Reconciled, - ) - if err != nil { - ginkgo.GinkgoWriter.Printf("WARNING: cluster %s did not reach Reconciled state before cleanup: %v\n", clusterID, err) + ginkgo.By("cleaning up cluster " + clusterID) + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) } - - ginkgo.By("cleaning up test cluster " + clusterID) - err = h.CleanupTestCluster(ctx, clusterID) - Expect(err).NotTo(HaveOccurred(), "failed to cleanup cluster %s", clusterID) }) }, ) diff --git a/e2e/nodepool/delete.go b/e2e/nodepool/delete.go new file mode 100644 index 0000000..46930c1 --- /dev/null +++ b/e2e/nodepool/delete.go @@ -0,0 +1,130 @@ +package nodepool + +import ( + "context" + "errors" + "net/http" + + "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: nodepool][delete] NodePool Deletion Lifecycle", + ginkgo.Label(labels.Tier0), + func() { + var h *helper.Helper + var clusterID string + var nodepoolID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster and waiting for Reconciled") + var err error + clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + ginkgo.By("creating nodepool and waiting for Reconciled") + np, err := h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create nodepool") + Expect(np.Id).NotTo(BeNil(), "nodepool ID should be generated") + nodepoolID = *np.Id + + Eventually(h.PollNodePool(ctx, clusterID, nodepoolID), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should complete full deletion lifecycle from soft-delete through hard-delete", func(ctx context.Context) { + npBefore, err := h.Client.GetNodePool(ctx, clusterID, nodepoolID) + Expect(err).NotTo(HaveOccurred()) + parentBefore, err := h.Client.GetCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + + ginkgo.By("soft-deleting the nodepool") + deletedNP, err := h.Client.DeleteNodePool(ctx, clusterID, nodepoolID) + Expect(err).NotTo(HaveOccurred(), "DELETE request should succeed with 202") + Expect(deletedNP.DeletedTime).NotTo(BeNil(), "soft-deleted nodepool should have deleted_time set") + Expect(deletedNP.Generation).To(Equal(npBefore.Generation+1), "generation should increment after soft-delete") + + ginkgo.By("waiting for nodepool adapters to finalize and nodepool to be hard-deleted") + // Hard-delete executes atomically within the POST /adapter_statuses request that + // computes Reconciled=True, so there is no observable window to see Finalized=True + // on the statuses endpoint. Accept either Finalized=True OR 404 (already hard-deleted). + Eventually(func(g Gomega) { + var httpErr *client.HTTPError + _, err := h.Client.GetNodePool(ctx, clusterID, nodepoolID) + if errors.As(err, &httpErr) && httpErr.StatusCode == http.StatusNotFound { + return + } + g.Expect(err).NotTo(HaveOccurred()) + statuses, err := h.Client.GetNodePoolStatuses(ctx, clusterID, nodepoolID) + if errors.As(err, &httpErr) && httpErr.StatusCode == http.StatusNotFound { + return + } + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(statuses).To(helper.HaveAllAdaptersWithCondition( + h.Cfg.Adapters.NodePool, client.ConditionTypeFinalized, openapi.AdapterConditionStatusTrue)) + }, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed()) + + ginkgo.By("confirming nodepool is hard-deleted") + Eventually(h.PollNodePoolHTTPStatus(ctx, clusterID, nodepoolID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). 
+ Should(Equal(http.StatusNotFound)) + + ginkgo.By("verifying parent cluster is unaffected") + parentCluster, err := h.Client.GetCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred(), "parent cluster should still exist") + Expect(parentCluster.DeletedTime).To(BeNil(), "parent cluster should not have deleted_time") + Expect(parentCluster.Generation).To(Equal(parentBefore.Generation), "parent cluster generation should remain unchanged") + + hasReconciled := h.HasResourceCondition(parentCluster.Status.Conditions, client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue) + Expect(hasReconciled).To(BeTrue(), "parent cluster should remain Reconciled=True") + }) + + ginkgo.It("should return 409 Conflict when PATCHing a soft-deleted nodepool", ginkgo.Label(labels.Negative), func(ctx context.Context) { + ginkgo.By("soft-deleting the nodepool") + deletedNP, err := h.Client.DeleteNodePool(ctx, clusterID, nodepoolID) + Expect(err).NotTo(HaveOccurred(), "DELETE request should succeed with 202") + Expect(deletedNP.DeletedTime).NotTo(BeNil(), "soft-deleted nodepool should have deleted_time set") + deletedGeneration := deletedNP.Generation + + ginkgo.By("attempting PATCH on the soft-deleted nodepool") + patchReq := openapi.NodePoolPatchRequest{ + Spec: &openapi.NodePoolSpec{"updated-key": "should-not-work"}, + } + resp, err := h.Client.PatchNodePoolRaw(ctx, clusterID, nodepoolID, patchReq) + Expect(err).NotTo(HaveOccurred(), "raw PATCH request should not fail at transport level") + defer func() { _ = resp.Body.Close() }() + Expect(resp.StatusCode).To(Equal(http.StatusConflict), + "PATCH on soft-deleted nodepool should return 409 Conflict") + + ginkgo.By("verifying nodepool state is unchanged after rejected PATCH") + np, err := h.Client.GetNodePool(ctx, clusterID, nodepoolID) + Expect(err).NotTo(HaveOccurred()) + Expect(np.Generation).To(Equal(deletedGeneration), "generation should not change after rejected PATCH") + Expect(np.DeletedTime).NotTo(BeNil(), "nodepool should still be marked as deleted") + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if cluster, err := h.Client.GetCluster(ctx, clusterID); err == nil && cluster.DeletedTime == nil { + if _, err := h.Client.DeleteCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: API delete failed for cluster %s: %v\n", clusterID, err) + } + } + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) diff --git a/e2e/nodepool/delete_edge_cases.go b/e2e/nodepool/delete_edge_cases.go new file mode 100644 index 0000000..e872a37 --- /dev/null +++ b/e2e/nodepool/delete_edge_cases.go @@ -0,0 +1,199 @@ +package nodepool + +import ( + "context" + "errors" + "net/http" + + "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: nodepool][delete] Sibling Nodepool Isolation During Deletion", + ginkgo.Label(labels.Tier1), + func() { + var h *helper.Helper + var clusterID string + var nodepoolID1 string + var nodepoolID2 string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster and waiting for Reconciled") + var err error + clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + ginkgo.By("creating two nodepools") + np1, err := h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create first nodepool") + Expect(np1.Id).NotTo(BeNil()) + nodepoolID1 = *np1.Id + + np2, err := h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create second nodepool") + Expect(np2.Id).NotTo(BeNil()) + nodepoolID2 = *np2.Id + + ginkgo.By("waiting for both nodepools to reach Reconciled") + Eventually(h.PollNodePool(ctx, clusterID, nodepoolID1), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + Eventually(h.PollNodePool(ctx, clusterID, nodepoolID2), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should not affect sibling nodepool when one is deleted", func(ctx context.Context) { + ginkgo.By("deleting the first nodepool") + deletedNP, err := h.Client.DeleteNodePool(ctx, clusterID, nodepoolID1) + Expect(err).NotTo(HaveOccurred(), "DELETE should succeed with 202") + Expect(deletedNP.DeletedTime).NotTo(BeNil()) + + ginkgo.By("waiting for the deleted nodepool to be hard-deleted") + Eventually(h.PollNodePoolHTTPStatus(ctx, clusterID, nodepoolID1), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). + Should(Equal(http.StatusNotFound)) + + ginkgo.By("verifying sibling nodepool is unaffected") + siblingNP, err := h.Client.GetNodePool(ctx, clusterID, nodepoolID2) + Expect(err).NotTo(HaveOccurred(), "sibling nodepool should still be accessible") + Expect(siblingNP.DeletedTime).To(BeNil(), "sibling nodepool should not have deleted_time") + + hasReconciled := h.HasResourceCondition(siblingNP.Status.Conditions, client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue) + Expect(hasReconciled).To(BeTrue(), "sibling nodepool should remain Reconciled=True") + + ginkgo.By("verifying sibling nodepool adapter statuses are intact") + Eventually(h.PollNodePoolAdapterStatuses(ctx, clusterID, nodepoolID2), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). 
+ Should(helper.HaveAllAdaptersWithCondition( + h.Cfg.Adapters.NodePool, client.ConditionTypeApplied, openapi.AdapterConditionStatusTrue)) + + ginkgo.By("verifying parent cluster is unaffected") + parentCluster, err := h.Client.GetCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred(), "parent cluster should still exist") + Expect(parentCluster.DeletedTime).To(BeNil(), "parent cluster should not have deleted_time") + + hasParentReconciled := h.HasResourceCondition(parentCluster.Status.Conditions, client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue) + Expect(hasParentReconciled).To(BeTrue(), "parent cluster should remain Reconciled=True") + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if cluster, err := h.Client.GetCluster(ctx, clusterID); err == nil && cluster.DeletedTime == nil { + if _, err := h.Client.DeleteCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: API delete failed for cluster %s: %v\n", clusterID, err) + } + } + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) + +var _ = ginkgo.Describe("[Suite: nodepool][delete] Re-DELETE Idempotency", + ginkgo.Label(labels.Tier1), + func() { + var h *helper.Helper + var clusterID string + var nodepoolID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster and waiting for Reconciled") + var err error + clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + ginkgo.By("creating nodepool and waiting for Reconciled") + np, err := h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create nodepool") + Expect(np.Id).NotTo(BeNil()) + nodepoolID = *np.Id + + Eventually(h.PollNodePool(ctx, clusterID, nodepoolID), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should handle re-DELETE on nodepool idempotently without changing deleted_time or generation", func(ctx context.Context) { + ginkgo.By("sending first DELETE request") + firstDelete, err := h.Client.DeleteNodePool(ctx, clusterID, nodepoolID) + Expect(err).NotTo(HaveOccurred(), "first DELETE should succeed with 202") + Expect(firstDelete.DeletedTime).NotTo(BeNil()) + originalDeletedTime := *firstDelete.DeletedTime + originalGeneration := firstDelete.Generation + + ginkgo.By("sending second DELETE request") + secondDelete, err := h.Client.DeleteNodePool(ctx, clusterID, nodepoolID) + Expect(err).NotTo(HaveOccurred(), "second DELETE should succeed with 202") + Expect(secondDelete.DeletedTime).NotTo(BeNil()) + Expect(*secondDelete.DeletedTime).To(Equal(originalDeletedTime), "deleted_time should not change on re-DELETE") + Expect(secondDelete.Generation).To(Equal(originalGeneration), "generation should not increment on re-DELETE") + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if cluster, err := h.Client.GetCluster(ctx, clusterID); err == nil && cluster.DeletedTime == nil { + if _, err := h.Client.DeleteCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: API delete failed for cluster %s: %v\n", clusterID, err) + } + } + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) + +var _ = ginkgo.Describe("[Suite: nodepool][delete] DELETE Non-Existent Nodepool", + ginkgo.Label(labels.Tier1, labels.Negative), + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster for valid cluster_id path parameter") + var err error + clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + }) + + ginkgo.It("should return 404 when deleting a non-existent nodepool", func(ctx context.Context) { + ginkgo.By("sending DELETE for a non-existent nodepool ID") + _, err := h.Client.DeleteNodePool(ctx, clusterID, "non-existent-nodepool-id-12345") + var httpErr *client.HTTPError + Expect(errors.As(err, &httpErr)).To(BeTrue(), "error should be HTTPError") + Expect(httpErr.StatusCode).To(Equal(http.StatusNotFound), + "DELETE on non-existent nodepool should return 404") + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) diff --git a/e2e/nodepool/delete_visibility.go b/e2e/nodepool/delete_visibility.go new file mode 100644 index 0000000..8fce21c --- /dev/null +++ b/e2e/nodepool/delete_visibility.go @@ -0,0 +1,123 @@ +package nodepool + +import ( + "context" + + "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: nodepool][delete] Soft-Deleted Nodepool Visibility", + ginkgo.Label(labels.Tier1, labels.Disruptive), + ginkgo.Serial, + func() { + var h *helper.Helper + var clusterID string + var activeNPID string + var deletedNPID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster and waiting for Reconciled") + var err error + clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + ginkgo.By("creating two nodepools and waiting for Reconciled") + np1, err := h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json")) + Expect(err).NotTo(HaveOccurred()) + Expect(np1.Id).NotTo(BeNil()) + activeNPID = *np1.Id + + np2, err := h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json")) + Expect(err).NotTo(HaveOccurred()) + Expect(np2.Id).NotTo(BeNil()) + deletedNPID = *np2.Id + + Eventually(h.PollNodePool(ctx, clusterID, activeNPID), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + Eventually(h.PollNodePool(ctx, clusterID, deletedNPID), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should remain visible via GET and LIST before hard-delete", func(ctx context.Context) { + ginkgo.By("pausing sentinel-nodepools to freeze reconciliation before soft-delete") + err := h.ScaleDeployment(ctx, h.Cfg.Namespace, helper.SentinelNodePoolsDeployment, 0) + Expect(err).NotTo(HaveOccurred(), "failed to scale sentinel-nodepools to 0") + ginkgo.DeferCleanup(func(ctx context.Context) { + ginkgo.By("restoring sentinel-nodepools to 1 replica") + if err := h.ScaleDeployment(ctx, h.Cfg.Namespace, helper.SentinelNodePoolsDeployment, 1); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: failed to restore sentinel-nodepools: %v\n", err) + } + }) + + ginkgo.By("soft-deleting one nodepool") + deletedNP, err := h.Client.DeleteNodePool(ctx, clusterID, deletedNPID) + Expect(err).NotTo(HaveOccurred()) + Expect(deletedNP.DeletedTime).NotTo(BeNil()) + + ginkgo.By("verifying GET returns the soft-deleted nodepool with deleted_time") + Eventually(func(g Gomega) { + np, err := h.Client.GetNodePool(ctx, clusterID, deletedNPID) + g.Expect(err).NotTo(HaveOccurred(), "GET should return 200, not 404") + g.Expect(np.DeletedTime).NotTo(BeNil(), "nodepool should have deleted_time set") + }, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed()) + + ginkgo.By("verifying LIST includes both active and soft-deleted nodepools") + Eventually(func(g Gomega) { + npList, err := h.Client.ListNodePools(ctx, clusterID) + g.Expect(err).NotTo(HaveOccurred()) + + var foundActive, foundDeleted bool + for _, np := range npList.Items { + if np.Id == nil { + continue + } + if *np.Id == activeNPID { + g.Expect(np.DeletedTime).To(BeNil(), "active nodepool should not have deleted_time") + foundActive = true + } + if *np.Id == deletedNPID { + g.Expect(np.DeletedTime).NotTo(BeNil(), "deleted nodepool should have deleted_time") + foundDeleted = true + } + } + g.Expect(foundActive).To(BeTrue(), "active nodepool should appear in LIST") + g.Expect(foundDeleted).To(BeTrue(), "soft-deleted nodepool should appear in LIST") + }, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed()) + + ginkgo.By("verifying active nodepool is unaffected") + activeNP, err := h.Client.GetNodePool(ctx, clusterID, activeNPID) + Expect(err).NotTo(HaveOccurred()) + Expect(activeNP.DeletedTime).To(BeNil()) + + hasReconciled := h.HasResourceCondition(activeNP.Status.Conditions, client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue) + Expect(hasReconciled).To(BeTrue(), "active nodepool should remain Reconciled=True") + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if cluster, err := h.Client.GetCluster(ctx, clusterID); err == nil && cluster.DeletedTime == nil { + if _, err := h.Client.DeleteCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: API delete failed for cluster %s: %v\n", clusterID, err) + } + } + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) diff --git a/e2e/nodepool/update.go b/e2e/nodepool/update.go new file mode 100644 index 0000000..ac7c99c --- /dev/null +++ b/e2e/nodepool/update.go @@ -0,0 +1,97 @@ +package nodepool + +import ( + "context" + + "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: nodepool][update] NodePool Update Lifecycle", + ginkgo.Label(labels.Tier0), + func() { + var h *helper.Helper + var clusterID string + var nodepoolID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster and waiting for Reconciled") + var err error + clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + ginkgo.By("creating nodepool and waiting for Reconciled at generation 1") + np, err := h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create nodepool") + Expect(np.Id).NotTo(BeNil(), "nodepool ID should be generated") + nodepoolID = *np.Id + + Eventually(h.PollNodePool(ctx, clusterID, nodepoolID), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should update nodepool via PATCH, trigger reconciliation, and reach Reconciled at new generation", func(ctx context.Context) { + ginkgo.By("capturing state before PATCH") + npBefore, err := h.Client.GetNodePool(ctx, clusterID, nodepoolID) + Expect(err).NotTo(HaveOccurred()) + Expect(npBefore.Generation).To(Equal(int32(1)), "nodepool should be at generation 1 before update") + parentBefore, err := h.Client.GetCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + + ginkgo.By("sending PATCH to update nodepool spec") + patchedNP, err := h.Client.PatchNodePoolFromPayload(ctx, clusterID, nodepoolID, h.TestDataPath("payloads/nodepools/nodepool-patch.json")) + Expect(err).NotTo(HaveOccurred(), "PATCH request should succeed") + expectedGen := npBefore.Generation + 1 + Expect(patchedNP.Generation).To(Equal(expectedGen), "generation should increment after PATCH") + + ginkgo.By("waiting for all nodepool adapters to reconcile at new generation") + Eventually(h.PollNodePoolAdapterStatuses(ctx, clusterID, nodepoolID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). + Should(helper.HaveAllAdaptersAtGeneration(h.Cfg.Adapters.NodePool, expectedGen)) + + ginkgo.By("verifying nodepool reaches Reconciled=True at new generation") + Eventually(h.PollNodePool(ctx, clusterID, nodepoolID), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + finalNP, err := h.Client.GetNodePool(ctx, clusterID, nodepoolID) + Expect(err).NotTo(HaveOccurred()) + Expect(finalNP.Generation).To(Equal(expectedGen), "final nodepool generation should match expected") + + hasReconciled := h.HasResourceCondition(finalNP.Status.Conditions, client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue) + Expect(hasReconciled).To(BeTrue(), "nodepool should have Reconciled=True") + + for _, cond := range finalNP.Status.Conditions { + if cond.Type == client.ConditionTypeReconciled { + Expect(cond.ObservedGeneration).To(Equal(expectedGen), "Reconciled condition observed_generation should match expected") + } + } + + ginkgo.By("verifying parent cluster generation is unchanged") + parentCluster, err := h.Client.GetCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + Expect(parentCluster.Generation).To(Equal(parentBefore.Generation), "nodepool update should not affect cluster generation") + + hasParentReconciled := h.HasResourceCondition(parentCluster.Status.Conditions, client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue) + Expect(hasParentReconciled).To(BeTrue(), "parent cluster should remain Reconciled=True") + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) diff --git a/e2e/nodepool/update_edge_cases.go b/e2e/nodepool/update_edge_cases.go new file mode 100644 index 0000000..86e69bc --- /dev/null +++ b/e2e/nodepool/update_edge_cases.go @@ -0,0 +1,103 @@ +package nodepool + +import ( + "context" + + "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: nodepool][update] Labels-Only PATCH", + ginkgo.Label(labels.Tier1), + func() { + var h *helper.Helper + var clusterID string + var nodepoolID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + ginkgo.By("creating cluster and waiting for Reconciled") + var err error + clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create cluster") + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + ginkgo.By("creating nodepool and waiting for Reconciled at generation 1") + np, err := h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json")) + Expect(err).NotTo(HaveOccurred(), "failed to create nodepool") + Expect(np.Id).NotTo(BeNil()) + nodepoolID = *np.Id + + Eventually(h.PollNodePool(ctx, clusterID, nodepoolID), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should bump generation and trigger reconciliation from a labels-only PATCH", func(ctx context.Context) { + ginkgo.By("capturing state before labels-only PATCH") + npBefore, err := h.Client.GetNodePool(ctx, clusterID, nodepoolID) + Expect(err).NotTo(HaveOccurred()) + specBefore := npBefore.Spec + parentBefore, err := h.Client.GetCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + + ginkgo.By("sending labels-only PATCH to nodepool (preserving existing labels)") + newLabels := make(map[string]string) + if npBefore.Labels != nil { + for k, v := range *npBefore.Labels { + newLabels[k] = v + } + } + newLabels["env"] = "staging" + newLabels["pool-type"] = "gpu" + patchedNP, err := h.Client.PatchNodePool(ctx, clusterID, nodepoolID, openapi.NodePoolPatchRequest{ + Labels: &newLabels, + }) + Expect(err).NotTo(HaveOccurred(), "labels-only PATCH should succeed") + Expect(patchedNP.Generation).To(Equal(int32(2)), + "generation should increment after labels-only PATCH") + + ginkgo.By("waiting for all nodepool adapters to reconcile at generation 2") + Eventually(h.PollNodePoolAdapterStatuses(ctx, clusterID, nodepoolID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). + Should(helper.HaveAllAdaptersAtGeneration(h.Cfg.Adapters.NodePool, int32(2))) + + ginkgo.By("verifying nodepool reaches Reconciled=True with updated labels and unchanged spec") + Eventually(h.PollNodePool(ctx, clusterID, nodepoolID), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + finalNP, err := h.Client.GetNodePool(ctx, clusterID, nodepoolID) + Expect(err).NotTo(HaveOccurred()) + Expect(finalNP.Labels).NotTo(BeNil(), "nodepool should have labels") + Expect((*finalNP.Labels)["env"]).To(Equal("staging")) + Expect((*finalNP.Labels)["pool-type"]).To(Equal("gpu")) + Expect(finalNP.Spec).To(Equal(specBefore), + "spec should be unchanged after labels-only PATCH") + + ginkgo.By("verifying parent cluster generation is unchanged") + parentCluster, err := h.Client.GetCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + Expect(parentCluster.Generation).To(Equal(parentBefore.Generation), + "nodepool labels PATCH should not affect cluster generation") + + hasParentReconciled := h.HasResourceCondition(parentCluster.Status.Conditions, client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue) + Expect(hasParentReconciled).To(BeTrue(), "parent cluster should remain Reconciled=True") + }) + + ginkgo.AfterEach(func(ctx context.Context) { + if h == nil || clusterID == "" { + return + } + ginkgo.By("cleaning up cluster " + clusterID) + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: cleanup failed for cluster %s: %v\n", clusterID, err) + } + }) + }, +) diff --git a/pkg/client/client.go b/pkg/client/client.go index 2d3a338..7a19c4b 100644 --- a/pkg/client/client.go +++ b/pkg/client/client.go @@ -32,6 +32,20 @@ func NewHyperFleetClient(baseURL string, httpClient *http.Client) (*HyperFleetCl }, nil } +// HTTPError represents an unexpected HTTP status code from the API. 
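+// Tests can branch on the returned code with errors.As, e.g. to assert a 404 or 409 from a negative-path request.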
+type HTTPError struct { + StatusCode int + Action string + Body string +} + +func (e *HTTPError) Error() string { + if e.Body != "" { + return fmt.Sprintf("unexpected status code %d for %s: %s", e.StatusCode, e.Action, e.Body) + } + return fmt.Sprintf("unexpected status code %d for %s", e.StatusCode, e.Action) +} + // handleHTTPResponse is a generic helper for processing HTTP responses. // It handles status code validation, response body decoding, and error formatting. func handleHTTPResponse[T any](resp *http.Response, expectedStatus int, action string) (*T, error) { @@ -40,11 +54,17 @@ func handleHTTPResponse[T any](resp *http.Response, expectedStatus int, action s if resp.StatusCode != expectedStatus { body, err := io.ReadAll(resp.Body) if err != nil { - return nil, fmt.Errorf("unexpected status code %d for %s (failed to read response body: %w)", - resp.StatusCode, action, err) + return nil, &HTTPError{ + StatusCode: resp.StatusCode, + Action: action, + Body: fmt.Sprintf("failed to read error response body: %v", err), + } + } + return nil, &HTTPError{ + StatusCode: resp.StatusCode, + Action: action, + Body: string(body), } - return nil, fmt.Errorf("unexpected status code %d for %s: %s", - resp.StatusCode, action, string(body)) } var result T diff --git a/pkg/client/cluster.go b/pkg/client/cluster.go index bab5fa4..55797d3 100644 --- a/pkg/client/cluster.go +++ b/pkg/client/cluster.go @@ -69,13 +69,55 @@ func (c *HyperFleetClient) CreateClusterFromPayload(ctx context.Context, payload return c.CreateCluster(ctx, *req) } -// DeleteCluster deletes a cluster by ID. -// TODO(API): Implement cluster deletion once HyperFleet API supports DELETE operations. -// Currently this is a no-op as the API does not support cluster deletion yet. -// Resources will remain in the system until manually cleaned up. -func (c *HyperFleetClient) DeleteCluster(ctx context.Context, clusterID string) error { - // HyperFleet API does not yet support cluster deletion - // Log this as info (not error) since it's expected behavior - logger.Debug("cluster deletion not supported by API - skipping", "cluster_id", clusterID) - return nil +// DeleteCluster soft-deletes a cluster by ID (sets deleted_time, returns 202). +func (c *HyperFleetClient) DeleteCluster(ctx context.Context, clusterID string) (*openapi.Cluster, error) { + logger.Info("deleting cluster", "cluster_id", clusterID) + + resp, err := c.DeleteClusterById(ctx, clusterID) + if err != nil { + return nil, fmt.Errorf("failed to delete cluster: %w", err) + } + + cluster, err := handleHTTPResponse[openapi.Cluster](resp, http.StatusAccepted, "delete cluster") + if err != nil { + return nil, err + } + + logger.Info("cluster deleted", "cluster_id", clusterID) + return cluster, nil +} + +// PatchCluster updates a cluster via PATCH. +func (c *HyperFleetClient) PatchCluster(ctx context.Context, clusterID string, req openapi.ClusterPatchRequest) (*openapi.Cluster, error) { + logger.Info("patching cluster", "cluster_id", clusterID) + + resp, err := c.PatchClusterById(ctx, clusterID, req) + if err != nil { + return nil, fmt.Errorf("failed to patch cluster: %w", err) + } + + cluster, err := handleHTTPResponse[openapi.Cluster](resp, http.StatusOK, "patch cluster") + if err != nil { + return nil, err + } + + logger.Info("cluster patched", "cluster_id", clusterID, "generation", cluster.Generation) + return cluster, nil +} + +// PatchClusterFromPayload patches a cluster from a JSON payload file. 
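+// The file must decode into an openapi.ClusterPatchRequest (for example, payloads/clusters/cluster-patch.json resolved via h.TestDataPath in e2e/cluster/update.go).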
+func (c *HyperFleetClient) PatchClusterFromPayload(ctx context.Context, clusterID, payloadPath string) (*openapi.Cluster, error) { + logger.Debug("loading cluster patch payload", "payload_path", payloadPath) + + req, err := loadPayloadFromFile[openapi.ClusterPatchRequest](payloadPath) + if err != nil { + return nil, err + } + + return c.PatchCluster(ctx, clusterID, *req) +} + +// PatchClusterRaw sends a PATCH request and returns the raw HTTP response for status inspection. +func (c *HyperFleetClient) PatchClusterRaw(ctx context.Context, clusterID string, req openapi.ClusterPatchRequest) (*http.Response, error) { + return c.PatchClusterById(ctx, clusterID, req) } diff --git a/pkg/client/constants.go b/pkg/client/constants.go index 32aa90c..045f8ec 100644 --- a/pkg/client/constants.go +++ b/pkg/client/constants.go @@ -13,6 +13,7 @@ const ( ConditionTypeApplied = "Applied" // Resources created successfully ConditionTypeAvailable = "Available" // Work completed successfully ConditionTypeHealth = "Health" // No unexpected errors + ConditionTypeFinalized = "Finalized" // Cleanup confirmed during deletion ) // Condition types used by cluster-level resources (clusters, nodepools) diff --git a/pkg/client/nodepool.go b/pkg/client/nodepool.go index 608c3eb..8671449 100644 --- a/pkg/client/nodepool.go +++ b/pkg/client/nodepool.go @@ -69,13 +69,55 @@ func (c *HyperFleetClient) CreateNodePoolFromPayload(ctx context.Context, cluste return c.CreateNodePool(ctx, clusterID, *req) } -// DeleteNodePool deletes a nodepool by ID. -// TODO(API): Implement nodepool deletion once HyperFleet API supports DELETE operations. -// Currently this is a no-op as the API does not support nodepool deletion yet. -// Resources will remain in the system until manually cleaned up. -func (c *HyperFleetClient) DeleteNodePool(ctx context.Context, clusterID, nodepoolID string) error { - // HyperFleet API does not yet support nodepool deletion - // Log this as info (not error) since it's expected behavior - logger.Debug("nodepool deletion not supported by API - skipping", "cluster_id", clusterID, "nodepool_id", nodepoolID) - return nil +// DeleteNodePool soft-deletes a nodepool by ID (sets deleted_time, returns 202). +func (c *HyperFleetClient) DeleteNodePool(ctx context.Context, clusterID, nodepoolID string) (*openapi.NodePool, error) { + logger.Info("deleting nodepool", "cluster_id", clusterID, "nodepool_id", nodepoolID) + + resp, err := c.DeleteNodePoolById(ctx, clusterID, nodepoolID) + if err != nil { + return nil, fmt.Errorf("failed to delete nodepool: %w", err) + } + + nodepool, err := handleHTTPResponse[openapi.NodePool](resp, http.StatusAccepted, "delete nodepool") + if err != nil { + return nil, err + } + + logger.Info("nodepool deleted", "cluster_id", clusterID, "nodepool_id", nodepoolID) + return nodepool, nil +} + +// PatchNodePool updates a nodepool via PATCH. 
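+// The returned nodepool reflects the post-patch Generation, which tests use to wait for adapters to catch up.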
+func (c *HyperFleetClient) PatchNodePool(ctx context.Context, clusterID, nodepoolID string, req openapi.NodePoolPatchRequest) (*openapi.NodePool, error) {
+	logger.Info("patching nodepool", "cluster_id", clusterID, "nodepool_id", nodepoolID)
+
+	resp, err := c.PatchNodePoolById(ctx, clusterID, nodepoolID, req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to patch nodepool: %w", err)
+	}
+
+	nodepool, err := handleHTTPResponse[openapi.NodePool](resp, http.StatusOK, "patch nodepool")
+	if err != nil {
+		return nil, err
+	}
+
+	logger.Info("nodepool patched", "cluster_id", clusterID, "nodepool_id", nodepoolID, "generation", nodepool.Generation)
+	return nodepool, nil
+}
+
+// PatchNodePoolFromPayload patches a nodepool from a JSON payload file.
+func (c *HyperFleetClient) PatchNodePoolFromPayload(ctx context.Context, clusterID, nodepoolID, payloadPath string) (*openapi.NodePool, error) {
+	logger.Debug("loading nodepool patch payload", "payload_path", payloadPath)
+
+	req, err := loadPayloadFromFile[openapi.NodePoolPatchRequest](payloadPath)
+	if err != nil {
+		return nil, err
+	}
+
+	return c.PatchNodePool(ctx, clusterID, nodepoolID, *req)
+}
+
+// PatchNodePoolRaw sends a PATCH request and returns the raw HTTP response for status inspection.
+func (c *HyperFleetClient) PatchNodePoolRaw(ctx context.Context, clusterID, nodepoolID string, req openapi.NodePoolPatchRequest) (*http.Response, error) {
+	return c.PatchNodePoolById(ctx, clusterID, nodepoolID, req)
+}
diff --git a/pkg/helper/constants.go b/pkg/helper/constants.go
index 3e18c8b..b807c03 100644
--- a/pkg/helper/constants.go
+++ b/pkg/helper/constants.go
@@ -11,5 +11,12 @@ const (
 	ResourceTypeClusters  = "clusters"
 	ResourceTypeNodepools = "nodepools"
 
+	// SentinelClustersDeployment is the Kubernetes deployment name for the cluster sentinel.
+	// Format: <release-name>-<chart-name> = sentinel-clusters-hyperfleet-sentinel
+	SentinelClustersDeployment = "sentinel-clusters-hyperfleet-sentinel"
+
+	// SentinelNodePoolsDeployment is the Kubernetes deployment name for the nodepool sentinel.
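+	// Scaling this deployment to zero pauses nodepool reconciliation; Disruptive tests do this to keep a soft-deleted nodepool observable.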
+ SentinelNodePoolsDeployment = "sentinel-nodepools-hyperfleet-sentinel" + defaultGCPProjectID = "hcm-hyperfleet" ) diff --git a/pkg/helper/helper.go b/pkg/helper/helper.go index 2654c7e..0824c15 100644 --- a/pkg/helper/helper.go +++ b/pkg/helper/helper.go @@ -8,7 +8,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" k8sclient "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client/kubernetes" "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client/maestro" @@ -175,16 +174,6 @@ func (h *Helper) CleanupTestCluster(ctx context.Context, clusterID string) error return nil } -// GetTestNodePool creates a nodepool on the specified cluster from a payload file -func (h *Helper) GetTestNodePool(ctx context.Context, clusterID, payloadPath string) (*openapi.NodePool, error) { - return h.Client.CreateNodePoolFromPayload(ctx, clusterID, payloadPath) -} - -// CleanupTestNodePool cleans up test nodepool -func (h *Helper) CleanupTestNodePool(ctx context.Context, clusterID, nodepoolID string) error { - return h.Client.DeleteNodePool(ctx, clusterID, nodepoolID) -} - // GetMaestroClient returns the Maestro client, initializing it lazily on first access // This avoids the overhead of K8s service discovery for test suites that don't use Maestro func (h *Helper) GetMaestroClient() *maestro.Client { diff --git a/pkg/helper/matchers.go b/pkg/helper/matchers.go new file mode 100644 index 0000000..4799347 --- /dev/null +++ b/pkg/helper/matchers.go @@ -0,0 +1,195 @@ +package helper + +import ( + "fmt" + "strings" + + "github.com/onsi/gomega/types" + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" +) + +// HaveResourceCondition matches a *Cluster or *NodePool that has the specified condition type and status. +func HaveResourceCondition(condType string, status openapi.ResourceConditionStatus) types.GomegaMatcher { + return &resourceConditionMatcher{condType: condType, status: status} +} + +type resourceConditionMatcher struct { + condType string + status openapi.ResourceConditionStatus + actual string +} + +func (m *resourceConditionMatcher) Match(actual any) (bool, error) { + conditions, err := extractResourceConditions(actual) + if err != nil { + return false, err + } + if conditions == nil { + m.actual = "" + return false, nil + } + for _, c := range conditions { + if c.Type == m.condType && c.Status == m.status { + return true, nil + } + } + m.actual = formatResourceConditions(conditions) + return false, nil +} + +func (m *resourceConditionMatcher) FailureMessage(_ any) string { + return fmt.Sprintf("expected condition %s=%s but got: %s", m.condType, m.status, m.actual) +} + +func (m *resourceConditionMatcher) NegatedFailureMessage(_ any) string { + return fmt.Sprintf("expected NOT to have condition %s=%s", m.condType, m.status) +} + +// HaveAllAdaptersWithCondition matches an *AdapterStatusList where every required +// adapter has the specified condition type and status. 
+func HaveAllAdaptersWithCondition(requiredAdapters []string, condType string, status openapi.AdapterConditionStatus) types.GomegaMatcher { + return &allAdaptersConditionMatcher{ + adapters: requiredAdapters, + condType: condType, + status: status, + } +} + +type allAdaptersConditionMatcher struct { + adapters []string + condType string + status openapi.AdapterConditionStatus + missing []string +} + +func (m *allAdaptersConditionMatcher) Match(actual any) (bool, error) { + list, ok := actual.(*openapi.AdapterStatusList) + if !ok { + return false, fmt.Errorf("HaveAllAdaptersWithCondition expects *AdapterStatusList, got %T", actual) + } + if list == nil { + return false, fmt.Errorf("HaveAllAdaptersWithCondition expects non-nil *AdapterStatusList") + } + + m.missing = nil + adapterMap := make(map[string]openapi.AdapterStatus, len(list.Items)) + for _, s := range list.Items { + adapterMap[s.Adapter] = s + } + + for _, name := range m.adapters { + adapter, exists := adapterMap[name] + if !exists { + m.missing = append(m.missing, name+" (not found)") + continue + } + if !hasAdapterCond(adapter.Conditions, m.condType, m.status) { + m.missing = append(m.missing, name) + } + } + return len(m.missing) == 0, nil +} + +func (m *allAdaptersConditionMatcher) FailureMessage(_ any) string { + return fmt.Sprintf("adapters missing %s=%s: %s", m.condType, m.status, strings.Join(m.missing, ", ")) +} + +func (m *allAdaptersConditionMatcher) NegatedFailureMessage(_ any) string { + return fmt.Sprintf("expected some adapters NOT to have %s=%s", m.condType, m.status) +} + +// HaveAllAdaptersAtGeneration matches an *AdapterStatusList where every required +// adapter has observed the given generation with Applied=True, Available=True, Health=True. +func HaveAllAdaptersAtGeneration(requiredAdapters []string, generation int32) types.GomegaMatcher { + return &allAdaptersGenerationMatcher{ + adapters: requiredAdapters, + generation: generation, + } +} + +type allAdaptersGenerationMatcher struct { + adapters []string + generation int32 + failures []string +} + +func (m *allAdaptersGenerationMatcher) Match(actual any) (bool, error) { + list, ok := actual.(*openapi.AdapterStatusList) + if !ok { + return false, fmt.Errorf("HaveAllAdaptersAtGeneration expects *AdapterStatusList, got %T", actual) + } + if list == nil { + return false, fmt.Errorf("HaveAllAdaptersAtGeneration expects non-nil *AdapterStatusList") + } + + m.failures = nil + adapterMap := make(map[string]openapi.AdapterStatus, len(list.Items)) + for _, s := range list.Items { + adapterMap[s.Adapter] = s + } + + for _, name := range m.adapters { + adapter, exists := adapterMap[name] + if !exists { + m.failures = append(m.failures, name+": not found") + continue + } + if adapter.ObservedGeneration != m.generation { + m.failures = append(m.failures, fmt.Sprintf("%s: generation %d (want %d)", name, adapter.ObservedGeneration, m.generation)) + continue + } + for _, ct := range []string{client.ConditionTypeApplied, client.ConditionTypeAvailable, client.ConditionTypeHealth} { + if !hasAdapterCond(adapter.Conditions, ct, openapi.AdapterConditionStatusTrue) { + m.failures = append(m.failures, fmt.Sprintf("%s: %s!=True", name, ct)) + } + } + } + return len(m.failures) == 0, nil +} + +func (m *allAdaptersGenerationMatcher) FailureMessage(_ any) string { + return fmt.Sprintf("adapters not at generation %d: %s", m.generation, strings.Join(m.failures, "; ")) +} + +func (m *allAdaptersGenerationMatcher) NegatedFailureMessage(_ any) string { + return fmt.Sprintf("expected 
adapters NOT at generation %d", m.generation) +} + +func hasAdapterCond(conditions []openapi.AdapterCondition, condType string, status openapi.AdapterConditionStatus) bool { + for _, c := range conditions { + if c.Type == condType && c.Status == status { + return true + } + } + return false +} + +func extractResourceConditions(actual any) ([]openapi.ResourceCondition, error) { + switch v := actual.(type) { + case *openapi.Cluster: + if v == nil { + return nil, nil + } + return v.Status.Conditions, nil + case *openapi.NodePool: + if v == nil { + return nil, nil + } + return v.Status.Conditions, nil + default: + return nil, fmt.Errorf("HaveResourceCondition expects *Cluster or *NodePool, got %T", actual) + } +} + +func formatResourceConditions(conditions []openapi.ResourceCondition) string { + if len(conditions) == 0 { + return "" + } + parts := make([]string, 0, len(conditions)) + for _, c := range conditions { + parts = append(parts, fmt.Sprintf("%s=%s", c.Type, c.Status)) + } + return strings.Join(parts, ", ") +} diff --git a/pkg/helper/pollers.go b/pkg/helper/pollers.go new file mode 100644 index 0000000..3de49cd --- /dev/null +++ b/pkg/helper/pollers.go @@ -0,0 +1,77 @@ +package helper + +import ( + "context" + "errors" + "net/http" + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" +) + +// PollCluster returns a polling function for use with Eventually. +func (h *Helper) PollCluster(ctx context.Context, id string) func() (*openapi.Cluster, error) { + return func() (*openapi.Cluster, error) { + return h.Client.GetCluster(ctx, id) + } +} + +// PollNodePool returns a polling function for use with Eventually. +func (h *Helper) PollNodePool(ctx context.Context, clusterID, npID string) func() (*openapi.NodePool, error) { + return func() (*openapi.NodePool, error) { + return h.Client.GetNodePool(ctx, clusterID, npID) + } +} + +// PollClusterAdapterStatuses returns a polling function for cluster adapter status checks. +func (h *Helper) PollClusterAdapterStatuses(ctx context.Context, clusterID string) func() (*openapi.AdapterStatusList, error) { + return func() (*openapi.AdapterStatusList, error) { + return h.Client.GetClusterStatuses(ctx, clusterID) + } +} + +// PollNodePoolAdapterStatuses returns a polling function for nodepool adapter status checks. +func (h *Helper) PollNodePoolAdapterStatuses(ctx context.Context, clusterID, npID string) func() (*openapi.AdapterStatusList, error) { + return func() (*openapi.AdapterStatusList, error) { + return h.Client.GetNodePoolStatuses(ctx, clusterID, npID) + } +} + +// PollClusterHTTPStatus returns a polling function that yields the HTTP status code. +// 200 when cluster exists, 404 when gone. Useful for hard-delete assertions. +func (h *Helper) PollClusterHTTPStatus(ctx context.Context, id string) func() (int, error) { + return func() (int, error) { + _, err := h.Client.GetCluster(ctx, id) + if err == nil { + return http.StatusOK, nil + } + var httpErr *client.HTTPError + if errors.As(err, &httpErr) { + return httpErr.StatusCode, nil + } + return 0, err + } +} + +// PollNodePoolHTTPStatus returns a polling function that yields the HTTP status code. +// 200 when nodepool exists, 404 when gone. Useful for hard-delete assertions. 
+func (h *Helper) PollNodePoolHTTPStatus(ctx context.Context, clusterID, npID string) func() (int, error) { + return func() (int, error) { + _, err := h.Client.GetNodePool(ctx, clusterID, npID) + if err == nil { + return http.StatusOK, nil + } + var httpErr *client.HTTPError + if errors.As(err, &httpErr) { + return httpErr.StatusCode, nil + } + return 0, err + } +} + +// PollNamespacesByPrefix returns a polling function for namespace existence checks. +func (h *Helper) PollNamespacesByPrefix(ctx context.Context, prefix string) func() ([]string, error) { + return func() ([]string, error) { + return h.K8sClient.FindNamespacesByPrefix(ctx, prefix) + } +} diff --git a/pkg/helper/validation.go b/pkg/helper/validation.go index 21f0dab..90536eb 100644 --- a/pkg/helper/validation.go +++ b/pkg/helper/validation.go @@ -8,12 +8,7 @@ import ( // HasAdapterCondition checks if an adapter condition with the given type and status exists in the conditions list func (h *Helper) HasAdapterCondition(conditions []openapi.AdapterCondition, condType string, status openapi.AdapterConditionStatus) bool { - for _, cond := range conditions { - if cond.Type == condType && cond.Status == status { - return true - } - } - return false + return hasAdapterCond(conditions, condType, status) } // HasResourceCondition checks if a resource condition with the given type and status exists in the conditions list diff --git a/pkg/helper/wait.go b/pkg/helper/wait.go deleted file mode 100644 index 40ac934..0000000 --- a/pkg/helper/wait.go +++ /dev/null @@ -1,104 +0,0 @@ -package helper - -import ( - "context" - "fmt" - "time" - - . "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability - - "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" - "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/logger" -) - -// WaitForClusterCondition waits for a cluster to have a specific condition with the expected status -func (h *Helper) WaitForClusterCondition(ctx context.Context, clusterID string, conditionType string, expectedStatus openapi.ResourceConditionStatus, timeout time.Duration) error { - logger.Debug("waiting for cluster condition", "cluster_id", clusterID, "condition_type", conditionType, "expected_status", expectedStatus, "timeout", timeout) - - Eventually(func(g Gomega) { - cluster, err := h.Client.GetCluster(ctx, clusterID) - g.Expect(err).NotTo(HaveOccurred(), "failed to get cluster") - g.Expect(cluster).NotTo(BeNil(), "cluster is nil") - g.Expect(cluster.Status).NotTo(BeNil(), "cluster.Status is nil") - - // Check if the condition exists with the expected status - found := false - for _, cond := range cluster.Status.Conditions { - if cond.Type == conditionType && cond.Status == expectedStatus { - found = true - break - } - } - g.Expect(found).To(BeTrue(), - fmt.Sprintf("cluster does not have condition %s=%s", conditionType, expectedStatus)) - }, timeout, h.Cfg.Polling.Interval).Should(Succeed()) - - logger.Info("cluster reached target condition", "cluster_id", clusterID, "condition_type", conditionType, "status", expectedStatus) - return nil -} - -// WaitForAdapterCondition waits for a specific adapter condition to be in the expected status -func (h *Helper) WaitForAdapterCondition(ctx context.Context, clusterID, adapterName, condType string, expectedStatus openapi.AdapterConditionStatus, timeout time.Duration) error { - Eventually(func(g Gomega) { - statuses, err := h.Client.GetClusterStatuses(ctx, clusterID) - g.Expect(err).NotTo(HaveOccurred(), "failed to get cluster statuses") - - // 
Find the specific adapter - var found bool - for _, status := range statuses.Items { - if status.Adapter == adapterName { - found = true - hasCondition := h.HasAdapterCondition(status.Conditions, condType, expectedStatus) - g.Expect(hasCondition).To(BeTrue(), - fmt.Sprintf("adapter %s does not have condition %s=%s", adapterName, condType, expectedStatus)) - break - } - } - g.Expect(found).To(BeTrue(), fmt.Sprintf("adapter %s not found", adapterName)) - }, timeout, h.Cfg.Polling.Interval).Should(Succeed()) - - return nil -} - -// WaitForAllAdapterConditions waits for all adapters to have the specified condition -func (h *Helper) WaitForAllAdapterConditions(ctx context.Context, clusterID, condType string, expectedStatus openapi.AdapterConditionStatus, timeout time.Duration) error { - Eventually(func(g Gomega) { - statuses, err := h.Client.GetClusterStatuses(ctx, clusterID) - g.Expect(err).NotTo(HaveOccurred(), "failed to get cluster statuses") - - for _, adapterStatus := range statuses.Items { - hasCondition := h.HasAdapterCondition(adapterStatus.Conditions, condType, expectedStatus) - g.Expect(hasCondition).To(BeTrue(), - fmt.Sprintf("adapter %s does not have condition %s=%s", - adapterStatus.Adapter, condType, expectedStatus)) - } - }, timeout, h.Cfg.Polling.Interval).Should(Succeed()) - - return nil -} - -// WaitForNodePoolCondition waits for a nodepool to have a specific condition with the expected status -func (h *Helper) WaitForNodePoolCondition(ctx context.Context, clusterID, nodepoolID string, conditionType string, expectedStatus openapi.ResourceConditionStatus, timeout time.Duration) error { - logger.Debug("waiting for nodepool condition", "cluster_id", clusterID, "nodepool_id", nodepoolID, "condition_type", conditionType, "expected_status", expectedStatus, "timeout", timeout) - - Eventually(func(g Gomega) { - nodepool, err := h.Client.GetNodePool(ctx, clusterID, nodepoolID) - g.Expect(err).NotTo(HaveOccurred(), "failed to get nodepool") - g.Expect(nodepool).NotTo(BeNil(), "nodepool is nil") - g.Expect(nodepool.Status).NotTo(BeNil(), "nodepool.Status is nil") - - // Check if the condition exists with the expected status - found := false - for _, cond := range nodepool.Status.Conditions { - if cond.Type == conditionType && cond.Status == expectedStatus { - found = true - break - } - } - g.Expect(found).To(BeTrue(), - fmt.Sprintf("nodepool does not have condition %s=%s", conditionType, expectedStatus)) - }, timeout, h.Cfg.Polling.Interval).Should(Succeed()) - - logger.Info("nodepool reached target condition", "cluster_id", clusterID, "nodepool_id", nodepoolID, "condition_type", conditionType, "status", expectedStatus) - return nil -} diff --git a/test-design/testcases/delete-cluster.md b/test-design/testcases/delete-cluster.md index 3b767ef..442d6c0 100644 --- a/test-design/testcases/delete-cluster.md +++ b/test-design/testcases/delete-cluster.md @@ -36,10 +36,10 @@ This test validates the complete cluster deletion lifecycle end-to-end. It verif | **Pos/Neg** | Positive | | **Priority** | Tier0 | | **Status** | Draft | -| **Automation** | Not Automated | +| **Automation** | Automated | | **Version** | Post-MVP | | **Created** | 2026-04-15 | -| **Updated** | 2026-04-15 | +| **Updated** | 2026-04-30 | --- @@ -159,10 +159,10 @@ This test validates hierarchical deletion behavior. 
When a cluster is deleted, t | **Pos/Neg** | Positive | | **Priority** | Tier0 | | **Status** | Draft | -| **Automation** | Not Automated | +| **Automation** | Automated | | **Version** | Post-MVP | | **Created** | 2026-04-15 | -| **Updated** | 2026-04-15 | +| **Updated** | 2026-04-30 | --- diff --git a/test-design/testcases/delete-nodepool.md b/test-design/testcases/delete-nodepool.md index 8a1b081..34f26d9 100644 --- a/test-design/testcases/delete-nodepool.md +++ b/test-design/testcases/delete-nodepool.md @@ -26,10 +26,10 @@ This test validates the complete nodepool deletion lifecycle. It verifies that w | **Pos/Neg** | Positive | | **Priority** | Tier0 | | **Status** | Draft | -| **Automation** | Not Automated | +| **Automation** | Automated | | **Version** | Post-MVP | | **Created** | 2026-04-15 | -| **Updated** | 2026-04-15 | +| **Updated** | 2026-04-30 | --- diff --git a/test-design/testcases/update-cluster.md b/test-design/testcases/update-cluster.md index b242adc..12c8d53 100644 --- a/test-design/testcases/update-cluster.md +++ b/test-design/testcases/update-cluster.md @@ -23,10 +23,10 @@ This test validates the cluster update lifecycle end-to-end. It verifies that wh | **Pos/Neg** | Positive | | **Priority** | Tier0 | | **Status** | Draft | -| **Automation** | Not Automated | +| **Automation** | Automated | | **Version** | Post-MVP | | **Created** | 2026-04-15 | -| **Updated** | 2026-04-28 | +| **Updated** | 2026-04-30 | --- diff --git a/test-design/testcases/update-nodepool.md b/test-design/testcases/update-nodepool.md index 94c9fc3..4539240 100644 --- a/test-design/testcases/update-nodepool.md +++ b/test-design/testcases/update-nodepool.md @@ -20,10 +20,10 @@ This test validates the nodepool update lifecycle. It verifies that when a PATCH | **Pos/Neg** | Positive | | **Priority** | Tier0 | | **Status** | Draft | -| **Automation** | Not Automated | +| **Automation** | Automated | | **Version** | Post-MVP | | **Created** | 2026-04-15 | -| **Updated** | 2026-04-28 | +| **Updated** | 2026-04-30 | --- diff --git a/testdata/adapter-configs/cl-deployment/adapter-task-config.yaml b/testdata/adapter-configs/cl-deployment/adapter-task-config.yaml index 04e1c30..55d7fc8 100644 --- a/testdata/adapter-configs/cl-deployment/adapter-task-config.yaml +++ b/testdata/adapter-configs/cl-deployment/adapter-task-config.yaml @@ -21,6 +21,8 @@ preconditions: field: "name" - name: "generationSpec" field: "generation" + - name: "is_deleting" + expression: "has(clusterStatus.deleted_time)" - name: "clusterNotReconciled" expression: | status.conditions.filter(c, c.type == "Reconciled").size() > 0 @@ -34,11 +36,6 @@ preconditions: : now() )).getSeconds() > 300 - - name: "validationCheck" - # Precondition passes if cluster is NOT Reconciled OR if cluster is Reconciled and stable for >300 seconds since last transition (enables self-healing) - expression: | - clusterNotReconciled || clusterReconciledTTL - - name: "clusterAdapterStatus" api_call: method: "GET" @@ -49,10 +46,10 @@ preconditions: capture: - name: "clusterJobStatus" field: "{.items[?(@.adapter=='cl-job')].conditions[?(@.type=='Available')].status}" - conditions: - - field: "clusterJobStatus" - operator: "equals" - value: "True" + + - name: "validationCheck" + expression: | + is_deleting || (clusterJobStatus == "True" && (clusterNotReconciled || clusterReconciledTTL)) # Resources with valid K8s manifests resources: @@ -67,6 +64,11 @@ resources: label_selector: hyperfleet.io/resource-type: "deployment" hyperfleet.io/cluster-id: "{{ .clusterId }}" + 
lifecycle: + delete: + propagationPolicy: Background + when: + expression: "is_deleting" # Post-processing with valid CEL expressions post: @@ -96,8 +98,8 @@ post: expression: | has(resources.testDeployment) ? ( resources.?testDeployment.?status.?conditions.orValue([]).exists(c, c.type == "Available") - ? resources.testDeployment.status.conditions.filter(c, c.type == "Available")[0].status : "Unknown") - : "Unknown" + ? resources.testDeployment.status.conditions.filter(c, c.type == "Available")[0].status : "False") + : "False" reason: expression: | resources.?testDeployment.?status.?conditions.orValue([]).exists(c, c.type == "Available") @@ -114,17 +116,55 @@ post: - type: "Health" status: expression: | - adapter.?executionStatus.orValue("") == "success" ? "True" : "False" + adapter.?executionStatus.orValue("") == "success" + && !adapter.?resourcesSkipped.orValue(false) + ? "True" + : "False" + reason: + expression: | + adapter.?executionStatus.orValue("") != "success" + ? "ExecutionFailed:" + adapter.?executionError.?phase.orValue("unknown") + : adapter.?resourcesSkipped.orValue(false) + ? "ResourcesSkipped" + : "Healthy" + message: + expression: | + adapter.?executionStatus.orValue("") != "success" + ? "Adapter failed at phase [" + + adapter.?executionError.?phase.orValue("unknown") + + "] step [" + + adapter.?executionError.?step.orValue("unknown") + + "]: " + + adapter.?executionError.?message.orValue(adapter.?errorMessage.orValue("no details")) + : adapter.?resourcesSkipped.orValue(false) + ? "Resources skipped: " + adapter.?skipReason.orValue("unknown reason") + : "Adapter execution completed successfully" + # Finalized: True once deployment is confirmed deleted + - type: "Finalized" + status: + expression: | + is_deleting + && adapter.?executionStatus.orValue("") == "success" + && !adapter.?resourcesSkipped.orValue(false) + && !resources.?testDeployment.hasValue() + ? "True" + : "False" reason: expression: | - adapter.?errorReason.orValue("") != "" ? adapter.?errorReason.orValue("") : "Healthy" + !is_deleting ? "" + : !resources.?testDeployment.hasValue() + ? "CleanupConfirmed" + : "CleanupInProgress" message: expression: | - adapter.?errorMessage.orValue("") != "" ? adapter.?errorMessage.orValue("") : "All adapter operations in progress or completed successfully" + !is_deleting ? "" + : !resources.?testDeployment.hasValue() + ? 
"All resources deleted; cleanup confirmed" + : "Deletion in progress; waiting for deployment to be removed" # Event generation ID metadata field needs to use expression to avoid interpolation issues observed_generation: expression: "generationSpec" - observed_time: "{{ now | date \"2006-01-02T15:04:05Z07:00\" }}" + observed_time: '{{ now | date "2006-01-02T15:04:05Z07:00" }}' post_actions: - name: "reportClusterStatus" diff --git a/testdata/adapter-configs/cl-job/adapter-task-config.yaml b/testdata/adapter-configs/cl-job/adapter-task-config.yaml index ffa7c23..38b05e9 100644 --- a/testdata/adapter-configs/cl-job/adapter-task-config.yaml +++ b/testdata/adapter-configs/cl-job/adapter-task-config.yaml @@ -27,6 +27,8 @@ preconditions: field: "generation" - name: "simulateResult" # possible values: success (default), failure, hang, crash, invalid-json, missing-status field: "simulateResult" + - name: "is_deleting" + expression: "has(clusterStatus.deleted_time)" - name: "clusterNotReconciled" expression: | status.conditions.filter(c, c.type == "Reconciled").size() > 0 @@ -40,11 +42,6 @@ preconditions: : now() )).getSeconds() > 300 - - name: "validationCheck" - # Precondition passes if cluster is NOT Reconciled OR if cluster is Reconciled and stable for >300 seconds since last transition (enables self-healing) - expression: | - clusterNotReconciled || clusterReconciledTTL - - name: "clusterAdapterStatus" api_call: method: "GET" @@ -55,10 +52,10 @@ preconditions: capture: - name: "clusterNamespaceStatus" field: "{.items[?(@.adapter=='cl-namespace')].data.namespace.status}" - conditions: - - field: "clusterNamespaceStatus" - operator: "equals" - value: "Active" + + - name: "validationCheck" + expression: | + is_deleting || (clusterNamespaceStatus == "Active" && (clusterNotReconciled || clusterReconciledTTL)) # Resources with valid K8s manifests resources: @@ -76,6 +73,11 @@ resources: label_selector: hyperfleet.io/resource-type: "service-account" hyperfleet.io/cluster-id: "{{ .clusterId }}" + lifecycle: + delete: + propagationPolicy: Background + when: + expression: "is_deleting" - name: "jobRole" transport: @@ -88,6 +90,11 @@ resources: label_selector: hyperfleet.io/cluster-id: "{{ .clusterId }}" hyperfleet.io/resource-type: "role" + lifecycle: + delete: + propagationPolicy: Background + when: + expression: "is_deleting" - name: "jobRolebinding" transport: @@ -100,6 +107,11 @@ resources: label_selector: hyperfleet.io/cluster-id: "{{ .clusterId }}" hyperfleet.io/resource-type: "role-binding" + lifecycle: + delete: + propagationPolicy: Background + when: + expression: "is_deleting" - name: "testJob" transport: @@ -112,6 +124,11 @@ resources: label_selector: hyperfleet.io/cluster-id: "{{ .clusterId }}" hyperfleet.io/resource-type: "job" + lifecycle: + delete: + propagationPolicy: Background + when: + expression: "is_deleting" # Post-processing with valid CEL expressions # This example contains multiple resources, we will only report on the conditions of the jobNamespace not to overcomplicate the example @@ -142,8 +159,8 @@ post: expression: | has(resources.testJob) ? ( resources.?testJob.?status.?conditions.orValue([]).exists(c, c.type == "Available") - ? resources.testJob.status.conditions.filter(c, c.type == "Available")[0].status : "Unknown") - : "Unknown" + ? 
resources.testJob.status.conditions.filter(c, c.type == "Available")[0].status : "False") + : "False" reason: expression: | resources.?testJob.?status.?conditions.orValue([]).exists(c, c.type == "Available") @@ -160,13 +177,54 @@ post: - type: "Health" status: expression: | - adapter.?executionStatus.orValue("") == "success" ? "True" : "False" + adapter.?executionStatus.orValue("") == "success" + && !adapter.?resourcesSkipped.orValue(false) + ? "True" + : "False" + reason: + expression: | + adapter.?executionStatus.orValue("") != "success" + ? "ExecutionFailed:" + adapter.?executionError.?phase.orValue("unknown") + : adapter.?resourcesSkipped.orValue(false) + ? "ResourcesSkipped" + : "Healthy" + message: + expression: | + adapter.?executionStatus.orValue("") != "success" + ? "Adapter failed at phase [" + + adapter.?executionError.?phase.orValue("unknown") + + "] step [" + + adapter.?executionError.?step.orValue("unknown") + + "]: " + + adapter.?executionError.?message.orValue(adapter.?errorMessage.orValue("no details")) + : adapter.?resourcesSkipped.orValue(false) + ? "Resources skipped: " + adapter.?skipReason.orValue("unknown reason") + : "Adapter execution completed successfully" + # Finalized: True once all job resources are confirmed deleted + - type: "Finalized" + status: + expression: | + is_deleting + && adapter.?executionStatus.orValue("") == "success" + && !adapter.?resourcesSkipped.orValue(false) + && !resources.?testJob.hasValue() + && !resources.?jobServiceAccount.hasValue() + && !resources.?jobRole.hasValue() + && !resources.?jobRolebinding.hasValue() + ? "True" + : "False" reason: expression: | - adapter.?errorReason.orValue("") != "" ? adapter.?errorReason.orValue("") : "Healthy" + !is_deleting ? "" + : !resources.?testJob.hasValue() && !resources.?jobServiceAccount.hasValue() && !resources.?jobRole.hasValue() && !resources.?jobRolebinding.hasValue() + ? "CleanupConfirmed" + : "CleanupInProgress" message: expression: | - adapter.?errorMessage.orValue("") != "" ? adapter.?errorMessage.orValue("") : "All adapter operations in progress or completed successfully" + !is_deleting ? "" + : !resources.?testJob.hasValue() && !resources.?jobServiceAccount.hasValue() && !resources.?jobRole.hasValue() && !resources.?jobRolebinding.hasValue() + ? 
"All resources deleted; cleanup confirmed" + : "Deletion in progress; waiting for resources to be removed" # Event generation ID metadata field needs to use expression to avoid interpolation issues observed_generation: expression: "generationSpec" diff --git a/testdata/adapter-configs/cl-maestro/adapter-task-config.yaml b/testdata/adapter-configs/cl-maestro/adapter-task-config.yaml index afe4e9d..65def33 100644 --- a/testdata/adapter-configs/cl-maestro/adapter-task-config.yaml +++ b/testdata/adapter-configs/cl-maestro/adapter-task-config.yaml @@ -33,6 +33,8 @@ preconditions: field: "generation" - name: "timestamp" field: "created_time" + - name: "is_deleting" + expression: "has(clusterStatus.deleted_time)" - name: "reconciledConditionStatus" expression: | status.conditions.filter(c, c.type == "Reconciled").size() > 0 @@ -50,16 +52,9 @@ preconditions: : [] - # Structured conditions with valid operators - conditions: - - field: "reconciledConditionStatus" - operator: "equals" - value: "False" - - name: "validationCheck" - # Valid CEL expression expression: | - reconciledConditionStatus == "False" + is_deleting || reconciledConditionStatus == "False" # Resources with valid K8s manifests resources: @@ -76,6 +71,11 @@ resources: # Discover the ResourceBundle (ManifestWork) by name from Maestro discovery: by_name: "{{ .clusterId }}-{{ .adapter.name }}" + lifecycle: + delete: + propagationPolicy: Background + when: + expression: "is_deleting" # Discover nested resources deployed by the ManifestWork nested_discoveries: @@ -166,6 +166,29 @@ post: ? "Resources skipped: " + adapter.?skipReason.orValue("unknown reason") : "Adapter execution completed successfully" + # Finalized: True once ManifestWork is confirmed deleted + - type: "Finalized" + status: + expression: | + is_deleting + && adapter.?executionStatus.orValue("") == "success" + && !adapter.?resourcesSkipped.orValue(false) + && !resources.?resource0.hasValue() + ? "True" + : "False" + reason: + expression: | + !is_deleting ? "" + : !resources.?resource0.hasValue() + ? "CleanupConfirmed" + : "CleanupInProgress" + message: + expression: | + !is_deleting ? "" + : !resources.?resource0.hasValue() + ? 
"All resources deleted; cleanup confirmed" + : "Deletion in progress; waiting for ManifestWork to be removed" + observed_generation: expression: "generation" observed_time: "{{ now | date \"2006-01-02T15:04:05Z07:00\" }}" diff --git a/testdata/adapter-configs/cl-namespace/adapter-task-config.yaml b/testdata/adapter-configs/cl-namespace/adapter-task-config.yaml index 0f62182..ef7d9e4 100644 --- a/testdata/adapter-configs/cl-namespace/adapter-task-config.yaml +++ b/testdata/adapter-configs/cl-namespace/adapter-task-config.yaml @@ -31,6 +31,8 @@ preconditions: field: "name" - name: "generationSpec" field: "generation" + - name: "is_deleting" + expression: "has(clusterStatus.deleted_time)" - name: "clusterNotReconciled" expression: | status.conditions.filter(c, c.type == "Reconciled").size() > 0 @@ -47,7 +49,7 @@ preconditions: - name: "validationCheck" # Precondition passes if cluster is NOT Reconciled OR if cluster is Reconciled and stable for >300 seconds since last transition (enables self-healing) expression: | - clusterNotReconciled || clusterReconciledTTL + is_deleting || clusterNotReconciled || clusterReconciledTTL # Resources with valid K8s manifests resources: @@ -73,6 +75,11 @@ resources: label_selector: hyperfleet.io/cluster-id: "{{ .clusterId }}" hyperfleet.io/cluster-name: "{{ .clusterName }}" + lifecycle: + delete: + propagationPolicy: Foreground + when: + expression: "is_deleting" # Post-processing with valid CEL expressions # This example contains multiple resources, we will only report on the conditions of the jobNamespace not to overcomplicate the example @@ -112,13 +119,51 @@ post: - type: "Health" status: expression: | - adapter.?executionStatus.orValue("") == "success" ? "True" : "False" + adapter.?executionStatus.orValue("") == "success" + && !adapter.?resourcesSkipped.orValue(false) + ? "True" + : "False" reason: expression: | - adapter.?errorReason.orValue("") != "" ? adapter.?errorReason.orValue("") : "Healthy" + adapter.?executionStatus.orValue("") != "success" + ? "ExecutionFailed:" + adapter.?executionError.?phase.orValue("unknown") + : adapter.?resourcesSkipped.orValue(false) + ? "ResourcesSkipped" + : "Healthy" message: expression: | - adapter.?errorMessage.orValue("") != "" ? adapter.?errorMessage.orValue("") : "All adapter operations in progress or completed successfully" + adapter.?executionStatus.orValue("") != "success" + ? "Adapter failed at phase [" + + adapter.?executionError.?phase.orValue("unknown") + + "] step [" + + adapter.?executionError.?step.orValue("unknown") + + "]: " + + adapter.?executionError.?message.orValue(adapter.?errorMessage.orValue("no details")) + : adapter.?resourcesSkipped.orValue(false) + ? "Resources skipped: " + adapter.?skipReason.orValue("unknown reason") + : "Adapter execution completed successfully" + # Finalized: True once namespace is confirmed deleted + - type: "Finalized" + status: + expression: | + is_deleting + && adapter.?executionStatus.orValue("") == "success" + && !adapter.?resourcesSkipped.orValue(false) + && !resources.?clusterNamespace.hasValue() + ? "True" + : "False" + reason: + expression: | + !is_deleting ? "" + : !resources.?clusterNamespace.hasValue() + ? "CleanupConfirmed" + : "CleanupInProgress" + message: + expression: | + !is_deleting ? "" + : !resources.?clusterNamespace.hasValue() + ? 
"All resources deleted; cleanup confirmed" + : "Deletion in progress; waiting for namespace to be removed" # Event generation ID metadata field needs to use expression to avoid interpolation issues observed_generation: expression: "generationSpec" diff --git a/testdata/adapter-configs/np-configmap/adapter-task-config.yaml b/testdata/adapter-configs/np-configmap/adapter-task-config.yaml index 77906b1..3163c7c 100644 --- a/testdata/adapter-configs/np-configmap/adapter-task-config.yaml +++ b/testdata/adapter-configs/np-configmap/adapter-task-config.yaml @@ -25,6 +25,8 @@ preconditions: field: "name" - name: "generationSpec" field: "generation" + - name: "is_deleting" + expression: "has(nodepoolStatus.deleted_time)" - name: "nodepoolNotReconciled" expression: | status.conditions.filter(c, c.type == "Reconciled").size() > 0 @@ -38,11 +40,6 @@ preconditions: : now() )).getSeconds() > 300 - - name: "validationCheck" - # Precondition passes if nodepool is NOT Reconciled OR if nodepool is Reconciled and stable for >300 seconds since last transition (enables self-healing) - expression: | - nodepoolNotReconciled || nodepoolReconciledTTL - - name: "clusterAdapterStatus" api_call: method: "GET" @@ -53,10 +50,11 @@ preconditions: capture: - name: "clusterNamespaceStatus" field: "{.items[?(@.adapter=='cl-namespace')].data.namespace.status}" - conditions: - - field: "clusterNamespaceStatus" - operator: "equals" - value: "Active" + + - name: "validationCheck" + # Precondition passes if deleting OR (namespace active AND (nodepool not reconciled OR reconciled TTL expired)) + expression: | + is_deleting || (clusterNamespaceStatus == "Active" && (nodepoolNotReconciled || nodepoolReconciledTTL)) # Resources with valid K8s manifests resources: @@ -72,6 +70,11 @@ resources: hyperfleet.io/resource-type: "configmap" hyperfleet.io/cluster-id: "{{ .clusterId }}" hyperfleet.io/nodepool-id: "{{ .nodepoolId }}" + lifecycle: + delete: + propagationPolicy: Background + when: + expression: "is_deleting" # Post-processing with valid CEL expressions post: @@ -114,13 +117,51 @@ post: - type: "Health" status: expression: | - adapter.?executionStatus.orValue("") == "success" ? "True" : "False" + adapter.?executionStatus.orValue("") == "success" + && !adapter.?resourcesSkipped.orValue(false) + ? "True" + : "False" + reason: + expression: | + adapter.?executionStatus.orValue("") != "success" + ? "ExecutionFailed:" + adapter.?executionError.?phase.orValue("unknown") + : adapter.?resourcesSkipped.orValue(false) + ? "ResourcesSkipped" + : "Healthy" + message: + expression: | + adapter.?executionStatus.orValue("") != "success" + ? "Adapter failed at phase [" + + adapter.?executionError.?phase.orValue("unknown") + + "] step [" + + adapter.?executionError.?step.orValue("unknown") + + "]: " + + adapter.?executionError.?message.orValue(adapter.?errorMessage.orValue("no details")) + : adapter.?resourcesSkipped.orValue(false) + ? "Resources skipped: " + adapter.?skipReason.orValue("unknown reason") + : "Adapter execution completed successfully" + # Finalized: True once configmap is confirmed deleted during deletion + - type: "Finalized" + status: + expression: | + is_deleting + && adapter.?executionStatus.orValue("") == "success" + && !adapter.?resourcesSkipped.orValue(false) + && !resources.?nodepoolConfigMap.hasValue() + ? "True" + : "False" reason: expression: | - adapter.?errorReason.orValue("") != "" ? adapter.?errorReason.orValue("") : "Healthy" + !is_deleting ? "" + : !resources.?nodepoolConfigMap.hasValue() + ? 
"CleanupConfirmed" + : "CleanupInProgress" message: expression: | - adapter.?errorMessage.orValue("") != "" ? adapter.?errorMessage.orValue("") : "All adapter operations in progress or completed successfully" + !is_deleting ? "" + : !resources.?nodepoolConfigMap.hasValue() + ? "All resources deleted; cleanup confirmed" + : "Deletion in progress; waiting for configmap to be removed" # Event generation ID metadata field needs to use expression to avoid interpolation issues observed_generation: expression: "generationSpec" diff --git a/testdata/payloads/clusters/cluster-patch.json b/testdata/payloads/clusters/cluster-patch.json new file mode 100644 index 0000000..a5e8c40 --- /dev/null +++ b/testdata/payloads/clusters/cluster-patch.json @@ -0,0 +1,50 @@ +{ + "spec": { + "platform": { + "type": "gcp", + "gcp": { + "projectID": "my-gcp-project", + "region": "us-central1", + "zone": "us-central1-a", + "network": "default", + "subnet": "default-subnet", + "subnets": [ + { + "id": "subnet-control-plane-01", + "name": "control-plane", + "cidr": "10.0.1.0/24", + "role": "control-plane" + }, + { + "id": "subnet-worker-01", + "name": "worker-nodes", + "cidr": "10.0.2.0/24", + "role": "worker" + }, + { + "id": "subnet-service-01", + "name": "service-mesh", + "cidr": "10.0.3.0/24", + "role": "service" + } + ] + } + }, + "release": { + "image": "registry.redhat.io/openshift4/ose-cluster-version-operator:v4.15.0", + "version": "4.15.0" + }, + "networking": { + "clusterNetwork": [ + { + "cidr": "10.10.0.0/16", + "hostPrefix": 24 + } + ], + "serviceNetwork": ["10.96.0.0/12"] + }, + "dns": { + "baseDomain": "example.com" + } + } +} diff --git a/testdata/payloads/nodepools/nodepool-patch.json b/testdata/payloads/nodepools/nodepool-patch.json new file mode 100644 index 0000000..c114ebc --- /dev/null +++ b/testdata/payloads/nodepools/nodepool-patch.json @@ -0,0 +1,10 @@ +{ + "spec": { + "replicas": 3, + "machineType": "n1-standard-16", + "labels": { + "node-role": "worker", + "gpu-enabled": "true" + } + } +}