diff --git a/charts/cozystack/templates/_helpers.tpl b/charts/cozystack/templates/_helpers.tpl index d4bc548b..a51e3821 100644 --- a/charts/cozystack/templates/_helpers.tpl +++ b/charts/cozystack/templates/_helpers.tpl @@ -321,6 +321,18 @@ mtu: {{ $link.spec.mtu }} {{- else if eq $kind "bond" }} {{- $bondMaster := $link.spec.bondMaster }} {{- $slaves := fromJsonArray (include "talm.discovered.bond_slaves" $link.spec.index) }} +{{- if not $slaves }} +{{- /* Talos's link controller can auto-create a bond stub in COSI + link state before any operator configuration enslaves + physical NICs to it: the master link carries a bondMaster + spec but no other link has slaveKind: bond + masterIndex + pointing at it. Emitting BondConfig with empty `links:` + produces a document Talos rejects on apply ("at least one + link must be specified"). Treat empty-slaves as not-a- + user-bond and skip the document. An operator who intends + a bond must declare its slaves via a per-node body overlay; + until then the discovered stub is not promoted to config. */ -}} +{{- else }} --- apiVersion: v1alpha1 kind: BondConfig @@ -362,6 +374,7 @@ routes: {{- if $link.spec.mtu }} mtu: {{ $link.spec.mtu }} {{- end }} +{{- end }} {{- else if eq $kind "vlan" }} {{- $parentLinkName := include "talm.discovered.parent_link_name" $linkName }} {{- $vlanID := include "talm.discovered.vlan_id" $linkName }} diff --git a/charts/generic/templates/_helpers.tpl b/charts/generic/templates/_helpers.tpl index cc22a2ca..9b7c29bf 100644 --- a/charts/generic/templates/_helpers.tpl +++ b/charts/generic/templates/_helpers.tpl @@ -245,6 +245,18 @@ mtu: {{ $link.spec.mtu }} {{- else if eq $kind "bond" }} {{- $bondMaster := $link.spec.bondMaster }} {{- $slaves := fromJsonArray (include "talm.discovered.bond_slaves" $link.spec.index) }} +{{- if not $slaves }} +{{- /* Talos's link controller can auto-create a bond stub in COSI + link state before any operator configuration enslaves + physical NICs to it: the master link carries a bondMaster + spec but no other link has slaveKind: bond + masterIndex + pointing at it. Emitting BondConfig with empty `links:` + produces a document Talos rejects on apply ("at least one + link must be specified"). Treat empty-slaves as not-a- + user-bond and skip the document. An operator who intends + a bond must declare its slaves via a per-node body overlay; + until then the discovered stub is not promoted to config. */ -}} +{{- else }} --- apiVersion: v1alpha1 kind: BondConfig @@ -286,6 +298,7 @@ routes: {{- if $link.spec.mtu }} mtu: {{ $link.spec.mtu }} {{- end }} +{{- end }} {{- else if eq $kind "vlan" }} {{- $parentLinkName := include "talm.discovered.parent_link_name" $linkName }} {{- $vlanID := include "talm.discovered.vlan_id" $linkName }} diff --git a/pkg/commands/init.go b/pkg/commands/init.go index 0d3f9f4c..407ee40b 100644 --- a/pkg/commands/init.go +++ b/pkg/commands/init.go @@ -81,6 +81,31 @@ const ( reportVerbUpdated = "Updated" ) +// resolveTalosconfigEndpoints picks the endpoint list to embed in +// the generated talosconfig's context. If the operator passed +// --endpoints, those values propagate; otherwise we seed the +// loopback placeholder so the rendered context is yaml-valid (a +// talosconfig context with empty endpoints fails downstream +// client construction). The operator edits the placeholder to a +// real endpoint after init if they did not supply --endpoints. +// +// Earlier versions hardcoded defaultLocalEndpoint at the +// assignment site and silently discarded the operator's +// --endpoints flag; the helper centralises the resolution so a +// future caller (e.g. talosconfig regenerate flow) inherits the +// same shape. +// +// Returns a fresh slice so callers mutating the talosconfig field +// don't alias the operator-visible GlobalArgs.Endpoints across +// init invocations in the same process. +func resolveTalosconfigEndpoints(globalEndpoints []string) []string { + if len(globalEndpoints) > 0 { + return append([]string(nil), globalEndpoints...) + } + + return []string{defaultLocalEndpoint} +} + // initCmdFlags is the package-level flag struct backing the init // subcommand; cobra binds Flags() entries directly to these fields, // which forces a global. The global also exposes the flag values to @@ -595,7 +620,7 @@ var initCmd = &cobra.Command{ return errors.Wrap(err, "generating talos config bundle") } - configBundle.TalosConfig().Contexts[clusterName].Endpoints = []string{defaultLocalEndpoint} + configBundle.TalosConfig().Contexts[clusterName].Endpoints = resolveTalosconfigEndpoints(GlobalArgs.Endpoints) data, err := yaml.Marshal(configBundle.TalosConfig()) if err != nil { diff --git a/pkg/commands/init_test.go b/pkg/commands/init_test.go index 7eadf16a..f6338cb7 100644 --- a/pkg/commands/init_test.go +++ b/pkg/commands/init_test.go @@ -459,6 +459,76 @@ func TestValidateImageOverride(t *testing.T) { // redundant validateFileExists call was dropped — the gate now lives // only inside writeSecureToDestination, and this test would fail if // that inner check were ever removed too. + +// TestResolveTalosconfigEndpoints_GlobalNonEmpty_UsesGlobal pins +// that when the operator passes --endpoints on talm init, the +// talosconfig generated for the new project carries those +// endpoints in its context, NOT a hardcoded loopback. Without +// this contract, init writes a talosconfig with +// `endpoints: - 127.0.0.1` and the operator's --endpoints flag +// is silently discarded. +func TestResolveTalosconfigEndpoints_GlobalNonEmpty_UsesGlobal(t *testing.T) { + t.Parallel() + + got := resolveTalosconfigEndpoints([]string{"10.0.80.201"}) + + if len(got) != 1 || got[0] != "10.0.80.201" { + t.Errorf("non-empty --endpoints must propagate to talosconfig; got %v, want [10.0.80.201]", got) + } +} + +// TestResolveTalosconfigEndpoints_GlobalEmpty_UsesLoopbackFallback +// pins the fallback shape: when --endpoints is omitted the +// generated talosconfig still needs a non-empty endpoints list +// (yaml schema requirement), so we seed the loopback placeholder. +// The operator edits it to a real endpoint after init. +func TestResolveTalosconfigEndpoints_GlobalEmpty_UsesLoopbackFallback(t *testing.T) { + t.Parallel() + + got := resolveTalosconfigEndpoints(nil) + if len(got) != 1 || got[0] != defaultLocalEndpoint { + t.Errorf("empty --endpoints must fall back to defaultLocalEndpoint; got %v, want [%s]", got, defaultLocalEndpoint) + } +} + +// TestResolveTalosconfigEndpoints_ReturnsCopy pins that the +// returned slice is not aliased with the caller's slice. Without +// this, downstream mutation of the talosconfig's endpoints would +// also mutate the operator-visible GlobalArgs.Endpoints between +// init runs in a single process (which the test harness exercises). +func TestResolveTalosconfigEndpoints_ReturnsCopy(t *testing.T) { + t.Parallel() + + input := []string{"10.0.80.201"} + + got := resolveTalosconfigEndpoints(input) + got[0] = "mutated" + + if input[0] == "mutated" { + t.Errorf("returned slice must be a copy; mutating it also mutated the input (aliased)") + } +} + +// TestResolveTalosconfigEndpoints_MultipleEndpoints pins that all +// supplied endpoints propagate (StringSlice flag accepts repeated +// or comma-separated values). Operators with a HA cluster may +// pass multiple --endpoints on init. +func TestResolveTalosconfigEndpoints_MultipleEndpoints(t *testing.T) { + t.Parallel() + + got := resolveTalosconfigEndpoints([]string{"10.0.80.201", "10.0.80.202", "10.0.80.203"}) + if len(got) != 3 { + t.Fatalf("multi-endpoint input must all propagate; got %v", got) + } + + want := []string{"10.0.80.201", "10.0.80.202", "10.0.80.203"} + for i := range want { + if got[i] != want[i] { + t.Errorf("endpoint[%d]: got %q, want %q", i, got[i], want[i]) + } + } +} + func TestWriteSecretsBundleToFile_StillRefusesOverwrite(t *testing.T) { forceOrig := initCmdFlags.force rootOrig := Config.RootDir diff --git a/pkg/engine/contract_network_multidoc_test.go b/pkg/engine/contract_network_multidoc_test.go index cacedb35..49eeab2e 100644 --- a/pkg/engine/contract_network_multidoc_test.go +++ b/pkg/engine/contract_network_multidoc_test.go @@ -203,6 +203,66 @@ func TestContract_NetworkMultidoc_BondRendersBondConfig(t *testing.T) { assertContains(t, out, "miimon: 100") } +// TestContract_NetworkMultidoc_BondWithoutSlaves_SkipsBondConfig +// pins the empty-slaves guardrail in the cozystack preset. Talos's +// link controller can auto-create a bond stub in COSI link state +// before any operator configuration enslaves physical NICs to it; +// the master link carries a `bondMaster` spec, but no other link +// has `slaveKind: bond` + `masterIndex` pointing at it. Without +// the guardrail, `talm.discovered.bond_slaves` returns an empty +// list and the renderer emits `BondConfig{name: bond0, +// links: }` — Talos validates the document on apply and +// rejects with `at least one link must be specified`. The fix is +// to skip the bond emission entirely when slaves is empty: a +// discovered bond without enslaved links is a stub, not a +// user-configured bond. +// +// The test also pins a positive-shape invariant: eth0 (the +// non-enslaved physical link from the fixture) must still surface +// as a LinkConfig with its address. A future regression where the +// empty-slaves guard accidentally swallows ALL link emission would +// pass the negative assertions alone; the positive checks pin +// "skip the bond stub, keep the rest of the link graph". +func TestContract_NetworkMultidoc_BondWithoutSlaves_SkipsBondConfig(t *testing.T) { + out := renderCozystackWith(t, emptyBondTopologyLookup(), map[string]any{ + "advertisedSubnets": []any{testAdvertisedSubnet}, + }) + + if strings.Contains(out, "kind: BondConfig") { + t.Errorf("empty-slaves bond must NOT emit BondConfig (Talos rejects it on apply with 'at least one link must be specified'); got:\n%s", out) + } + + if strings.Contains(out, "name: bond0") { + t.Errorf("empty-slaves bond's master link should not surface as a config document; got:\n%s", out) + } + + // Positive-shape pin: eth0 (non-enslaved, has an address) must + // still render as a LinkConfig. + assertContains(t, out, "kind: LinkConfig") + assertContains(t, out, "name: eth0") + assertContains(t, out, "192.168.1.100/24") +} + +// TestContract_NetworkMultidoc_BondWithoutSlaves_SkipsBondConfig_Generic +// is the generic-preset counterpart to the cozystack pin. The +// preset's bond emission shape is identical to cozystack's +// (charts/generic/templates/_helpers.tpl) and exhibits the same +// empty-slaves leak — the guardrail is applied in both presets +// and the contract is pinned in both. +func TestContract_NetworkMultidoc_BondWithoutSlaves_SkipsBondConfig_Generic(t *testing.T) { + out := renderGenericWith(t, emptyBondTopologyLookup(), map[string]any{ + "advertisedSubnets": []any{testAdvertisedSubnet}, + }) + + if strings.Contains(out, "kind: BondConfig") { + t.Errorf("generic preset: empty-slaves bond must NOT emit BondConfig; got:\n%s", out) + } + + if strings.Contains(out, "name: bond0") { + t.Errorf("generic preset: bond stub should not surface as a config document; got:\n%s", out) + } +} + // Contract: bond slaves never appear as standalone LinkConfig // documents. configurable_link_names filters them out via spec.slaveKind. func TestContract_NetworkMultidoc_BondSlavesNotEmittedAsLinkConfig(t *testing.T) { diff --git a/pkg/engine/render_test.go b/pkg/engine/render_test.go index b4499f8f..2a736282 100644 --- a/pkg/engine/render_test.go +++ b/pkg/engine/render_test.go @@ -823,6 +823,119 @@ func bondTopologyLookup() func(string, string, string) (map[string]any, error) { } } +// emptyBondTopologyLookup returns a LookupFunc emulating a bond +// stub created by Talos 1.13's link controller before any operator +// configuration enslaves physical NICs to it. The COSI link state +// has a `bond0` master link with `kind: bond` and a `bondMaster` +// spec, but no other link carries `slaveKind: bond` + `masterIndex` +// pointing at it — so `talm.discovered.bond_slaves` returns an +// empty list. Reused by the empty-slaves regression pin. +func emptyBondTopologyLookup() func(string, string, string) (map[string]any, error) { + bondLink := map[string]any{ + "metadata": map[string]any{"id": "bond0"}, + "spec": map[string]any{ + "kind": "bond", + "index": 10, + "bondMaster": map[string]any{ + "mode": "802.3ad", + "xmitHashPolicy": "layer3+4", + "lacpRate": "fast", + "miimon": 100, + }, + "hardwareAddr": "aa:bb:cc:dd:ee:ff", + "busPath": "pci-0000:00:1f.6", + }, + } + // One physical link that is NOT enslaved — operator has not + // configured the bond yet. Carries an address so the route + // resolution still has somewhere to point. + eth0 := map[string]any{ + "metadata": map[string]any{"id": "eth0"}, + "spec": map[string]any{ + "kind": "physical", + "hardwareAddr": "aa:bb:cc:dd:ee:00", + "busPath": "pci-0000:00:1f.0", + }, + } + routesList := map[string]any{ + "apiVersion": "v1", + "kind": "List", + "items": []any{ + map[string]any{ + "spec": map[string]any{ + "dst": "", + "gateway": "192.168.1.1", + "outLinkName": "eth0", + "family": "inet4", + "table": "main", + }, + }, + }, + } + linksList := map[string]any{ + "apiVersion": "v1", + "kind": "List", + "items": []any{bondLink, eth0}, + } + addressesList := map[string]any{ + "apiVersion": "v1", + "kind": "List", + "items": []any{ + map[string]any{ + "spec": map[string]any{ + "linkName": "eth0", + "address": "192.168.1.100/24", + "family": "inet4", + "scope": "global", + }, + }, + }, + } + nodeDefault := map[string]any{ + "spec": map[string]any{ + "addresses": []any{"192.168.1.100/24"}, + }, + } + resolvers := map[string]any{ + "spec": map[string]any{ + "dnsServers": []any{"8.8.8.8", "1.1.1.1"}, + }, + } + + return func(resource, _, id string) (map[string]any, error) { + switch resource { + case "routes": + return routesList, nil + case "links": + if id == "bond0" { + return bondLink, nil + } + + if id == "eth0" { + return eth0, nil + } + + if id == "" { + return linksList, nil + } + + return map[string]any{}, nil + case "addresses": + return addressesList, nil + case "nodeaddress": + if id == "default" { + return nodeDefault, nil + } + case "resolvers": + if id == "resolvers" { + return resolvers, nil + } + } + + return map[string]any{}, nil + } +} + // vlanOnBondTopologyLookup returns a LookupFunc emulating a VLAN interface // stacked on top of a bond. Used by VLANConfig rendering tests. func vlanOnBondTopologyLookup() func(string, string, string) (map[string]any, error) {