Skip to content

Commit e50f430

Browse files
feat(ci): add sdn for e2e nested cluster
Signed-off-by: Nikita Korolev <nikita.korolev@flant.com>
1 parent 0198448 commit e50f430

5 files changed

Lines changed: 215 additions & 21 deletions

File tree

.github/workflows/e2e-matrix.yml

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,13 @@ name: E2E Matrix Tests (nested clusters)
1616

1717
on:
1818
workflow_dispatch:
19-
schedule:
20-
- cron: "40 4 * * *"
19+
pull_request:
20+
types: [opened, reopened, synchronize, labeled, unlabeled]
21+
branches:
22+
- main
23+
- feat/ci/e2e-nested-add-sdn
24+
# schedule:
25+
# - cron: "40 4 * * *"
2126

2227
concurrency:
2328
group: "${{ github.workflow }}-${{ github.event.number || github.ref }}"
@@ -29,6 +34,7 @@ defaults:
2934

3035
jobs:
3136
cleanup-nested-clusters:
37+
if: github.event_name != 'pull_request'
3238
name: Cleanup nested clusters
3339
runs-on: ubuntu-latest
3440
steps:
@@ -100,6 +106,7 @@ jobs:
100106
cleanup_kind "vmclass"
101107
102108
power-off-vms-for-nested:
109+
if: github.event_name != 'pull_request'
103110
name: Power off VMs for nested clusters
104111
needs: cleanup-nested-clusters
105112
runs-on: ubuntu-latest
@@ -315,7 +322,7 @@ jobs:
315322
fi
316323
set-vars:
317324
name: Set vars
318-
needs: power-off-vms-for-nested
325+
# needs: power-off-vms-for-nested
319326
runs-on: ubuntu-latest
320327
outputs:
321328
date_start: ${{ steps.vars.outputs.date-start }}
@@ -345,13 +352,15 @@ jobs:
345352
randuuid4c: ${{ needs.set-vars.outputs.randuuid4c }}
346353
cluster_config_workers_memory: "9Gi"
347354
cluster_config_k8s_version: "1.34"
355+
e2e_focus_tests: "VirtualMachineAdditionalNetworkInterfaces"
348356
secrets:
349357
DEV_REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }}
350358
VIRT_E2E_NIGHTLY_SA_TOKEN: ${{ secrets.VIRT_E2E_NIGHTLY_SA_TOKEN }}
351359
PROD_IO_REGISTRY_DOCKER_CFG: ${{ secrets.PROD_IO_REGISTRY_DOCKER_CFG }}
352360
BOOTSTRAP_DEV_PROXY: ${{ secrets.BOOTSTRAP_DEV_PROXY }}
353361

354362
e2e-nfs:
363+
if: github.event_name != 'pull_request'
355364
name: E2E Pipeline (NFS)
356365
needs:
357366
- set-vars
@@ -380,7 +389,7 @@ jobs:
380389
name: End-to-End tests report
381390
needs:
382391
- e2e-replicated
383-
- e2e-nfs
392+
# - e2e-nfs
384393
if: ${{ always()}}
385394
env:
386395
STORAGE_TYPES: '["replicated", "nfs"]'
@@ -647,4 +656,5 @@ jobs:
647656
curl --request POST --header 'Content-Type: application/json' --data "{\"text\": \"${COMBINED_SUMMARY}\"}" "$LOOP_WEBHOOK_URL"
648657
fi
649658
env:
650-
LOOP_WEBHOOK_URL: ${{ secrets.LOOP_WEBHOOK_URL }}
659+
LOOP_WEBHOOK_URL: ${{ secrets.LOOP_TEST_CHANNEL }}
660+
# LOOP_WEBHOOK_URL: ${{ secrets.LOOP_WEBHOOK_URL }}

.github/workflows/e2e-reusable-pipeline.yml

Lines changed: 173 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ jobs:
141141
run: |
142142
GIT_SHORT_HASH=$(git rev-parse --short HEAD)
143143
144-
namespace="nightly-e2e-$STORAGE_TYPE-$GIT_SHORT_HASH-$RANDUUID4C"
144+
namespace="test-sdn-e2e-$STORAGE_TYPE-$GIT_SHORT_HASH-$RANDUUID4C"
145145
146146
echo "namespace=$namespace" >> $GITHUB_OUTPUT
147147
echo "sha_short=$GIT_SHORT_HASH" >> $GITHUB_OUTPUT
@@ -253,12 +253,15 @@ jobs:
253253
- name: Bootstrap cluster [dhctl-bootstrap]
254254
id: dhctl-bootstrap
255255
working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }}
256-
env:
257-
# Proxy settings will be added to values.yaml if proxyEnabled is true via task render-cluster-config-proxy
258-
HTTP_PROXY: ${{ secrets.BOOTSTRAP_DEV_PROXY }}
259-
HTTPS_PROXY: ${{ secrets.BOOTSTRAP_DEV_PROXY }}
260256
run: |
257+
if [[ $(yq eval '.deckhouse.proxyEnabled' values.yaml) == true ]]; then
258+
export HTTP_PROXY="${{ secrets.BOOTSTRAP_DEV_PROXY }}"
259+
export HTTPS_PROXY="${{ secrets.BOOTSTRAP_DEV_PROXY }}"
260+
echo "Proxy settings - configured"
261+
fi
262+
261263
task dhctl-bootstrap
264+
echo "[SUCCESS] Done"
262265
timeout-minutes: 30
263266
- name: Bootstrap cluster [show-connection-info]
264267
working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }}
@@ -432,10 +435,171 @@ jobs:
432435
include-hidden-files: true
433436
retention-days: 3
434437

438+
# Job: enable the Deckhouse `sdn` module in the nested cluster created by the
# `bootstrap` job, wait until NodeNetworkInterface objects are present, then
# create the two ClusterNetwork objects used by the e2e tests.
configure-sdn:
  name: Configure SDN
  runs-on: ubuntu-latest
  needs: bootstrap
  steps:
    - uses: actions/checkout@v4

    - name: Install Task
      uses: arduino/setup-task@v2
      with:
        version: 3.x
        repo-token: ${{ secrets.GITHUB_TOKEN }}

    - name: Setup d8
      uses: ./.github/actions/install-d8
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

    - name: Install kubectl CLI
      uses: azure/setup-kubectl@v4

    # Writes the kubeconfig produced by `bootstrap` to ~/.kube/config and
    # retries `kubectl get nodes` until the nested API answers.
    - name: Check nested kube-api via generated kubeconfig
      env:
        # Passed through the environment instead of being interpolated into
        # the script text, so the value is never parsed as shell code.
        NESTED_KUBECONFIG_B64: ${{ needs.bootstrap.outputs.kubeconfig }}
      run: |
        mkdir -p ~/.kube
        echo "[INFO] Configure kubeconfig for nested cluster"
        # The output is decoded twice, exactly as the original step did.
        echo "$NESTED_KUBECONFIG_B64" | base64 -d | base64 -d > ~/.kube/config

        echo "[INFO] Show paths and files content"
        ls -la ~/.kube
        echo "[INFO] Set permissions for kubeconfig"
        chmod 600 ~/.kube/config

        echo "[INFO] Show current kubeconfig context"
        kubectl config get-contexts

        echo "[INFO] Show nodes in cluster"
        # `kubectl get nodes` may return error, so we need to retry.
        count=30
        success=false
        for i in $(seq 1 $count); do
          echo "[INFO] Attempt $i/$count..."
          if kubectl get nodes; then
            echo "[SUCCESS] Successfully retrieved nodes."
            success=true
            break
          fi

          if [ "$i" -lt "$count" ]; then
            echo "[INFO] Retrying in 10 seconds..."
            sleep 10
          fi
        done

        if [ "$success" = false ]; then
          echo "[ERROR] Failed to retrieve nodes after $count attempts."
          exit 1
        fi

    # Enables the module and blocks until the module, its deployments and the
    # `agent` daemonset report ready (300s budget each).
    - name: Enable SDN
      run: |
        echo "[INFO] Enable SDN"
        d8 system module enable sdn
        echo "[INFO] Wait for sdn modules to be ready, timeout: 300s"
        kubectl wait --for=jsonpath='{.status.phase}'=Ready modules sdn --timeout=300s
        echo "[INFO] Wait for sdn deployments to be ready, timeout: 300s"
        kubectl -n d8-sdn wait --for=condition=Available deploy --all --timeout=300s
        echo "[INFO] Wait for sdn daemonset agent to be ready, timeout: 300s"
        kubectl -n d8-sdn rollout status daemonset agent --timeout=300s
        echo "[SUCCESS] Done"

    # Polls until at least two NodeNetworkInterface objects exist per node.
    - name: Wait for nodenetworkinterfaces to be ready
      run: |
        count=60
        success=false
        wait_time_seconds=5

        for i in $(seq 1 $count); do
          # `|| true` keeps a transient kubectl failure from aborting the
          # step; such a failure yields a count of 0 and is simply retried.
          nodes=$(kubectl get nodes -o name 2>/dev/null | wc -l) || true
          actual=$(kubectl get nodenetworkinterfaces -o name 2>/dev/null | wc -l) || true
          nodes=${nodes:-0}
          actual=${actual:-0}
          expected=$((nodes * 2))

          echo "[INFO] Attempt $i/$count: expected=$expected, actual=$actual"

          # Require a non-zero node count: with nodes=0 the comparison below
          # would be 0 >= 0 and report success without checking anything.
          if [ "$nodes" -gt 0 ] && [ "$actual" -ge "$expected" ]; then
            echo "[SUCCESS] All nodenetworkinterfaces are present (expected=$expected, actual=$actual)"
            kubectl get nodenetworkinterfaces
            success=true
            break
          fi

          # Dump diagnostics on every fifth attempt.
          if (( i % 5 == 0 )); then
            echo "::group::📝 [DEBUG] show pods in d8-sdn"
            kubectl -n d8-sdn get pods || true
            echo "::endgroup::"

            echo "::group::📝 [DEBUG] show nodenetworkinterfaces"
            kubectl get nodenetworkinterfaces || true
            echo "::endgroup::"
          fi

          # No point sleeping after the final attempt.
          if [ "$i" -lt "$count" ]; then
            echo "[INFO] Retrying in $wait_time_seconds seconds..."
            sleep "$wait_time_seconds"
          fi
        done

        if [ "$success" = false ]; then
          echo "[ERROR] Failed to get all nodenetworkinterfaces after $count attempts (expected=$expected)"
          echo "[DEBUG] Show pods in d8-sdn"
          kubectl -n d8-sdn get pods || true
          echo "[DEBUG] Show nodenetworkinterfaces"
          kubectl get nodenetworkinterfaces || true
          exit 1
        fi

    # Labels every NIC-type interface that is Up and is not eno1/enp1s0 with
    # nic-group=extra, then creates ClusterNetworks selecting that label.
    - name: Configure ClusterNetwork
      run: |
        extraNic=$(kubectl get nodenetworkinterfaces -l network.deckhouse.io/interface-type=NIC -o json \
          | jq -r '.items[] | select(.status.operationalState == "Up") | select(.status.ifName != "eno1" and .status.ifName != "enp1s0") | .metadata.name')

        if [ -z "$extraNic" ]; then
          echo "::warning::No extra NIC in state Up was found; nothing will be labeled nic-group=extra"
        fi

        # $extraNic is intentionally unquoted: one object name per word.
        for nic in $extraNic; do
          echo "[INFO] Label nodenetworkinterface $nic nic-group=extra"
          # --overwrite keeps the step re-runnable if the label already exists.
          kubectl label --overwrite nodenetworkinterfaces "$nic" nic-group=extra
        done

        kubectl get nodenetworkinterface -l nic-group=extra

        cat <<'EOF' | kubectl apply -f -
        ---
        apiVersion: network.deckhouse.io/v1alpha1
        kind: ClusterNetwork
        metadata:
          name: cn-4006-for-e2e-test
        spec:
          parentNodeNetworkInterfaces:
            labelSelector:
              matchLabels:
                nic-group: extra
          type: Access
        ---
        apiVersion: network.deckhouse.io/v1alpha1
        kind: ClusterNetwork
        metadata:
          name: cn-4007-for-e2e-test
        spec:
          parentNodeNetworkInterfaces:
            labelSelector:
              matchLabels:
                nic-group: extra
          type: VLAN
          vlan:
            id: 4007
        EOF

        echo "[INFO] Wait for ClusterNetwork cn-4006-for-e2e-test to be ready"
        kubectl wait clusternetworks.network.deckhouse.io --for=condition=Ready cn-4006-for-e2e-test --timeout=120s

        echo "[INFO] Wait for ClusterNetwork cn-4007-for-e2e-test to be ready"
        kubectl wait clusternetworks.network.deckhouse.io --for=condition=Ready cn-4007-for-e2e-test --timeout=120s
596+
435597
configure-storage:
436598
name: Configure storage
437599
runs-on: ubuntu-latest
438-
needs: bootstrap
600+
needs:
601+
- configure-sdn
602+
- bootstrap
439603
steps:
440604
- uses: actions/checkout@v4
441605

@@ -1144,6 +1308,7 @@ jobs:
11441308
11451309
echo "[INFO] Exit code: $GINKGO_EXIT_CODE"
11461310
exit $GINKGO_EXIT_CODE
1311+
# exit 1
11471312
- name: Upload summary test results (junit/xml)
11481313
uses: actions/upload-artifact@v4
11491314
id: e2e-report-artifact
@@ -1338,10 +1503,11 @@ jobs:
13381503
runs-on: ubuntu-latest
13391504
needs:
13401505
- bootstrap
1506+
- configure-sdn
13411507
- configure-storage
13421508
- configure-virtualization
13431509
- e2e-test
1344-
if: cancelled() || success()
1510+
if: (cancelled() || success()) && (needs.configure-sdn.result == 'success')
13451511
steps:
13461512
- uses: actions/checkout@v4
13471513

test/dvp-static-cluster/Taskfile.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ tasks:
116116
sh: date +%s
117117
cmds:
118118
- kubectl apply -f {{ .TMP_DIR }}/infra.yaml
119-
- kubectl -n {{ .NAMESPACE }} get all
119+
- kubectl -n {{ .NAMESPACE }} get vm,vd,vi,svc,pod,deploy
120120
- kubectl -n {{ .NAMESPACE }} wait --for=condition=Ready pod -l app=jump-host --timeout=300s
121121
- kubectl -n {{ .NAMESPACE }} get vi -o name | xargs kubectl -n {{ .NAMESPACE }} wait --for='jsonpath={.status.phase}=Ready' --timeout=600s
122122
- kubectl -n {{ .NAMESPACE }} get vd -o name | xargs kubectl -n {{ .NAMESPACE }} wait --for='jsonpath={.status.phase}=Ready' --timeout=600s
@@ -140,7 +140,7 @@ tasks:
140140
desc: Add proxy if enabled
141141
cmds:
142142
- |
143-
if yq eval '.deckhouse.proxyEnabled' values.yaml; then
143+
if [[ $(yq eval '.deckhouse.proxyEnabled' values.yaml) == true ]]; then
144144
yq eval --inplace '.proxy.httpProxy = env(HTTP_PROXY) | .proxy.httpsProxy = env(HTTPS_PROXY)' values.yaml
145145
fi
146146

test/dvp-static-cluster/charts/cluster-config/templates/master-nodes.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
{{- $totalNodes = add $totalNodes .count -}}
55
{{- end -}}
66

7+
{{- $masterCount := $.Values.instances.masterNodes.count | int -}}
8+
{{- if gt $masterCount 1 -}}
9+
{{- $staticCount := sub $masterCount 1 -}}
710
---
811
apiVersion: deckhouse.io/v1
912
kind: NodeGroup
@@ -24,21 +27,17 @@ spec:
2427
node-role.kubernetes.io/master: ""
2528
nodeType: Static
2629
staticInstances:
27-
count: {{ .Values.instances.masterNodes.count }}
30+
count: {{ $staticCount }}
2831
labelSelector:
2932
matchLabels:
3033
role: master
3134

32-
{{- range $_, $i := untilStep 0 (.Values.instances.masterNodes.count | int) 1}}
35+
{{- range $_, $i := untilStep 1 $masterCount 1}}
3336
{{- $vmName := printf "%s-master-%d" $.Values.storageType $i }}
3437
---
3538
apiVersion: deckhouse.io/v1alpha1
3639
kind: StaticInstance
3740
metadata:
38-
{{- if eq $i 0 }}
39-
annotations:
40-
static.node.deckhouse.io/skip-bootstrap-phase: ""
41-
{{- end }}
4241
name: {{ $vmName }}
4342
labels:
4443
role: master
@@ -48,3 +47,4 @@ spec:
4847
kind: SSHCredentials
4948
name: mvp-static
5049
{{- end }}
50+
{{- end }}

test/dvp-static-cluster/charts/infra/templates/_helpers.tpl

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,10 @@ spec:
3333
- kind: VirtualDisk
3434
name: {{ printf "%s-%d" $name $i }}
3535
{{- end }}
36-
{{- end }}
36+
networks:
37+
- type: Main
38+
- type: ClusterNetwork
39+
name: cn-4006-for-e2e-test
3740
bootloader: {{ $ctx.Values.image.bootloader }}
3841
liveMigrationPolicy: PreferForced
3942
cpu:
@@ -51,6 +54,18 @@ spec:
5154
#cloud-config
5255
ssh_pwauth: true
5356
package_update: true
57+
write_files:
58+
- path: /etc/netplan/99-eno2.yaml
59+
content: |
60+
network:
61+
version: 2
62+
ethernets:
63+
eno2:
64+
dhcp4: false
65+
dhcp6: false
66+
addresses: []
67+
link-local: []
68+
optional: true
5469
packages:
5570
- qemu-guest-agent
5671
- jq
@@ -68,6 +83,8 @@ spec:
6883
- {{ $ctx.Values.discovered.publicSSHKey }}
6984

7085
runcmd:
86+
- netplan apply
87+
- ip link set eno2 up
7188
- systemctl enable --now qemu-guest-agent.service
7289
final_message: "\U0001F525\U0001F525\U0001F525 The system is finally up, after $UPTIME seconds \U0001F525\U0001F525\U0001F525"
7390
runPolicy: AlwaysOn
@@ -108,4 +125,5 @@ spec:
108125
{{- end }}
109126
{{- end }}
110127
{{- end }}
128+
{{- end }}
111129
{{- end }}

0 commit comments

Comments
 (0)