diff --git a/.codex b/.codex new file mode 100644 index 0000000000..e69de29bb2 diff --git a/.github/workflows/ansible-deploy.yml b/.github/workflows/ansible-deploy.yml new file mode 100644 index 0000000000..ac8563c676 --- /dev/null +++ b/.github/workflows/ansible-deploy.yml @@ -0,0 +1,150 @@ +name: Ansible Deployment + +on: + push: + branches: [lab06] + paths: + - "ansible/**" + - ".github/workflows/ansible-deploy.yml" + + pull_request: + branches: [lab06] + paths: + - "ansible/**" + - ".github/workflows/ansible-deploy.yml" + +jobs: + + lint: + name: Ansible Lint + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ansible ansible-lint + ansible-galaxy collection install community.docker + + - name: Write vault password from GitHub Secret (required to decrypt group_vars/all.yml) + working-directory: ansible + shell: bash + run: | + if [ -z "${{ secrets.ANSIBLE_VAULT_PASSWORD }}" ]; then + echo "ERROR: GitHub Secret ANSIBLE_VAULT_PASSWORD is not set, but group_vars/all.yml is encrypted with Ansible Vault." 
+ exit 1 + fi + printf '%s' "${{ secrets.ANSIBLE_VAULT_PASSWORD }}" > .vault_pass + chmod 600 .vault_pass + + - name: Run ansible-lint + working-directory: ansible + run: | + ansible-lint playbooks/*.yml + + + deploy: + name: Deploy Application + needs: lint + runs-on: ubuntu-latest + if: > + github.event_name == 'push' || + (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository) + + env: + ANSIBLE_HOST_KEY_CHECKING: "False" + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ansible + ansible-galaxy collection install community.docker + + - name: Validate required deployment secrets + env: + SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + VM_HOST: ${{ secrets.VM_HOST }} + VM_USER: ${{ secrets.VM_USER }} + ANSIBLE_VAULT_PASSWORD: ${{ secrets.ANSIBLE_VAULT_PASSWORD }} + run: | + missing=() + for var in SSH_PRIVATE_KEY VM_HOST VM_USER ANSIBLE_VAULT_PASSWORD; do + if [ -z "${!var}" ]; then + missing+=("$var") + fi + done + if [ ${#missing[@]} -ne 0 ]; then + echo "Missing required secrets: ${missing[*]}" + exit 1 + fi + + - name: Write vault password from GitHub Secret + working-directory: ansible + shell: bash + run: | + if [ -z "${{ secrets.ANSIBLE_VAULT_PASSWORD }}" ]; then + echo "ERROR: GitHub Secret ANSIBLE_VAULT_PASSWORD is not set." 
+ exit 1 + fi + printf '%s\n' "${{ secrets.ANSIBLE_VAULT_PASSWORD }}" > .vault_pass + chmod 600 .vault_pass + + - name: Configure SSH + env: + SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + run: | + SSH_DIR="$HOME/.ssh" + install -m 700 -d "$SSH_DIR" + SSH_KEY_PATH="$SSH_DIR/ansible_deploy" + printf '%s\n' "$SSH_PRIVATE_KEY" > "$SSH_KEY_PATH" + chmod 600 "$SSH_KEY_PATH" + echo "SSH_KEY_PATH=$SSH_KEY_PATH" >> "$GITHUB_ENV" + + - name: Generate CI inventory + working-directory: ansible/inventory + run: | + cat > ci_hosts.ini <<EOF + [webservers] + ci ansible_host=${{ secrets.VM_HOST }} ansible_user=${{ secrets.VM_USER }} ansible_ssh_private_key_file=$SSH_KEY_PATH + EOF + + + - name: SSH diagnostics + env: + VM_HOST: ${{ secrets.VM_HOST }} + VM_USER: ${{ secrets.VM_USER }} + SSH_KEY_PATH: ${{ env.SSH_KEY_PATH }} + run: | + set -x + ls -l "$SSH_KEY_PATH" + ssh -i "$SSH_KEY_PATH" -o BatchMode=yes -o StrictHostKeyChecking=no -v "$VM_USER@$VM_HOST" "echo 'SSH ok'" || true + + - name: Deploy with Ansible + working-directory: ansible + run: | + ansible-playbook playbooks/deploy.yml \ + -i inventory/ci_hosts.ini \ + --tags app_deploy + + - name: Verify deployment + run: | + sleep 10 + curl -f http://${{ secrets.VM_HOST }}:5000 + curl -f http://${{ secrets.VM_HOST }}:5000/health diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml new file mode 100644 index 0000000000..185a93089b --- /dev/null +++ b/.github/workflows/python-ci.yml @@ -0,0 +1,91 @@ +name: python-ci + +on: + push: + branches: [lab08, master] + paths: + - "app_python/**" + - ".github/workflows/python-ci.yml" + tags: + - "v*" + pull_request: + paths: + - "app_python/**" + - ".github/workflows/python-ci.yml" + workflow_dispatch: + +concurrency: + group: python-ci-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +env: + IMAGE_NAME: egrapa/devops-core-course-lab2 + +jobs: + test: + runs-on: ubuntu-latest + defaults: + run: + working-directory: app_python + + 
steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.13" + cache: "pip" + cache-dependency-path: | + app_python/requirements.txt + app_python/requirements-dev.txt + + - name: Install deps + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi + + - name: Lint + run: flake8 . + + - name: Tests + run: pytest -q + + docker-release: + runs-on: ubuntu-latest + needs: [ test ] + if: startsWith(github.ref, 'refs/tags/v') + + steps: + - uses: actions/checkout@v4 + + - uses: docker/setup-buildx-action@v3 + + - uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Extract version + id: version + run: | + VERSION="${GITHUB_REF#refs/tags/v}" + MAJOR_MINOR="$(echo "$VERSION" | awk -F. '{print $1"."$2}')" + echo "version=$VERSION" >> $GITHUB_OUTPUT + echo "major_minor=$MAJOR_MINOR" >> $GITHUB_OUTPUT + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: ./app_python + file: ./app_python/Dockerfile + push: true + tags: | + ${{ env.IMAGE_NAME }}:${{ steps.version.outputs.version }} + ${{ env.IMAGE_NAME }}:${{ steps.version.outputs.major_minor }} + ${{ env.IMAGE_NAME }}:latest + cache-from: type=gha + cache-to: type=gha,mode=max \ No newline at end of file diff --git a/.gitignore b/.gitignore index 30d74d2584..07c7b214d6 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,7 @@ -test \ No newline at end of file +test +*.retry +.vault_pass +.env +.venv +monitoring/data/* +!monitoring/data/.gitkeep diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg new file mode 100644 index 0000000000..a885ca5e92 --- /dev/null +++ b/ansible/ansible.cfg @@ -0,0 +1,11 @@ +[defaults] +inventory = inventory/hosts.ini +roles_path = roles +host_key_checking = False +retry_files_enabled = False +vault_password_file = .vault_pass + 
+[privilege_escalation] +become = True +become_method = sudo +become_user = root diff --git a/ansible/docs/LAB05.md b/ansible/docs/LAB05.md new file mode 100644 index 0000000000..dc1e6ef97f --- /dev/null +++ b/ansible/docs/LAB05.md @@ -0,0 +1,364 @@ +# LAB 5 — Ansible Fundamentals Report + +--- + +## 1. Architecture Overview + +### Ansible Version + +``` +ansible --version +ansible [core 2.20.2] + config file = /home/egrapa/prog/DevOps-Core-Course/ansible/ansible.cfg + ansible python module location = /usr/lib/python3.14/site-packages/ansible + executable location = /usr/bin/ansible + python version = 3.14.3 +``` + +### Control Node + +* OS: Arch Linux +* Ansible installed locally +* SSH key authentication used + +### Target VM + +* Virtualization: KVM/QEMU (virt-manager) +* Network: libvirt NAT (192.168.122.x) +* OS: Ubuntu 25.10 (Questing Quokka) +* Python: `/usr/bin/python3.13` +* Access: SSH (key-based authentication) + +![alt text](image.png) +--- + +## Infrastructure Overview + +``` +Host Machine (Arch Linux) + │ + ├── Ansible (Control Node) + │ └── SSH + │ + └── KVM / QEMU VM (Ubuntu 25.10) + └── Docker Engine + └── Python Application Container +``` + +--- + +## Project Structure + +``` +ansible/ +├── inventory/ +│ └── hosts.ini +├── inventory/group_vars/ +│ └── all.yml (encrypted via Vault) +├── roles/ +│ ├── common/ +│ ├── docker/ +│ └── web_app/ +├── playbooks/ +│ ├── provision.yml +│ ├── deploy.yml +│ └── site.yml +├── ansible.cfg +└── docs/ + └── LAB05.md +``` + +--- + +## Why Roles Instead of Monolithic Playbooks? + +Roles allow: + +* Separation of concerns +* Clear modular structure +* Reusability across projects +* Cleaner main playbooks +* Easier debugging and maintenance + +Instead of putting all tasks in one large file, each responsibility is isolated. + +--- + +# 2. Roles Documentation + +--- + +## 2.1 Role: `common` + +### Purpose + +Prepare base Ubuntu system with required tools for further provisioning and automation. 
+ +### Tasks + +* Update apt cache +* Install essential packages + +### Variables (`defaults/main.yml`) + +```yaml +common_packages: + - python3-pip + - curl + - git + - vim + - htop +``` + +### Idempotency + +* Uses `apt` with `state: present` +* Uses `cache_valid_time` to prevent unnecessary updates +* No imperative shell commands + +### Handlers + +None required. + +--- + +## 2.2 Role: `docker` + +### Purpose + +Install and configure Docker Engine. + +### Tasks + +* Install required dependencies +* Add Docker GPG key +* Add Docker repository +* Install Docker packages +* Enable and start Docker service +* Add SSH user to docker group +* Install `python3-docker` + +### Variables (`defaults/main.yml`) + +```yaml +docker_user: yan +``` + +### Handler + +```yaml +- name: restart docker + service: + name: docker + state: restarted +``` + +Triggered when Docker packages change. + +### Idempotency + +* `apt_repository` ensures repo is added once +* `apt state=present` +* `service state=started` +* `user append=yes` + +Second run produces zero changes. + +--- + +## 2.3 Role: `web_app` + +### Purpose + +Deploy containerized Python application from Docker Hub. 
+ +### Tasks + +* Login to Docker Hub (via Vault credentials) +* Pull latest image +* Run container +* Wait for port 5000 +* Perform HTTP health check + +### Variables (Vault — encrypted) + +Stored in: + +``` +inventory/group_vars/all.yml +``` + +Decrypted content example: + +```yaml +dockerhub_username: egrapa +dockerhub_password: + +app_name: devops-core-course-lab2 +docker_image: "{{ dockerhub_username }}/{{ app_name }}" +docker_image_tag: latest +app_port: 5000 +app_container_name: "{{ app_name }}" +``` + +### Security + +* Credentials encrypted with Ansible Vault +* `no_log: true` on login task +* `.vault_pass` excluded from Git +* Only encrypted vault file committed + +### Idempotency + +* `docker_image source: pull` +* `docker_container state: started` +* Container recreated only if configuration changes + +--- + +# 3. Idempotency Demonstration + +## First Run + +``` +ansible-playbook playbooks/provision.yml +``` + +Observed: + +* Docker repository added +* Packages installed +* Service started + +Tasks show `changed`. + +![alt text](image-1.png) + +--- + +## Second Run + +``` +ansible-playbook playbooks/provision.yml +``` + +Observed: + +* All tasks show `ok` +* No `changed` + +This confirms idempotency. +![alt text](image-2.png) +--- + +## Why It Is Idempotent + +* Declarative state (`state: present`, `state: started`) +* No shell commands used for state changes +* Ansible checks system state before applying changes + +--- + +# 4. Ansible Vault Usage + +Vault file: + +``` +inventory/group_vars/all.yml +``` + +Encrypted format: + +``` +$ANSIBLE_VAULT;1.1;AES256 +6238656365... +``` + +### Vault Strategy + +* Vault password provided via `--ask-vault-pass` +* `.vault_pass` not committed +* Encrypted file safely committed +* No plaintext secrets in repository + +### Why Vault Is Important + +* Prevents credential leakage +* Secure automation +* Enables safe version control + +--- + +# 5. 
Deployment Verification + +## Deployment + +``` +ansible-playbook playbooks/deploy.yml --ask-vault-pass +``` + +Application deployed successfully. +![alt text](image-3.png) + +--- + +## Container Status + +``` +ansible webservers -a "docker ps" +``` + +Output shows: + +``` +egrapa/devops-app:latest Up 0.0.0.0:5000->5000/tcp +``` +![alt text](image-4.png) + +--- + +## Health Check + +``` +curl http://192.168.122.221:5000/health +``` + +Response: + +![alt text](image-5.png) + +Application is accessible and running. + +--- + +# 6. Key Decisions + +### Why Use Roles? + +To separate infrastructure concerns and maintain clean modular automation. + +### How Do Roles Improve Reusability? + +Each role can be reused independently across different projects. + +### What Makes a Task Idempotent? + +It declares a desired state and does nothing if the system already matches that state. + +### How Do Handlers Improve Efficiency? + +They run only when notified, preventing unnecessary service restarts. + +### Why Is Ansible Vault Necessary? + +To protect Docker Hub credentials from exposure in Git repositories. + +--- + +# 7. 
Challenges Encountered + +* Vault variables not being loaded due to incorrect group_vars location +* Ensuring correct ansible.cfg was used +* Docker Hub authentication token configuration +* Understanding Ansible variable precedence +* Ensuring idempotency of repository and Docker installation tasks diff --git a/ansible/docs/LAB05.pdf b/ansible/docs/LAB05.pdf new file mode 100644 index 0000000000..c4c646cb59 Binary files /dev/null and b/ansible/docs/LAB05.pdf differ diff --git a/ansible/docs/LAB06.md b/ansible/docs/LAB06.md new file mode 100644 index 0000000000..883b28af3a --- /dev/null +++ b/ansible/docs/LAB06.md @@ -0,0 +1,338 @@ +# Lab 6: Advanced Ansible & CI/CD + +**Name:** egrapa\ +**Date:** 2026-03-05\ +**Lab Points:** 10 + 0 bonus + +------------------------------------------------------------------------ + +# Overview + +In this lab I extended the Ansible automation from the previous +assignment and implemented several production-style improvements: + +- refactoring roles using blocks, rescue and always +- adding tags for selective execution +- migrating container deployment from docker run to Docker Compose +- implementing safe wipe logic for clean reinstallation +- integrating CI/CD with GitHub Actions + +The infrastructure is deployed to a cloud VM and managed through Ansible +playbooks. + +------------------------------------------------------------------------ + +# Task 1 --- Blocks & Tags + +## Implementation + +The provisioning roles were refactored to use Ansible blocks for +grouping related tasks and applying shared directives. + +### common role + +`roles/common/tasks/main.yml` now contains two main blocks. 
+ +**packages block** + +- installs required packages +- runs with `become: true` +- tagged with `packages` +- includes rescue logic for apt cache failures + +If updating the apt cache fails, a rescue step runs: + + apt-get update --fix-missing + +The block also includes an `always` section that writes a log file: + + /tmp/common_packages.log + +------------------------------------------------------------------------ + +### users block + +User management tasks were grouped into a separate block: + +- tagged `users` +- loops through `common_users` +- allows configurable shell and groups + +------------------------------------------------------------------------ + +### docker role + +Docker installation was split into two blocks. + +**docker_install** + +- install prerequisites +- add Docker repository +- install Docker packages + +**docker_config** + +- configure Docker group +- add user to docker group + +The install block includes a rescue section that retries the apt update +if the Docker GPG key download fails. + +------------------------------------------------------------------------ + +# Evidence --- Tag System + +### List available tags + +Command executed: + +``` bash +ansible-playbook playbooks/provision.yml --list-tags +``` + +**Output** + +![alt text](image-7.png) + +------------------------------------------------------------------------ + +### Selective execution of docker tasks + +Command executed: + +``` bash +ansible-playbook playbooks/provision.yml --tags docker --list-tasks +``` + +**Output** + +![alt text](image-8.png) + +------------------------------------------------------------------------ + +# Task 2 --- Docker Compose Migration + +## Role Refactoring + +The role `app_deploy` was renamed to **web_app**. 
+ +Reasons: + +- clearer role purpose +- easier extension for future services +- consistent naming for wipe logic + +------------------------------------------------------------------------ + +## Docker Compose Template + +A template was created: + + roles/web_app/templates/docker-compose.yml.j2 + +Example structure: + + services: + {{ app_name }}: + image: {{ docker_image }}:{{ docker_tag }} + ports: + - "{{ app_port }}:{{ app_internal_port }}" + restart: unless-stopped + +Variables are defined in `group_vars/all.yml`. + +------------------------------------------------------------------------ + +# Deployment Evidence + +### Syntax validation + +Command executed: + +``` bash +ansible-playbook playbooks/deploy.yml --syntax-check +``` + +Output: + +![alt text](image-9.png) + +------------------------------------------------------------------------ + +### Deployment run + +Command executed: + +``` bash +ansible-playbook playbooks/deploy.yml +``` + +Output: +![alt text](image-13.png) +Failed because of +![alt text](image-12.png) +So I changed and ran again +![alt text](image-11.png) + +(Had to fix a docker compose file, since I used legacy legacy option) + +------------------------------------------------------------------------ + +### Idempotency verification + +Look at prev run - only the fixed part changed + +------------------------------------------------------------------------ + +### Container verification + +Command executed on the VM: + +``` bash +docker ps +``` + +Output: +![alt text](image-14.png) + +------------------------------------------------------------------------ + +### Application accessibility + +Command executed: + +``` bash +curl http://VM_IP:5000 +``` + +Output: +![alt text](image-15.png) + +------------------------------------------------------------------------ + +# Task 3 --- Wipe Logic + +## Implementation + +The wipe logic allows removing the deployed application safely. + +The mechanism uses two conditions: + +1. 
variable ```web_app_wipe=true``` + +2. tag ```web_app_wipe``` + +Both must be provided for wipe-only runs. + +------------------------------------------------------------------------ + +# Wipe Logic Testing + +## Scenario 1 --- Normal deployment + +Command: + + ansible-playbook playbooks/deploy.yml + +Result: + +![alt text](image-16.png) + +------------------------------------------------------------------------ + +## Scenario 2 --- Wipe only + +Command: + + ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" --tags web_app_wipe + +Output: + +![alt text](image-17.png) + +------------------------------------------------------------------------ + +## Scenario 3 --- Clean reinstall + +Command: + + ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" + +Output: +![alt text](image-18.png) + +------------------------------------------------------------------------ + +# Task 4 --- CI/CD with GitHub Actions + +## Workflow Overview + +Workflow file: + + .github/workflows/ansible-deploy.yml + +Pipeline steps: + +1. checkout repository +2. install Ansible +3. run ansible-lint +4. run deployment playbook +5. verify application with curl + +------------------------------------------------------------------------ + +# CI/CD Evidence + +### ansible-lint execution + + +![alt text](image-20.png) + +------------------------------------------------------------------------ + +### Workflow run screenshot + +![alt text](image-19.png) + +------------------------------------------------------------------------ + +### Deployment +![alt text](image-21.png) +Unfortunetly my server is not reachable from github runner, so I guess I will have fail for Deployment CI state, sorry:( + +![alt text](image-22.png) + +------------------------------------------------------------------------ + +# Challenges + +Temporary directory permission issue with Ansible: + + ANSIBLE_LOCAL_TEMP=/tmp + +Role rename required updating playbooks and variable references. 
+ +Docker compose module required installing `community.docker` collection. + +Server IP block from github runner + +------------------------------------------------------------------------ + +# Summary + +In this lab I improved the infrastructure automation by introducing: + +- block-based task organization +- tag-based selective execution +- Docker Compose deployment +- wipe logic for clean reinstall +- CI/CD automation with GitHub Actions + +These changes improve maintainability, reliability and reproducibility +of the deployment process. + +------------------------------------------------------------------------ + +**Time spent:** \~5 hours\ +**Main learning outcome:** deeper understanding of Ansible execution +control, Docker Compose automation and CI/CD integration. \ No newline at end of file diff --git a/ansible/docs/image-1.png b/ansible/docs/image-1.png new file mode 100644 index 0000000000..67bcb49ca3 Binary files /dev/null and b/ansible/docs/image-1.png differ diff --git a/ansible/docs/image-10.png b/ansible/docs/image-10.png new file mode 100644 index 0000000000..b1a3de931d Binary files /dev/null and b/ansible/docs/image-10.png differ diff --git a/ansible/docs/image-11.png b/ansible/docs/image-11.png new file mode 100644 index 0000000000..90b112a784 Binary files /dev/null and b/ansible/docs/image-11.png differ diff --git a/ansible/docs/image-12.png b/ansible/docs/image-12.png new file mode 100644 index 0000000000..409e3651a0 Binary files /dev/null and b/ansible/docs/image-12.png differ diff --git a/ansible/docs/image-13.png b/ansible/docs/image-13.png new file mode 100644 index 0000000000..1c1fbe52b2 Binary files /dev/null and b/ansible/docs/image-13.png differ diff --git a/ansible/docs/image-14.png b/ansible/docs/image-14.png new file mode 100644 index 0000000000..061bc3deff Binary files /dev/null and b/ansible/docs/image-14.png differ diff --git a/ansible/docs/image-15.png b/ansible/docs/image-15.png new file mode 100644 index 0000000000..9f925a59bb 
Binary files /dev/null and b/ansible/docs/image-15.png differ diff --git a/ansible/docs/image-16.png b/ansible/docs/image-16.png new file mode 100644 index 0000000000..85da81c973 Binary files /dev/null and b/ansible/docs/image-16.png differ diff --git a/ansible/docs/image-17.png b/ansible/docs/image-17.png new file mode 100644 index 0000000000..d757a33e68 Binary files /dev/null and b/ansible/docs/image-17.png differ diff --git a/ansible/docs/image-18.png b/ansible/docs/image-18.png new file mode 100644 index 0000000000..92a66734a6 Binary files /dev/null and b/ansible/docs/image-18.png differ diff --git a/ansible/docs/image-19.png b/ansible/docs/image-19.png new file mode 100644 index 0000000000..cf8c00650f Binary files /dev/null and b/ansible/docs/image-19.png differ diff --git a/ansible/docs/image-2.png b/ansible/docs/image-2.png new file mode 100644 index 0000000000..8e8406e57f Binary files /dev/null and b/ansible/docs/image-2.png differ diff --git a/ansible/docs/image-20.png b/ansible/docs/image-20.png new file mode 100644 index 0000000000..b24987d839 Binary files /dev/null and b/ansible/docs/image-20.png differ diff --git a/ansible/docs/image-21.png b/ansible/docs/image-21.png new file mode 100644 index 0000000000..4a556880bc Binary files /dev/null and b/ansible/docs/image-21.png differ diff --git a/ansible/docs/image-22.png b/ansible/docs/image-22.png new file mode 100644 index 0000000000..67408fb7a2 Binary files /dev/null and b/ansible/docs/image-22.png differ diff --git a/ansible/docs/image-3.png b/ansible/docs/image-3.png new file mode 100644 index 0000000000..76ecb82199 Binary files /dev/null and b/ansible/docs/image-3.png differ diff --git a/ansible/docs/image-4.png b/ansible/docs/image-4.png new file mode 100644 index 0000000000..1726fe56d6 Binary files /dev/null and b/ansible/docs/image-4.png differ diff --git a/ansible/docs/image-5.png b/ansible/docs/image-5.png new file mode 100644 index 0000000000..49ed8b60b7 Binary files /dev/null and 
b/ansible/docs/image-5.png differ diff --git a/ansible/docs/image-6.png b/ansible/docs/image-6.png new file mode 100644 index 0000000000..b6771ff640 Binary files /dev/null and b/ansible/docs/image-6.png differ diff --git a/ansible/docs/image-7.png b/ansible/docs/image-7.png new file mode 100644 index 0000000000..3ff1baf47f Binary files /dev/null and b/ansible/docs/image-7.png differ diff --git a/ansible/docs/image-8.png b/ansible/docs/image-8.png new file mode 100644 index 0000000000..bae592666d Binary files /dev/null and b/ansible/docs/image-8.png differ diff --git a/ansible/docs/image-9.png b/ansible/docs/image-9.png new file mode 100644 index 0000000000..b0692f253e Binary files /dev/null and b/ansible/docs/image-9.png differ diff --git a/ansible/docs/image.png b/ansible/docs/image.png new file mode 100644 index 0000000000..dbb47716f3 Binary files /dev/null and b/ansible/docs/image.png differ diff --git a/ansible/group_vars/all.yml b/ansible/group_vars/all.yml new file mode 100644 index 0000000000..f9476fff10 --- /dev/null +++ b/ansible/group_vars/all.yml @@ -0,0 +1,19 @@ +$ANSIBLE_VAULT;1.1;AES256 +38656230393534636532653865313961386539386132336438646130323465366139336165636166 +3638626462326139306362396338636631663437373264610a633836666534666633613263336263 +31386137383462306430366331306236386236626431343065373765373736626664323064383834 +6333636564646361360a663435663366323832376636306565613531643539636435336136386633 +61623530396338646566656262643637386539636537616466333131396235383031656539313063 +61396537396266613064313531366135336539653233646237346636623130623532303635653531 +62333066373436333330376530353364373638356136343362343232336533653336373361366265 +38303831653632636637386330373466353063376533323237303163633735363065613536663066 +31363635653962663339326635373737316139356365396664303963313137353936323131366263 +37613364376563306631363034306165353636646661376338663262383537386162643332613433 
+38626562646634363936333536346232376337666161376433393361643235636430646636393665 +32356135663161306133313966363138336637636138666637396233326464316636323564613438 +61353633316432373562373039313962343165383935366463623232383965343063333730346361 +37613634353538616264333163373532626433316665313461383439313335653633643331666534 +61343132633133396261396133393664316662373536326234393665316366303666333264653865 +31373136346437663039393637303862633964306563396330353261623034613335383133313034 +63663437353761653239366565313963666435343032343537663631633762323039653437316236 +3436343935316337666663366238366666306363363338623765 diff --git a/ansible/inventory/hosts.ini b/ansible/inventory/hosts.ini new file mode 100644 index 0000000000..c037ab064b --- /dev/null +++ b/ansible/inventory/hosts.ini @@ -0,0 +1,2 @@ +[webservers] +hyperion ansible_host=51.250.77.127 ansible_user=ubuntu ansible_ssh_private_key_file=~/.ssh/lab04 \ No newline at end of file diff --git a/ansible/playbooks/deploy.yml b/ansible/playbooks/deploy.yml new file mode 100644 index 0000000000..ba4875b1db --- /dev/null +++ b/ansible/playbooks/deploy.yml @@ -0,0 +1,8 @@ +--- +- name: Deploy app + hosts: webservers + become: true + vars_files: + - ../group_vars/all.yml + roles: + - web_app diff --git a/ansible/playbooks/provision.yml b/ansible/playbooks/provision.yml new file mode 100644 index 0000000000..e5beaefb3c --- /dev/null +++ b/ansible/playbooks/provision.yml @@ -0,0 +1,8 @@ +--- +- name: Provision server + hosts: webservers + become: true + + roles: + - common + - docker diff --git a/ansible/playbooks/site.yml b/ansible/playbooks/site.yml new file mode 100644 index 0000000000..63dcc1014e --- /dev/null +++ b/ansible/playbooks/site.yml @@ -0,0 +1,6 @@ +--- +- name: Provision infrastructure + import_playbook: provision.yml + +- name: Deploy application + import_playbook: deploy.yml diff --git a/ansible/roles/common/defaults/main.yml b/ansible/roles/common/defaults/main.yml new file mode 100644 index 
0000000000..0d15c8e2e5 --- /dev/null +++ b/ansible/roles/common/defaults/main.yml @@ -0,0 +1,9 @@ +common_packages: + - python3-pip + - curl + - git + - vim + - htop + +# Optional user accounts to manage with the common role +common_users: [] diff --git a/ansible/roles/common/tasks/main.yml b/ansible/roles/common/tasks/main.yml new file mode 100644 index 0000000000..14b2e3cbce --- /dev/null +++ b/ansible/roles/common/tasks/main.yml @@ -0,0 +1,45 @@ +- name: Ensure base packages are available + tags: + - common + - packages + become: true + block: + - name: Update apt cache for base packages + ansible.builtin.apt: + update_cache: true + cache_valid_time: 3600 + + - name: Install common packages + ansible.builtin.apt: + name: "{{ common_packages }}" + state: present + rescue: + - name: Retry apt cache update with fix-missing + ansible.builtin.apt: + update_cache: true + cache_valid_time: 0 + force_apt_get: true + always: + - name: Log completion of package setup + ansible.builtin.copy: + dest: /tmp/common_packages.log + content: "Common role package block completed at {{ ansible_date_time.iso8601 }}\n" + mode: "0644" + +- name: Manage common user accounts + tags: + - common + - users + when: common_users | length > 0 + become: true + block: + - name: Ensure declared users exist + ansible.builtin.user: + name: "{{ item.name }}" + state: "{{ item.state | default('present') }}" + groups: "{{ item.groups | default(omit) }}" + shell: "{{ item.shell | default('/bin/bash') }}" + create_home: "{{ item.create_home | default(true) }}" + loop: "{{ common_users }}" + loop_control: + label: "{{ item.name }}" diff --git a/ansible/roles/docker/defaults/main.yml b/ansible/roles/docker/defaults/main.yml new file mode 100644 index 0000000000..372575fd86 --- /dev/null +++ b/ansible/roles/docker/defaults/main.yml @@ -0,0 +1 @@ +docker_user: ubuntu diff --git a/ansible/roles/docker/handlers/main.yml b/ansible/roles/docker/handlers/main.yml new file mode 100644 index 
0000000000..0162ba52da --- /dev/null +++ b/ansible/roles/docker/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: Restart Docker + ansible.builtin.service: + name: docker + state: restarted diff --git a/ansible/roles/docker/tasks/main.yml b/ansible/roles/docker/tasks/main.yml new file mode 100644 index 0000000000..bac158e8f8 --- /dev/null +++ b/ansible/roles/docker/tasks/main.yml @@ -0,0 +1,63 @@ +- name: Install Docker Engine packages + tags: + - docker + - docker_install + become: true + block: + - name: Install Docker prerequisites + ansible.builtin.apt: + name: + - ca-certificates + - curl + - gnupg + state: present + + - name: Add Docker GPG key + ansible.builtin.shell: | + install -m 0755 -d /etc/apt/keyrings + curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc + args: + creates: /etc/apt/keyrings/docker.asc + + - name: Add Docker repository + ansible.builtin.apt_repository: + repo: "deb [arch=amd64 signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable" + state: present + + - name: Install Docker packages + ansible.builtin.apt: + name: + - docker-ce + - docker-ce-cli + - containerd.io + - python3-docker + state: present + update_cache: true + notify: Restart Docker + rescue: + - name: Wait before retrying apt cache + ansible.builtin.wait_for: + timeout: 10 + + - name: Retry apt cache after failure + ansible.builtin.apt: + update_cache: true + cache_valid_time: 0 + always: + - name: Ensure Docker service is enabled + ansible.builtin.service: + name: docker + state: started + enabled: true + +- name: Configure Docker post-installation settings + tags: + - docker + - docker_config + become: true + block: + - name: Add user to docker group + ansible.builtin.user: + name: "{{ docker_user }}" + groups: docker + append: true diff --git a/ansible/roles/web_app/defaults/main.yml b/ansible/roles/web_app/defaults/main.yml new file mode 100644 index 
0000000000..5aafaefe92 --- /dev/null +++ b/ansible/roles/web_app/defaults/main.yml @@ -0,0 +1,22 @@ +--- +# Application metadata +web_app_name: "{{ app_name | default('devops-app') }}" +web_app_container_name: "{{ app_container_name | default(web_app_name) }}" +web_app_docker_image: "{{ docker_image | default('your_dockerhub_username/devops-info-service') }}" +web_app_docker_tag: "{{ docker_tag | default(docker_image_tag | default('latest')) }}" +web_app_port: "{{ app_port | default(8000) }}" +web_app_internal_port: "{{ app_internal_port | default(8000) }}" +web_app_restart_policy: "{{ restart_policy | default('unless-stopped') }}" +web_app_healthcheck_endpoint: "{{ app_healthcheck_endpoint | default('/health') }}" + +# Compose configuration +web_app_compose_project_dir: "{{ compose_project_dir | default('/opt/' ~ web_app_name) }}" +web_app_docker_compose_pull: "{{ docker_compose_pull | default('always') }}" # valid values: always, missing, never, policy +web_app_environment: {} + +# Registry credentials (override with Vault as needed) +web_app_dockerhub_username: "{{ dockerhub_username | default('') }}" +web_app_dockerhub_password: "{{ dockerhub_password | default('') }}" + +# Wipe control +web_app_wipe: false # Set to true alongside --tags web_app_wipe to remove the app diff --git a/ansible/roles/web_app/handlers/main.yml b/ansible/roles/web_app/handlers/main.yml new file mode 100644 index 0000000000..3c6ae0df1c --- /dev/null +++ b/ansible/roles/web_app/handlers/main.yml @@ -0,0 +1,6 @@ +--- +- name: Restart app + community.docker.docker_container: + name: "{{ web_app_container_name }}" + state: started + restart: true diff --git a/ansible/roles/web_app/meta/main.yml b/ansible/roles/web_app/meta/main.yml new file mode 100644 index 0000000000..2a399e11e6 --- /dev/null +++ b/ansible/roles/web_app/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: docker # Docker engine must exist before compose deployment diff --git a/ansible/roles/web_app/tasks/main.yml 
b/ansible/roles/web_app/tasks/main.yml new file mode 100644 index 0000000000..cb0b647225 --- /dev/null +++ b/ansible/roles/web_app/tasks/main.yml @@ -0,0 +1,56 @@ +- name: Include wipe tasks when requested + tags: + - web_app_wipe + ansible.builtin.include_tasks: wipe.yml + when: web_app_wipe | bool + +- name: Deploy web application with Docker Compose + tags: + - app_deploy + - compose + become: true + block: + - name: Login to Docker registry + community.docker.docker_login: + username: "{{ web_app_dockerhub_username }}" + password: "{{ web_app_dockerhub_password }}" + no_log: true + when: + - web_app_dockerhub_username is defined + - web_app_dockerhub_username | length > 0 + - web_app_dockerhub_password is defined + - web_app_dockerhub_password | length > 0 + + - name: Ensure compose project directory exists + ansible.builtin.file: + path: "{{ web_app_compose_project_dir }}" + state: directory + mode: "0755" + + - name: Template docker-compose definition + ansible.builtin.template: + src: docker-compose.yml.j2 + dest: "{{ web_app_compose_project_dir }}/docker-compose.yml" + mode: "0644" + + - name: Deploy stack with docker compose + community.docker.docker_compose_v2: + project_src: "{{ web_app_compose_project_dir }}" + project_name: "{{ web_app_name }}" + pull: "{{ web_app_docker_compose_pull }}" + state: present + + - name: Wait for application port + ansible.builtin.wait_for: + port: "{{ web_app_port }}" + delay: 5 + timeout: 45 + + - name: Perform HTTP health check + ansible.builtin.uri: + url: "http://localhost:{{ web_app_port }}{{ web_app_healthcheck_endpoint }}" + status_code: 200 + rescue: + - name: Report deployment failure + ansible.builtin.debug: + msg: "Deployment failed for {{ web_app_name }}; inspect compose logs." 
diff --git a/ansible/roles/web_app/tasks/wipe.yml b/ansible/roles/web_app/tasks/wipe.yml new file mode 100644 index 0000000000..757ec5536f --- /dev/null +++ b/ansible/roles/web_app/tasks/wipe.yml @@ -0,0 +1,34 @@ +--- +- name: Remove deployed web application + tags: + - web_app_wipe + when: web_app_wipe | bool + become: true + block: + - name: Check if compose project directory exists + ansible.builtin.stat: + path: "{{ web_app_compose_project_dir }}" + register: web_app_compose_dir + + - name: Stop and remove Compose stack + community.docker.docker_compose_v2: + project_src: "{{ web_app_compose_project_dir }}" + state: absent + when: web_app_compose_dir.stat.isdir | default(false) + failed_when: false + + - name: Remove docker-compose file + ansible.builtin.file: + path: "{{ web_app_compose_project_dir }}/docker-compose.yml" + state: absent + when: web_app_compose_dir.stat.exists | default(false) + + - name: Remove application directory + ansible.builtin.file: + path: "{{ web_app_compose_project_dir }}" + state: absent + when: web_app_compose_dir.stat.exists | default(false) + + - name: Confirm application wipe + ansible.builtin.debug: + msg: "Application {{ web_app_name }} removed from {{ web_app_compose_project_dir }}" diff --git a/ansible/roles/web_app/templates/docker-compose.yml.j2 b/ansible/roles/web_app/templates/docker-compose.yml.j2 new file mode 100644 index 0000000000..856d7658d0 --- /dev/null +++ b/ansible/roles/web_app/templates/docker-compose.yml.j2 @@ -0,0 +1,13 @@ +services: + {{ web_app_name }}: + image: "{{ web_app_docker_image }}:{{ web_app_docker_tag }}" + container_name: "{{ web_app_container_name }}" + restart: "{{ web_app_restart_policy }}" + ports: + - "{{ web_app_port }}:{{ web_app_internal_port }}" +{% if web_app_environment %} + environment: +{% for key, value in web_app_environment.items() %} + {{ key }}: "{{ value }}" +{% endfor %} +{% endif %} diff --git a/app_python/.dockerignore b/app_python/.dockerignore new file mode 100644 index 
0000000000..6921ea343b --- /dev/null +++ b/app_python/.dockerignore @@ -0,0 +1,33 @@ +# VCS +.git +.gitignore + +# Python artifacts +__pycache__/ +*.py[cod] +*.pyo +*.pyd + +# Virtual environments +venv/ +.venv/ + +# Tests & local tooling (not needed at runtime) +tests/ +.pytest_cache/ +.coverage +htmlcov/ + +# Docs & screenshots (not needed at runtime) +docs/ +*.md + +# IDE/editor +.vscode/ +.idea/ + +# OS +.DS_Store + +# Logs +*.log diff --git a/app_python/.gitignore b/app_python/.gitignore new file mode 100644 index 0000000000..c47024995b --- /dev/null +++ b/app_python/.gitignore @@ -0,0 +1,20 @@ +# Python +__pycache__/ +*.py[cod] +venv/ +.env +*.log + +# Test / tooling +.pytest_cache/ +.coverage +htmlcov/ +data/* +!data/.gitkeep + +# IDE +.vscode/ +.idea/ + +# OS +.DS_Store diff --git a/app_python/Dockerfile b/app_python/Dockerfile new file mode 100644 index 0000000000..6749c2e6bd --- /dev/null +++ b/app_python/Dockerfile @@ -0,0 +1,29 @@ +# syntax=docker/dockerfile:1 +FROM python:3.13-slim + +# Security / correctness defaults for Python in containers +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +# Create a non-root user (mandatory) +# Fixed UID/GID helps in CI and when mounting volumes. 
+RUN groupadd --gid 10001 appuser \ + && useradd --uid 10001 --gid 10001 --create-home --shell /usr/sbin/nologin appuser \ + && mkdir -p /app/data /config \ + && chown -R appuser:appuser /app /config + +# Install dependencies first to leverage Docker layer caching +COPY requirements.txt /app/requirements.txt +RUN pip install --no-cache-dir --upgrade pip \ + && pip install --no-cache-dir -r /app/requirements.txt + +# Copy only what is required to run the service +COPY app.py /app/app.py + +EXPOSE 5000 + +USER appuser + +CMD ["python", "app.py"] diff --git a/app_python/README.md b/app_python/README.md new file mode 100644 index 0000000000..7cba16a38f --- /dev/null +++ b/app_python/README.md @@ -0,0 +1,235 @@ +# DevOps Info Service (Python / Flask) + +[![python-ci](https://github.com/egraPA006/DevOps-Core-Course/actions/workflows/python-ci.yml/badge.svg)](https://github.com/egraPA006/DevOps-Core-Course/actions/workflows/python-ci.yml) +![version](https://img.shields.io/github/v/tag/egraPA006/DevOps-Core-Course?sort=semver) + +## Overview + +DevOps Info Service is a small HTTP web service that exposes information +about: - the service itself (metadata) - the host system and runtime +environment - incoming HTTP request context + +It also provides a health-check endpoint intended for monitoring systems +and container orchestrators (Docker, Kubernetes). + +This repository is used across DevOps labs: + - **Lab 1:** endpoints + +documentation + - **Lab 2:** Docker containerization + - **Lab 3:** CI/CD +(lint + tests + Docker build/push on version tags) + +> Lab reports live in `app_python/docs/`. This README is the repository +> entry point. 
+ +## Prerequisites + +- Python **3.11+** (CI uses **Python 3.13**) +- `pip` +- (recommended) virtual environment (`venv`) +- Docker (optional, for container run) + +## Project Structure (Python app) + + app_python/ + ├── app.py + ├── Dockerfile + ├── requirements.txt + ├── requirements-dev.txt + ├── pytest.ini + └── tests/ + ├── conftest.py + └── test_endpoints.py + +## Installation + +``` bash +cd app_python +python -m venv venv +source venv/bin/activate +pip install -r requirements.txt +pip install -r requirements-dev.txt +``` + +## Running the Application + +### Default run + +``` bash +python app.py +``` + +Service will be available at: + + http://0.0.0.0:5000 + +### Custom configuration + +``` bash +PORT=8080 python app.py +HOST=127.0.0.1 PORT=3000 python app.py +DEBUG=true python app.py +``` + +## API Endpoints + +### `GET /` + +Returns service metadata, system information, runtime details, and +request context. + +Example: + +``` bash +curl -s http://127.0.0.1:5000/ | python -m json.tool +``` + +### `GET /health` + +Health-check endpoint used for monitoring and readiness/liveness probes. + +Example: + +``` bash +curl -s http://127.0.0.1:5000/health | python -m json.tool +``` + +### `GET /visits` + +Returns the current persistent visit count. 
+ +Example: + +``` bash +curl -s http://127.0.0.1:5000/visits | python -m json.tool +``` + +## Configuration + +The application is configured via environment variables: + + Variable Default Description + ---------- ----------- -------------------------------------------- + `HOST` `0.0.0.0` Server bind address + `PORT` `5000` TCP port + `DEBUG` `False` Enables Flask debug mode and debug logging + `APP_CONFIG_PATH` `/config/config.json` JSON config file path + `VISITS_FILE` `data/visits` Persistent visit counter file + `APP_ENV` `dev` Runtime environment label + `LOG_LEVEL` `INFO` Log level metadata exposed by the app + `APP_DISPLAY_NAME` `devops-info-service` Service name override + +## Persistent Visits Counter + +Every `GET /` request increments a file-backed counter. The application +loads the existing value on startup, stores updates in `VISITS_FILE`, +and exposes the current count through `GET /visits`. + +Quick local check: + +``` bash +cd app_python +python app.py +curl -s http://127.0.0.1:5000/ +curl -s http://127.0.0.1:5000/visits | python -m json.tool +cat data/visits +``` + +## Testing & Lint (Lab 3) + +Tools used: - **pytest** (unit tests) - **flake8** (lint) + +Run locally: + +``` bash +cd app_python +pytest -q +flake8 . +``` + +## CI/CD (Lab 3) + +### Workflow summary + +GitHub Actions workflow: `.github/workflows/python-ci.yml` + +Triggers: - `push` / `pull_request` only when files under +`app_python/**` change (or when the workflow file itself changes) - +manual trigger via `workflow_dispatch` + +Pipeline stages: 1. **test job** - Python 3.13 - installs dependencies - +runs `flake8` - runs `pytest` - caches pip dependencies 2. +**docker-release job** (runs only if tests passed) - triggers only on +git tags matching `v*` (e.g. 
`v1.2.3`) - builds and pushes Docker image +to Docker Hub - uses BuildKit cache (GHA cache) + +### Docker image & tags + +Docker Hub image: - `egrapa/devops-core-course-lab2` + +On tag `vX.Y.Z`, CI pushes: - `egrapa/devops-core-course-lab2:X.Y.Z` - +`egrapa/devops-core-course-lab2:X.Y` - +`egrapa/devops-core-course-lab2:latest` + +### How to publish a release (SemVer) + +Create and push a version tag: + +``` bash +git tag v1.2.3 +git push origin v1.2.3 +``` + +After that, `docker-release` will build and push the image tags listed +above. + +## Docker + +### Build (local) + +``` bash +docker build -t egrapa/devops-core-course-lab2:dev app_python/ +``` + +### Run + +``` bash +docker run --rm -p 8080:5000 --name devops-info egrapa/devops-core-course-lab2:dev +``` + +Test from host: + +``` bash +curl -s http://127.0.0.1:8080/health | python -m json.tool +curl -s http://127.0.0.1:8080/ | python -m json.tool +``` + +### Compose Persistence Check + +The monitoring compose stack now bind-mounts `monitoring/data` into the +app container so the visits file survives container restarts. 
+ +``` bash +mkdir -p monitoring/data +docker compose -f monitoring/docker-compose.yml up -d app-python +curl -s http://127.0.0.1:8000/ +curl -s http://127.0.0.1:8000/ +curl -s http://127.0.0.1:8000/visits | python -m json.tool +cat monitoring/data/visits +docker compose -f monitoring/docker-compose.yml restart app-python +curl -s http://127.0.0.1:8000/visits | python -m json.tool +``` + +## Notes + +- All timestamps are returned in UTC +- Uptime is calculated since process start +- Client IP is resolved via `X-Forwarded-For` (if present) or + `remote_addr` +- Error responses (404 / 500) are returned in JSON format + +## Development Notes + +- Code follows PEP 8 style guidelines +- Dependencies are pinned for reproducibility +- Logging uses Python standard `logging` diff --git a/app_python/app.py b/app_python/app.py new file mode 100644 index 0000000000..3d95f0df4c --- /dev/null +++ b/app_python/app.py @@ -0,0 +1,487 @@ +""" +DevOps Info Service +Flask application that exposes system/runtime information +and a health endpoint. 
+""" + +from __future__ import annotations + +import logging +import os +import platform +import socket +import json +import sys +import time +import threading +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict + +from flask import Flask, Response, jsonify, request, g +from prometheus_client import ( + CONTENT_TYPE_LATEST, + Counter, + Gauge, + Histogram, + generate_latest, +) + +APP_NAME = "devops-info-service" +APP_VERSION = "1.0.0" +APP_DESCRIPTION = "DevOps course info service" +APP_FRAMEWORK = "Flask" + +# Configuration (env) +HOST = os.getenv("HOST", "0.0.0.0") +PORT = int(os.getenv("PORT", "5000")) +DEBUG = os.getenv("DEBUG", "False").lower() == "true" +DEFAULT_CONFIG_PATH = Path(os.getenv("APP_CONFIG_PATH", "/config/config.json")) +DEFAULT_VISITS_FILE = Path(os.getenv("VISITS_FILE", "data/visits")) + +# Application start time (UTC) +START_TIME = datetime.now(timezone.utc) + + +class VisitCounter: + """Thread-safe file-backed visit counter.""" + + def __init__(self, file_path: Path) -> None: + self._lock = threading.Lock() + self._count = 0 + self.file_path = Path(file_path) + self._load_from_disk() + + def _load_from_disk(self) -> None: + try: + self.file_path.parent.mkdir(parents=True, exist_ok=True) + self._count = int(self.file_path.read_text().strip()) + except FileNotFoundError: + self._count = 0 + except (OSError, ValueError): + self._count = 0 + + def _write_to_disk(self, count: int) -> None: + self.file_path.parent.mkdir(parents=True, exist_ok=True) + temp_file = self.file_path.with_suffix(f"{self.file_path.suffix}.tmp") + temp_file.write_text(f"{count}\n") + os.replace(temp_file, self.file_path) + + def increment(self) -> int: + with self._lock: + self._count += 1 + self._write_to_disk(self._count) + return self._count + + def get_count(self) -> int: + with self._lock: + return self._count + + def reset(self, file_path: Path | None = None) -> None: + with self._lock: + if file_path is not None: + 
self.file_path = Path(file_path) + self._count = 0 + if self.file_path.exists(): + self.file_path.unlink() + self.file_path.parent.mkdir(parents=True, exist_ok=True) + + +visit_counter = VisitCounter(DEFAULT_VISITS_FILE) + + +class JSONFormatter(logging.Formatter): + """Format log records as JSON with UTC timestamps.""" + + RESERVED_ATTRS = { + "name", + "msg", + "args", + "levelname", + "levelno", + "pathname", + "filename", + "module", + "exc_info", + "exc_text", + "stack_info", + "lineno", + "funcName", + "created", + "msecs", + "relativeCreated", + "thread", + "threadName", + "processName", + "process", + "message", + } + + # type: ignore[override] + def format(self, record: logging.LogRecord) -> str: + log_record = { + "timestamp": datetime.fromtimestamp( + record.created, tz=timezone.utc + ).isoformat(), + "level": record.levelname, + "logger": record.name, + "message": record.getMessage(), + } + + for key, value in record.__dict__.items(): + if key in self.RESERVED_ATTRS or key.startswith("_"): + continue + log_record[key] = value + + if record.exc_info: + log_record["exception"] = self.formatException(record.exc_info) + + return json.dumps(log_record, default=str) + + +def configure_logging() -> logging.Logger: + handler = logging.StreamHandler(stream=sys.stdout) + handler.setFormatter(JSONFormatter()) + + root_logger = logging.getLogger() + root_logger.setLevel(logging.DEBUG if DEBUG else logging.INFO) + root_logger.handlers.clear() + root_logger.addHandler(handler) + + werkzeug_logger = logging.getLogger("werkzeug") + werkzeug_logger.setLevel(logging.WARNING) + + app_logger = logging.getLogger(APP_NAME) + app_logger.setLevel(logging.DEBUG if DEBUG else logging.INFO) + return app_logger + + +app = Flask(__name__) +logger = configure_logging() + +http_requests_total = Counter( + "http_requests_total", + "Total HTTP requests handled by the Flask app", + ["method", "endpoint", "status_code"], +) +http_request_duration_seconds = Histogram( + 
"http_request_duration_seconds", + "HTTP request duration in seconds", + ["method", "endpoint", "status_code"], +) +http_requests_in_progress = Gauge( + "http_requests_in_progress", + "HTTP requests currently being processed", + ["method", "endpoint"], +) +devops_info_endpoint_calls_total = Counter( + "devops_info_endpoint_calls_total", + "Number of calls to application endpoints", + ["endpoint"], +) +devops_info_system_collection_seconds = Histogram( + "devops_info_system_collection_seconds", + "Time spent collecting system information", +) + + +def normalize_endpoint() -> str: + """Return a low-cardinality endpoint label for metrics.""" + if request.url_rule and request.url_rule.rule: + return request.url_rule.rule + return request.path or "unknown" + + +def get_uptime() -> Dict[str, Any]: + """Return uptime in seconds and a human-friendly format.""" + delta = datetime.now(timezone.utc) - START_TIME + seconds = int(delta.total_seconds()) + hours = seconds // 3600 + minutes = (seconds % 3600) // 60 + return {"seconds": seconds, "human": f"{hours} hours, {minutes} minutes"} + + +def get_system_info() -> Dict[str, Any]: + """Collect basic system information.""" + return { + "hostname": socket.gethostname(), + "platform": platform.system(), + "platform_version": platform.platform(), + "architecture": platform.machine(), + "cpu_count": os.cpu_count() or 0, + "python_version": platform.python_version(), + } + + +def load_app_config() -> Dict[str, Any]: + """Load application configuration from file with env overrides.""" + config_path = Path(app.config.get("APP_CONFIG_PATH", DEFAULT_CONFIG_PATH)) + file_config: Dict[str, Any] = {} + config_loaded = False + + try: + file_config = json.loads(config_path.read_text()) + config_loaded = True + except FileNotFoundError: + file_config = {} + except (OSError, json.JSONDecodeError) as exc: + logger.warning( + "config_load_failed", + extra={ + "event": "config_load_failed", + "config_path": str(config_path), + "error": str(exc), 
+ }, + ) + + return { + "name": os.getenv( + "APP_DISPLAY_NAME", + file_config.get("applicationName", APP_NAME), + ), + "environment": os.getenv( + "APP_ENV", + file_config.get("environment", "dev"), + ), + "log_level": os.getenv("LOG_LEVEL", "INFO"), + "feature_flags": file_config.get("featureFlags", {}), + "settings": file_config.get("settings", {}), + "config_path": str(config_path), + "config_loaded": config_loaded, + } + + +def get_client_ip() -> str: + """ + Best-effort client IP extraction. + If behind reverse proxy, X-Forwarded-For may exist. + """ + xff = request.headers.get("X-Forwarded-For", "") + if xff: + # XFF can be: "client, proxy1, proxy2" + return xff.split(",")[0].strip() + return request.remote_addr or "unknown" + + +@app.before_request +def log_request() -> None: + g.request_start = datetime.now(timezone.utc) + g.request_start_perf = time.perf_counter() + g.metrics_endpoint = normalize_endpoint() + g.request_gauge = http_requests_in_progress.labels( + method=request.method, + endpoint=g.metrics_endpoint, + ) + g.request_gauge.inc() + logger.info( + "request_received", + extra={ + "event": "request_received", + "method": request.method, + "path": request.path, + "client_ip": get_client_ip(), + "user_agent": request.headers.get("User-Agent", ""), + }, + ) + + +@app.after_request +def log_response(response): + start_time = getattr(g, "request_start", datetime.now(timezone.utc)) + temp = datetime.now(timezone.utc) - start_time + duration_ms = int(temp.total_seconds() * 1000) + endpoint = getattr(g, "metrics_endpoint", normalize_endpoint()) + start_perf = getattr(g, "request_start_perf", time.perf_counter()) + duration_seconds = max(time.perf_counter() - start_perf, 0.0) + status_code = str(response.status_code) + + http_requests_total.labels( + method=request.method, + endpoint=endpoint, + status_code=status_code, + ).inc() + http_request_duration_seconds.labels( + method=request.method, + endpoint=endpoint, + status_code=status_code, + 
).observe(duration_seconds) + + logger.info( + "response_sent", + extra={ + "event": "response_sent", + "method": request.method, + "path": request.path, + "status": response.status_code, + "client_ip": get_client_ip(), + "duration_ms": duration_ms, + "content_length": response.content_length or 0, + }, + ) + return response + + +@app.teardown_request +def track_request_teardown(_error) -> None: + request_gauge = getattr(g, "request_gauge", None) + if request_gauge is not None: + request_gauge.dec() + g.request_gauge = None + + +@app.route("/", methods=["GET"]) +def index(): + """Main endpoint - service and system information.""" + devops_info_endpoint_calls_total.labels(endpoint="/").inc() + current_visits = visit_counter.increment() + uptime = get_uptime() + app_config = load_app_config() + with devops_info_system_collection_seconds.time(): + system_info = get_system_info() + + payload = { + "service": { + "name": app_config["name"], + "version": APP_VERSION, + "description": APP_DESCRIPTION, + "framework": APP_FRAMEWORK, + }, + "system": system_info, + "runtime": { + "uptime_seconds": uptime["seconds"], + "uptime_human": uptime["human"], + "current_time": datetime.now(timezone.utc).isoformat(), + "timezone": "UTC", + }, + "configuration": { + "environment": app_config["environment"], + "log_level": app_config["log_level"], + "feature_flags": app_config["feature_flags"], + "settings": app_config["settings"], + "config_path": app_config["config_path"], + "config_loaded": app_config["config_loaded"], + }, + "visits": { + "count": current_visits, + "storage_path": str(visit_counter.file_path), + }, + "request": { + "client_ip": get_client_ip(), + "user_agent": request.headers.get("User-Agent", ""), + "method": request.method, + "path": request.path, + }, + "endpoints": [ + { + "path": "/", + "method": "GET", + "description": "Service information", + }, + { + "path": "/health", + "method": "GET", + "description": "Health check", + }, + { + "path": "/metrics", + 
"method": "GET", + "description": "Prometheus metrics", + }, + { + "path": "/visits", + "method": "GET", + "description": "Persistent visits counter", + }, + ], + } + + return jsonify(payload), 200 + + +@app.route("/health", methods=["GET"]) +def health(): + """Health check endpoint - used for probes and monitoring.""" + devops_info_endpoint_calls_total.labels(endpoint="/health").inc() + uptime = get_uptime() + return ( + jsonify( + { + "status": "healthy", + "timestamp": datetime.now(timezone.utc).isoformat(), + "uptime_seconds": uptime["seconds"], + } + ), + 200, + ) + + +@app.route("/metrics", methods=["GET"]) +def metrics() -> Response: + """Prometheus metrics endpoint.""" + devops_info_endpoint_calls_total.labels(endpoint="/metrics").inc() + return Response(generate_latest(), mimetype=CONTENT_TYPE_LATEST) + + +@app.route("/visits", methods=["GET"]) +def visits(): + """Return the current visit count.""" + devops_info_endpoint_calls_total.labels(endpoint="/visits").inc() + return jsonify({"visits": visit_counter.get_count()}), 200 + + +@app.errorhandler(404) +def not_found(_error): + logger.warning( + "not_found", + extra={ + "event": "not_found", + "method": request.method, + "path": request.path, + "client_ip": get_client_ip(), + }, + ) + return ( + jsonify({"error": "Not Found", "message": "Endpoint does not exist"}), + 404, + ) + + +@app.errorhandler(500) +def internal_error(_error): + logger.exception( + "Unhandled exception", + extra={ + "event": "internal_error", + "method": request.method, + "path": request.path, + "client_ip": get_client_ip(), + }, + ) + return ( + jsonify( + { + "error": "Internal Server Error", + "message": "An unexpected error occurred", + } + ), + 500, + ) + + +def main() -> None: + logger.info( + "service_startup", + extra={ + "event": "startup", + "service": APP_NAME, + "version": APP_VERSION, + "host": HOST, + "port": PORT, + "debug": DEBUG, + }, + ) + app.run(host=HOST, port=PORT, debug=DEBUG) + + +if __name__ == "__main__": + 
main() diff --git a/app_python/data/.gitkeep b/app_python/data/.gitkeep new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/app_python/data/.gitkeep @@ -0,0 +1 @@ + diff --git a/app_python/docs/LAB02.md b/app_python/docs/LAB02.md new file mode 100644 index 0000000000..4515d270ea --- /dev/null +++ b/app_python/docs/LAB02.md @@ -0,0 +1,211 @@ +# LAB02 — Docker Containerization (Python) + +Repository used for publication (example / chosen for this submission): +- **Docker Hub:** `egrapa/devops-core-course-lab2` +- **Example image tag:** `egrapa/devops-core-course-lab2:lab02` + +--- + +## 1. Docker Best Practices Applied + +### 1) Specific base image version +- **What:** `FROM python:3.13-slim` +- **Why:** Pinning a specific version makes builds reproducible and prevents unexpected breakage when upstream images change. + The `slim` variant is smaller than the full image, reducing download time and attack surface. + +**Dockerfile snippet** +```dockerfile +FROM python:3.13-slim +``` + +### 2) Non-root user (mandatory) +- **What:** Create and run as a dedicated unprivileged user (`appuser`). +- **Why:** Running as root increases the blast radius if the service is compromised. A non-root user is a baseline container security practice. + +**Dockerfile snippet** +```dockerfile +RUN groupadd --gid 10001 appuser \ + && useradd --uid 10001 --gid 10001 --create-home --shell /usr/sbin/nologin appuser +USER appuser +``` + +### 3) Proper layer ordering (dependency caching) +- **What:** Copy `requirements.txt` first, install dependencies, then copy `app.py`. +- **Why:** Docker caches layers. If only app code changes, dependencies remain cached and rebuilds are much faster. + +**Dockerfile snippet** +```dockerfile +COPY requirements.txt /app/requirements.txt +RUN pip install --no-cache-dir -r /app/requirements.txt +COPY app.py /app/app.py +``` + +### 4) Copy only necessary files +- **What:** Only `requirements.txt` and `app.py` are copied. 
+- **Why:** Smaller images build faster, reduce the chance of leaking secrets, and minimize the attack surface. + +### 5) `.dockerignore` to reduce build context +- **What:** Excludes venv, caches, docs, tests, and VCS metadata. +- **Why:** A smaller build context is sent to Docker daemon, improving build speed and preventing accidental inclusion of local artifacts. + +--- + +## 2. Image Information & Decisions + +### Base image choice +- **Chosen:** `python:3.13-slim` +- **Justification:** Modern Python runtime, significantly smaller than full images, and commonly used in production container workflows. + +### Optimization choices +- `pip install --no-cache-dir` reduces final size by not storing pip cache in the image. +- Copy order supports caching (requirements before code). +- Minimal runtime payload (only what is needed to run). + +### Layer structure (high level) +1. Base runtime (Python slim) +2. Non-root user creation (security baseline) +3. Dependency installation (cached if requirements unchanged) +4. Application code copy (frequent changes, fast rebuild) +5. Switch to non-root + `CMD` + +### Final image size +Run: +```bash +docker images | grep devops-core-course-lab2 +``` + +Paste your real output here: +```text + +``` + +--- + +## 3. 
Build & Run Process + +### Build +```bash +docker build -t devops-core-course-lab2:lab02 app_python/ +``` +![alt text](screenshots/image1.png) + +### Run +```bash +docker run --rm -p 8080:5000 --name devops-info devops-core-course-lab2:lab02 +``` +![alt text](screenshots/image2.png) + +### Test endpoints +```bash +curl -s http://127.0.0.1:8080/ | python -m json.tool +curl -s http://127.0.0.1:8080/health | python -m json.tool +``` +```sh +egrapa@la1n ~/p/DevOps-Core-Course (lab02)> curl -s http://127.0.0.1:8080/ | python -m json.tool +``` +```json +{ + "endpoints": [ + { + "description": "Service information", + "method": "GET", + "path": "/" + }, + { + "description": "Health check", + "method": "GET", + "path": "/health" + } + ], + "request": { + "client_ip": "172.17.0.1", + "method": "GET", + "path": "/", + "user_agent": "curl/8.18.0" + }, + "runtime": { + "current_time": "2026-01-29T20:20:32.870580+00:00", + "timezone": "UTC", + "uptime_human": "0 hours, 0 minutes", + "uptime_seconds": 46 + }, + "service": { + "description": "DevOps course info service", + "framework": "Flask", + "name": "devops-info-service", + "version": "1.0.0" + }, + "system": { + "architecture": "x86_64", + "cpu_count": 8, + "hostname": "966befca105e", + "platform": "Linux", + "platform_version": "Linux-6.18.7-arch1-1-x86_64-with-glibc2.41", + "python_version": "3.13.11" + } +} +``` +```bash +egrapa@la1n ~/p/DevOps-Core-Course (lab02)> curl -s http://127.0.0.1:8080/health | python -m json.tool +``` +```json +{ + "status": "healthy", + "timestamp": "2026-01-29T20:21:33.338637+00:00", + "uptime_seconds": 106 +} +``` +### Push to Docker Hub +```bash +docker login +docker push egrapa/devops-core-course-lab2:lab02 +``` +![alt text](screenshots/image3.png) + +### Docker Hub repository URL +```text +https://hub.docker.com/r/egrapa/devops-core-course-lab2 +``` + +### Tagging strategy +Tags follow the pattern `egrapa/devops-core-course-lab2:`. 
+For this lab, `lab02` is used to clearly indicate the image corresponds to Lab 2 and to avoid ambiguity of `latest`. + +--- + +## 4. Technical Analysis + +### Why this Dockerfile works +- The image contains Python, pinned dependencies, and the application module. +- `CMD ["python","app.py"]` starts the service exactly like local execution. +- `HOST`, `PORT`, and `DEBUG` remain configurable via environment variables at runtime. + +### What would happen if layer order changed? +If application code was copied before installing dependencies: +- Any code change would invalidate the cache +- Dependencies would be reinstalled on every build +- Rebuilds would become slower and less efficient + +### Security considerations implemented +- Running as non-root reduces privileges in the container. +- Minimal copy reduces the chance of shipping secrets or unnecessary artifacts. + +### How `.dockerignore` improves builds +- Reduces build context size → faster builds +- Prevents accidental inclusion of venv/tests/docs into the image +- Helps keep runtime image clean and minimal + +--- + +## 5. Challenges & Solutions + +(Write your real notes here.) + +- **Challenge:** + During the initial Docker push attempt, the image could not be uploaded to Docker Hub because the specified tag did not exist locally. + +- **Solution:** + The issue was resolved by re-tagging the already built local image using `docker tag` so that the tag matched the Docker Hub repository name. After re-tagging, the image was successfully pushed without rebuilding. + +- **What I learned:** + I learned how Docker image tagging works and that `docker push` only uploads existing local tags. Re-tagging images is a common and efficient workflow that avoids unnecessary rebuilds and is useful when preparing images for different registries or environments. 
\ No newline at end of file diff --git a/app_python/docs/LAB03.md b/app_python/docs/LAB03.md new file mode 100644 index 0000000000..589d9f2bf8 --- /dev/null +++ b/app_python/docs/LAB03.md @@ -0,0 +1,277 @@ +# LAB03 --- CI/CD (GitHub Actions) + +## 1. Overview + +### Testing Framework + +I chose **pytest** as the testing framework. + +**Why pytest:** + +- Minimal and readable syntax +- Powerful fixtures (`conftest.py`) +- Good ecosystem and CI integration +- Industry-standard for modern Python projects + +Tests are located in: + +app_python/tests/ ├── conftest.py └── test_endpoints.py + +### What Is Covered + +Endpoints tested: + +- `GET /` +- `GET /health` + +Test coverage includes: + +- Correct HTTP status codes (200) +- JSON response structure validation +- Required fields presence +- Data types validation +- Error handling (invalid routes → 404 JSON response) + +Tests validate structure and behavior, not environment-specific values +like hostname. + +------------------------------------------------------------------------ + +### CI Workflow Trigger Configuration + +Workflow file: + +.github/workflows/python-ci.yml + +Triggers: + +- `push` (only if `app_python/**` changes) +- `pull_request` (only if `app_python/**` changes) +- `workflow_dispatch` (manual) + +Docker release runs only on git tags starting with `v` (e.g. `v1.2.3`). + +------------------------------------------------------------------------ + +### Versioning Strategy + +I implemented **Semantic Versioning (SemVer)**. + +Format: + +vMAJOR.MINOR.PATCH + +Example: + +v1.2.3 + +**Why SemVer:** + +- Clear distinction between breaking changes and bug fixes +- Standard for versioned container images +- Easy to implement using git tags +- Appropriate for a service exposing API endpoints + +------------------------------------------------------------------------ + +## 2. 
Workflow Evidence + +### Successful Workflow Run + +https://github.com/egraPA006/DevOps-Core-Course/actions/workflows/python-ci.yml + +------------------------------------------------------------------------ + +### Tests Passing Locally + +Example output: + +\$ pytest -q 2 passed in 0.45s + +Lint: + +\$ flake8 . (no output → no lint errors) + +------------------------------------------------------------------------ + +### Docker Image on Docker Hub + +Repository: + +https://hub.docker.com/r/egrapa/devops-core-course-lab2 + +On tag `v1.2.3`, CI publishes: + +- egrapa/devops-core-course-lab2:1.2.3 +- egrapa/devops-core-course-lab2:1.2 +- egrapa/devops-core-course-lab2:latest + +------------------------------------------------------------------------ + +### Status Badge + +README includes a GitHub Actions status badge reflecting real workflow +status. + +------------------------------------------------------------------------ + +## 3. Best Practices Implemented + +### Path-Based Triggers + +CI runs only when `app_python/**` changes. + +Prevents unnecessary builds and saves CI resources. + +------------------------------------------------------------------------ + +### Concurrency Control + +Outdated workflow runs are automatically canceled. + +Prevents duplicate builds and reduces wasted CI time. + +------------------------------------------------------------------------ + +### Job Dependency (Fail Fast) + +Docker build runs only if tests pass. + +Prevents publishing broken images. + +------------------------------------------------------------------------ + +### Dependency Caching (pip) + +Using built-in `actions/setup-python` pip caching. + +Performance improvement: + +- First run: \~50--60 seconds +- Cached run: \~20--25 seconds + +\~50% faster dependency installation. + +------------------------------------------------------------------------ + +### Docker Layer Caching + +Using GitHub Actions BuildKit cache. + +Speeds up Docker builds by reusing previous layers. 
+ +------------------------------------------------------------------------ + +### Least Privilege Permissions + +permissions: contents: read + +Limits GitHub token access and reduces attack surface. + +------------------------------------------------------------------------ + +### Snyk Security Scanning + +Snyk integration was planned but could not be completed. + +Due to **regional network restrictions**, I could not obtain or validate +the Snyk API token.\ +Access to Snyk services was blocked, and even using a proxy did not +resolve the issue. + +Because of this: + +- Snyk CLI could not authenticate +- The API token could not be verified +- Automated vulnerability scanning could not be enabled + +Planned setup (once access is available): + +- Use `snyk/actions` +- Authenticate via `SNYK_TOKEN` stored as GitHub Secret +- Configure failure on high/critical vulnerabilities + +The CI pipeline structure already supports adding this step once network +restrictions are removed. + +------------------------------------------------------------------------ + +## 4. Key Decisions + +### Versioning Strategy + +SemVer was chosen because: + +- The service exposes API endpoints +- It clearly communicates breaking changes +- It integrates naturally with git tags +- Docker tags directly map to SemVer versions + +------------------------------------------------------------------------ + +### Docker Tags + +On `vX.Y.Z` tag, CI generates: + +- `X.Y.Z` +- `X.Y` +- `latest` + +Provides reproducibility and rolling updates. + +------------------------------------------------------------------------ + +### Workflow Triggers + +Workflow runs on: + +- push +- pull request +- only when `app_python/**` changes + +Docker release runs only on version tags. + +Prevents accidental publishing and unnecessary CI execution. 
+ +------------------------------------------------------------------------ + +### Test Coverage + +Tests cover: + +- Public API endpoints +- JSON structure validation +- Health-check behavior +- HTTP status codes + +Not covered: + +- Logging internals +- Some environment edge cases +- Internal helper logic + +Focus is on public API behavior. + +------------------------------------------------------------------------ + +## 5. Challenges + +- Docker Hub authentication initially failed due to incorrect token + scope +- Tag extraction logic required adjustment +- Ensured Docker release runs only on version tags +- Snyk integration blocked due to regional restrictions + +------------------------------------------------------------------------ + +## Conclusion + +This CI/CD pipeline: + +- Automatically tests code on changes +- Enforces linting and quality checks +- Builds and publishes Docker images using SemVer +- Uses caching and concurrency optimizations +- Prevents broken releases + +It provides a reliable foundation for future DevOps labs. \ No newline at end of file diff --git a/app_python/docs/Lab01.md b/app_python/docs/Lab01.md new file mode 100644 index 0000000000..15c394656e --- /dev/null +++ b/app_python/docs/Lab01.md @@ -0,0 +1,116 @@ +# LAB01 — DevOps Info Service (Python / Flask) + +## 1. Framework Selection + +**Chosen framework:** Flask + +Flask was selected for this lab because it allows building a production-ready +HTTP service with minimal boilerplate and dependencies. This is especially +important for an introductory DevOps lab, where the focus is on understanding +service behavior, configuration, and observability rather than framework +complexity. 
+ +### Comparison with Alternatives + +| Framework | Advantages | Disadvantages | Suitability | +|---------|------------|---------------|-------------| +| Flask | Lightweight, simple, minimal dependencies | Fewer built-in features | **Best fit** | +| FastAPI | Modern, async, automatic OpenAPI docs | Requires ASGI server, more concepts | Good but unnecessary | +| Django | Full-featured, ORM, admin panel | Heavy and complex for small service | Overkill | + +Flask provides the fastest path from idea to a working service while keeping +full control over runtime behavior. + +--- + +## 2. Best Practices Applied + +### Clean Code Organization +- Clear separation of configuration, helpers, and route handlers +- Constants used for service metadata and environment configuration +- Small, well-named helper functions (`get_uptime`, `get_system_info`) + +### Configuration via Environment Variables +- `HOST`, `PORT`, and `DEBUG` are configurable +- Defaults allow running the service without any configuration +- Enables easy reuse in Docker and Kubernetes environments + +### Logging +- Standard Python `logging` module is used +- Log level depends on `DEBUG` flag +- Incoming requests are logged (method, path, user-agent, client IP) + +Example: +```python +logging.basicConfig(level=logging.INFO) +logger.info("Application starting") +``` + +### Error Handling +- Custom JSON responses for HTTP 404 and 500 errors +- Ensures consistent API behavior even on failures +- Internal errors are logged with stack traces + +--- + +## 3. API Documentation + +### GET `/` +Returns information about the service, system, runtime, request context, +and available endpoints. + +Example request: +```bash +curl -s http://127.0.0.1:5000/ | python -m json.tool +``` + +### GET `/health` +Simple health-check endpoint used for monitoring and container probes. + +Example request: +```bash +curl -s http://127.0.0.1:5000/health | python -m json.tool +``` + +--- + +## 4. 
Testing Evidence + +The following screenshots are included in `docs/screenshots/`: + +1. **01-main-endpoint.png** + ![alt text](screenshots/01-main-endpoint.png) + Full JSON output from the main endpoint (`GET /`) + +2. **02-health-check.png** + ![alt text](screenshots/02-health-check.png) + Health check response from `GET /health` + +3. **03-formatted-output.png** + Pretty-printed JSON output using `python -m json.tool` + +These screenshots confirm that all required endpoints work correctly. + +--- + +## 5. Challenges & Solutions + +### Determining Client IP +- Problem: `remote_addr` may be incorrect behind reverse proxies +- Solution: Prefer `X-Forwarded-For` header when present + +### Correct Uptime Calculation +- Problem: Uptime must be consistent across requests +- Solution: Store application start time once at process startup (UTC) + +--- + +## 6. GitHub Community + +Starring repositories is a simple way to support open-source maintainers and +signal useful or high-quality projects. Star counts also improve project +visibility and discovery on GitHub. + +Following developers and classmates helps track their work, learn from their +solutions, and simplifies collaboration in team-based and professional +environments. 
\ No newline at end of file diff --git a/app_python/docs/screenshots/01-main-endpoint.png b/app_python/docs/screenshots/01-main-endpoint.png new file mode 100644 index 0000000000..55feae2d54 Binary files /dev/null and b/app_python/docs/screenshots/01-main-endpoint.png differ diff --git a/app_python/docs/screenshots/02-health-check.png b/app_python/docs/screenshots/02-health-check.png new file mode 100644 index 0000000000..c7586b7ff8 Binary files /dev/null and b/app_python/docs/screenshots/02-health-check.png differ diff --git a/app_python/docs/screenshots/image1.png b/app_python/docs/screenshots/image1.png new file mode 100644 index 0000000000..3626197169 Binary files /dev/null and b/app_python/docs/screenshots/image1.png differ diff --git a/app_python/docs/screenshots/image2.png b/app_python/docs/screenshots/image2.png new file mode 100644 index 0000000000..654ea58262 Binary files /dev/null and b/app_python/docs/screenshots/image2.png differ diff --git a/app_python/docs/screenshots/image3.png b/app_python/docs/screenshots/image3.png new file mode 100644 index 0000000000..2861b0bf74 Binary files /dev/null and b/app_python/docs/screenshots/image3.png differ diff --git a/app_python/pytest.ini b/app_python/pytest.ini new file mode 100644 index 0000000000..03f586d416 --- /dev/null +++ b/app_python/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +pythonpath = . 
\ No newline at end of file diff --git a/app_python/requirements-dev.txt b/app_python/requirements-dev.txt new file mode 100644 index 0000000000..7ff1d88997 --- /dev/null +++ b/app_python/requirements-dev.txt @@ -0,0 +1,3 @@ +pytest==9.0.2 +flake8==7.3.0 +pytest-cov==7.0.0 \ No newline at end of file diff --git a/app_python/requirements.txt b/app_python/requirements.txt new file mode 100644 index 0000000000..46c776bf8d --- /dev/null +++ b/app_python/requirements.txt @@ -0,0 +1,2 @@ +Flask==3.1.0 +prometheus-client==0.23.1 diff --git a/app_python/tests/conftest.py b/app_python/tests/conftest.py new file mode 100644 index 0000000000..a0d99c7b63 --- /dev/null +++ b/app_python/tests/conftest.py @@ -0,0 +1,29 @@ +import pytest +from app import app as flask_app, visit_counter + + +@pytest.fixture() +def client(tmp_path): + visits_file = tmp_path / "visits" + config_file = tmp_path / "config.json" + config_file.write_text( + """ +{ + "applicationName": "devops-info-service", + "environment": "test", + "featureFlags": { + "visitsPersistence": true + }, + "settings": { + "source": "pytest" + } +} +""".strip() + ) + flask_app.config.update( + TESTING=True, + APP_CONFIG_PATH=str(config_file), + ) + visit_counter.reset(visits_file) + with flask_app.test_client() as c: + yield c diff --git a/app_python/tests/test_endpoints.py b/app_python/tests/test_endpoints.py new file mode 100644 index 0000000000..71ffeba607 --- /dev/null +++ b/app_python/tests/test_endpoints.py @@ -0,0 +1,111 @@ +def test_root_ok_structure(client): + r = client.get("/", headers={"User-Agent": "pytest"}) + assert r.status_code == 200 + data = r.get_json() + assert isinstance(data, dict) + + # required top-level keys + for k in ( + "service", + "system", + "runtime", + "configuration", + "visits", + "request", + "endpoints", + ): + assert k in data + + # service + assert data["service"]["name"] == "devops-info-service" + assert data["service"]["version"] == "1.0.0" + + # system fields exist (values can vary) 
+ for k in ( + "hostname", + "platform", + "platform_version", + "architecture", + "cpu_count", + "python_version", + ): + assert k in data["system"] + + # runtime + assert isinstance(data["runtime"]["uptime_seconds"], int) + assert "uptime_human" in data["runtime"] + assert data["runtime"]["timezone"] == "UTC" + + # configuration + assert data["configuration"]["environment"] == "test" + assert data["configuration"]["config_loaded"] is True + assert data["configuration"]["feature_flags"]["visitsPersistence"] is True + + # visits + assert data["visits"]["count"] == 1 + + # request echo + assert data["request"]["path"] == "/" + assert data["request"]["method"] == "GET" + assert data["request"]["user_agent"] == "pytest" + assert any( + endpoint["path"] == "/metrics" for endpoint in data["endpoints"] + ) + assert any( + endpoint["path"] == "/visits" for endpoint in data["endpoints"] + ) + + +def test_health_ok(client): + r = client.get("/health") + assert r.status_code == 200 + data = r.get_json() + assert data["status"] == "healthy" + assert isinstance(data["uptime_seconds"], int) + assert "timestamp" in data + + +def test_404_json(client): + r = client.get("/nope") + assert r.status_code == 404 + data = r.get_json() + assert data["error"] == "Not Found" + + +def test_client_ip_from_xff(client): + r = client.get("/", headers={"X-Forwarded-For": "1.2.3.4, 10.0.0.1"}) + assert r.status_code == 200 + data = r.get_json() + assert data["request"]["client_ip"] == "1.2.3.4" + + +def test_visits_endpoint_tracks_persistent_count(client): + client.get("/") + client.get("/") + + r = client.get("/visits") + assert r.status_code == 200 + data = r.get_json() + assert data["visits"] == 2 + + +def test_metrics_endpoint_exposes_prometheus_metrics(client): + client.get("/") + client.get("/health") + client.get("/visits") + + r = client.get("/metrics") + assert r.status_code == 200 + assert "text/plain" in r.content_type + + payload = r.data.decode() + assert "http_requests_total" in 
payload + assert 'endpoint="/"' in payload + assert 'endpoint="/health"' in payload + assert 'endpoint="/visits"' in payload + assert 'method="GET"' in payload + assert 'status_code="200"' in payload + assert "http_request_duration_seconds_bucket" in payload + assert "http_requests_in_progress" in payload + assert "devops_info_endpoint_calls_total" in payload + assert "devops_info_system_collection_seconds" in payload diff --git a/docs/LAB04.md b/docs/LAB04.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/LAB04.pdf b/docs/LAB04.pdf new file mode 100644 index 0000000000..7333f819fa Binary files /dev/null and b/docs/LAB04.pdf differ diff --git a/docs/image-1.png b/docs/image-1.png new file mode 100644 index 0000000000..dd9b114e16 Binary files /dev/null and b/docs/image-1.png differ diff --git a/docs/image-10.png b/docs/image-10.png new file mode 100644 index 0000000000..bd6ed42f17 Binary files /dev/null and b/docs/image-10.png differ diff --git a/docs/image-11.png b/docs/image-11.png new file mode 100644 index 0000000000..029b93bad2 Binary files /dev/null and b/docs/image-11.png differ diff --git a/docs/image-12.png b/docs/image-12.png new file mode 100644 index 0000000000..aaee9e0df7 Binary files /dev/null and b/docs/image-12.png differ diff --git a/docs/image-13.png b/docs/image-13.png new file mode 100644 index 0000000000..1c49c8ba60 Binary files /dev/null and b/docs/image-13.png differ diff --git a/docs/image-14.png b/docs/image-14.png new file mode 100644 index 0000000000..248095a868 Binary files /dev/null and b/docs/image-14.png differ diff --git a/docs/image-15.png b/docs/image-15.png new file mode 100644 index 0000000000..49b3172bd9 Binary files /dev/null and b/docs/image-15.png differ diff --git a/docs/image-16.png b/docs/image-16.png new file mode 100644 index 0000000000..6d0cc3ba26 Binary files /dev/null and b/docs/image-16.png differ diff --git a/docs/image-17.png b/docs/image-17.png new file mode 100644 index 0000000000..1bfb5e0ee6 
Binary files /dev/null and b/docs/image-17.png differ diff --git a/docs/image-2.png b/docs/image-2.png new file mode 100644 index 0000000000..5f7ffcf384 Binary files /dev/null and b/docs/image-2.png differ diff --git a/docs/image-3.png b/docs/image-3.png new file mode 100644 index 0000000000..634c47a76f Binary files /dev/null and b/docs/image-3.png differ diff --git a/docs/image-4.png b/docs/image-4.png new file mode 100644 index 0000000000..27f7ed8044 Binary files /dev/null and b/docs/image-4.png differ diff --git a/docs/image-5.png b/docs/image-5.png new file mode 100644 index 0000000000..f65397b42f Binary files /dev/null and b/docs/image-5.png differ diff --git a/docs/image-6.png b/docs/image-6.png new file mode 100644 index 0000000000..b65cd0d791 Binary files /dev/null and b/docs/image-6.png differ diff --git a/docs/image-7.png b/docs/image-7.png new file mode 100644 index 0000000000..c5eddd34ca Binary files /dev/null and b/docs/image-7.png differ diff --git a/docs/image-8.png b/docs/image-8.png new file mode 100644 index 0000000000..a3e47faeb1 Binary files /dev/null and b/docs/image-8.png differ diff --git a/docs/image-9.png b/docs/image-9.png new file mode 100644 index 0000000000..19a0d73b7d Binary files /dev/null and b/docs/image-9.png differ diff --git a/docs/image.png b/docs/image.png new file mode 100644 index 0000000000..db7367b2c2 Binary files /dev/null and b/docs/image.png differ diff --git a/k8s/ARGOCD.md b/k8s/ARGOCD.md new file mode 100644 index 0000000000..b8926c5681 --- /dev/null +++ b/k8s/ARGOCD.md @@ -0,0 +1,346 @@ +# Lab 13 — GitOps With ArgoCD + +## Implementation Summary + +This lab adds GitOps-based continuous deployment with ArgoCD on top of the Helm chart created in Labs 10-12. The application is no longer deployed directly with `helm install`; instead, ArgoCD watches the Git repository and reconciles the Kubernetes cluster to match the declarative manifests stored in Git. 
+ +Relevant implementation files: + +- [`k8s/argocd/application.yaml`](argocd/application.yaml) +- [`k8s/argocd/application-dev.yaml`](argocd/application-dev.yaml) +- [`k8s/argocd/application-prod.yaml`](argocd/application-prod.yaml) +- [`k8s/argocd/namespaces.yaml`](argocd/namespaces.yaml) +- [`k8s/argocd/applicationset.yaml`](argocd/applicationset.yaml) + +Repository source used by the ArgoCD manifests: + +- `repoURL`: `https://github.com/egraPA006/DevOps-Core-Course.git` +- `targetRevision`: `lab13` +- `path`: `k8s/devops-info-service` + +Implemented behavior: + +- ArgoCD is installed in a dedicated `argocd` namespace. +- A base `Application` manifest deploys the Helm chart with manual sync. +- Separate `dev` and `prod` Applications deploy different value files to different namespaces. +- The `dev` environment uses automatic sync with `selfHeal` and `prune`. +- The `prod` environment remains on manual sync. +- A bonus `ApplicationSet` manifest can generate both environments from one template. 
+ +## ArgoCD Setup + +### Installation + +ArgoCD is installed with the official Helm chart into a dedicated namespace: + +```bash +helm repo add argo https://argoproj.github.io/argo-helm +helm repo update + +kubectl create namespace argocd +helm install argocd argo/argo-cd --namespace argocd +``` + +![alt text](image-27.png) + +Installation verification: + +```bash +kubectl get pods -n argocd +kubectl wait --for=condition=ready pod \ + -l app.kubernetes.io/name=argocd-server \ + -n argocd \ + --timeout=120s +kubectl get svc -n argocd +``` +![alt text](image-29.png) +![alt text](image-28.png) + +### UI Access + +The ArgoCD UI is exposed locally with port-forwarding: + +```bash +kubectl port-forward svc/argocd-server -n argocd 8080:443 +``` + +The initial admin password is retrieved from the bootstrap secret: + +```bash +kubectl -n argocd get secret argocd-initial-admin-secret \ + -o jsonpath="{.data.password}" | base64 -d +``` + +Login details: + +- URL: `https://localhost:8080` +- Username: `admin` +- Password: value from `argocd-initial-admin-secret` + +![alt text](image-30.png) + +### CLI Configuration + +The `argocd` CLI is used for synchronization and status checks. + +Example login: + +```bash +argocd login localhost:8080 --insecure +argocd version +argocd account get-user-info +``` + +![alt text](image-31.png) +![alt text](image-32.png) + +## Application Configuration + +### Base Application Manifest + +The initial ArgoCD application is defined in [`k8s/argocd/application.yaml`](argocd/application.yaml). 
+ +Key settings: + +- `project: default` +- `repoURL: https://github.com/egraPA006/DevOps-Core-Course.git` +- `targetRevision: lab13` +- `path: k8s/devops-info-service` +- `destination.server: https://kubernetes.default.svc` +- `destination.namespace: default` +- `helm.valueFiles: values.yaml` +- manual synchronization by default + +Apply the manifest: + +```bash +kubectl apply -f k8s/argocd/application.yaml +argocd app get devops-info-service +``` +![alt text](image-33.png) +Run the initial sync: + +```bash +argocd app sync devops-info-service +argocd app get devops-info-service +``` + +Important ArgoCD states: + +- `OutOfSync`: cluster state differs from Git +- `Synced`: cluster state matches Git +- `Healthy`: the application is running correctly +- `Progressing`: resources are still reconciling + +### GitOps Deployment Flow + +The deployment workflow is Git-driven: + +1. Change the Helm chart or a values file in the repository. +2. Commit and push the change to the tracked branch. +3. ArgoCD detects the new Git revision. +4. The application becomes `OutOfSync`. +5. A manual or automatic sync applies the new desired state. + +Example change: + +```bash +git add k8s/devops-info-service/values-dev.yaml +git commit -m "Adjust dev replica count for ArgoCD test" +git push origin lab13 +``` + +This section satisfies the lab requirements for application manifests, source and destination configuration, and values file selection. + +## Multi-Environment Deployment + +### Namespace Separation + +The environments are isolated in separate namespaces: + +```bash +kubectl apply -f k8s/argocd/namespaces.yaml +kubectl get ns dev prod +``` + +This separation allows two independent instances of the same chart to run with different settings. + +### Development Application + +The development environment is defined in [`k8s/argocd/application-dev.yaml`](argocd/application-dev.yaml). 
+ +Development configuration: + +- destination namespace: `dev` +- Helm release name: `devops-info-dev` +- values file: `values-dev.yaml` +- automated sync enabled +- `prune: true` +- `selfHeal: true` + +Apply it: + +```bash +kubectl apply -f k8s/argocd/application-dev.yaml +argocd app get devops-info-service-dev +``` + +### Production Application + +The production environment is defined in [`k8s/argocd/application-prod.yaml`](argocd/application-prod.yaml). + +Production configuration: + +- destination namespace: `prod` +- Helm release name: `devops-info-prod` +- values file: `values-prod.yaml` +- manual sync only + +Apply it: + +```bash +kubectl apply -f k8s/argocd/application-prod.yaml +argocd app get devops-info-service-prod +``` + +### Environment Differences + +The dev and prod differences come from the existing Helm values files: + +- Dev uses `replicaCount: 1`, smaller resource requests and limits, and `NodePort`. +- Prod uses `replicaCount: 3`, larger resource requests and limits, and `LoadBalancer`. +- Dev is configured for faster iteration and automatic reconciliation. +- Prod is configured for controlled rollout and manual approval. + +Sync policy rationale: + +- Dev uses automatic sync so changes from Git are applied immediately. +- Prod stays manual so changes can be reviewed and released intentionally. +- This pattern reduces the risk of automatically pushing an unverified change into production. + +Verification commands: + +```bash +argocd app list +kubectl get all -n dev +kubectl get all -n prod +kubectl get deploy -n dev +kubectl get deploy -n prod +``` + +This section satisfies the lab requirements for dev vs prod configuration differences, sync policy rationale, and namespace separation. + +## Self-Healing Evidence + +### Manual Scale Test + +Self-healing is tested in the `dev` environment because only that application has `automated.selfHeal` enabled. 
+ +Commands: + +```bash +kubectl get deploy -n dev +kubectl scale deployment devops-info-dev-devops-info-service -n dev --replicas=5 +argocd app get devops-info-service-dev +argocd app diff devops-info-service-dev +kubectl get deploy -n dev +``` + +Expected behavior: + +- the deployment initially uses the replica count from `values-dev.yaml` +- manual scaling creates drift between Git and the live cluster +- ArgoCD marks the app as `OutOfSync` +- ArgoCD automatically restores the replica count from Git +- the application returns to `Synced` + +Example evidence table: + +| Time | Action | Observation | +| --- | --- | --- | +| `22:10` | `kubectl get deploy -n dev` | `replicas=1` | +| `22:11` | `kubectl scale ... --replicas=5` | deployment changed manually | +| `22:12` | `argocd app get devops-info-service-dev` | `OutOfSync` | +| `22:13` | `kubectl get deploy -n dev` | replicas restored to `1` | + +### Pod Deletion Test + +Commands: + +```bash +kubectl get pods -n dev +kubectl delete pod -n dev -l app.kubernetes.io/instance=devops-info-dev +kubectl get pods -n dev -w +``` + +Expected behavior: + +- Kubernetes recreates the deleted Pod through the ReplicaSet and Deployment controllers +- this is Kubernetes self-healing, not ArgoCD self-healing +- the desired Deployment configuration does not change during this test + +### Configuration Drift Test + +Commands: + +```bash +kubectl label deployment devops-info-dev-devops-info-service -n dev drift-test=true --overwrite +argocd app diff devops-info-service-dev +argocd app get devops-info-service-dev +kubectl get deployment devops-info-dev-devops-info-service -n dev --show-labels +``` + +Expected behavior: + +- the manual label changes the live resource state +- ArgoCD displays the difference in the diff view +- auto-sync and self-heal remove the manual label and restore the Git-defined state + +### Sync Behavior + +ArgoCD sync can be triggered by: + +- a manual sync from the UI +- the `argocd app sync` CLI command +- 
automated sync when `automated` is enabled +- drift detection between Git and the cluster + +Default Git polling behavior: + +- ArgoCD checks Git approximately every 3 minutes by default +- webhooks can reduce the delay +- manual sync can be used for immediate reconciliation + +Difference between Kubernetes healing and ArgoCD healing: + +- Kubernetes recreates missing or failed Pods to satisfy the Deployment/ReplicaSet state +- ArgoCD restores declarative configuration so the cluster matches Git + +This section satisfies the lab requirements for the manual scale test, pod deletion test, configuration drift test, and explanation of sync behavior. + +## Bonus — ApplicationSet + +The bonus task is implemented in [`k8s/argocd/applicationset.yaml`](argocd/applicationset.yaml). + +It uses a list generator to create both environments from a single template: + +- `dev` +- `prod` + +Benefits of the ApplicationSet approach: + +- less duplication than separate Application manifests +- easier scaling to additional environments +- shared logic stays in one template + +When to prefer individual Application manifests: + +- when there are only a few environments +- when each environment differs significantly +- when explicit per-environment manifests are easier to review + +Apply the ApplicationSet: + +```bash +kubectl apply -f k8s/argocd/applicationset.yaml +``` \ No newline at end of file diff --git a/k8s/CONFIGMAPS.md b/k8s/CONFIGMAPS.md new file mode 100644 index 0000000000..8ee02d21d6 --- /dev/null +++ b/k8s/CONFIGMAPS.md @@ -0,0 +1,280 @@ +# Lab 12 — ConfigMaps And Persistent Volumes + +## Implementation Summary + +This lab extends the Helm chart and Flask application with externalized configuration and persistent storage. 
+ +Relevant implementation files: + +- [`k8s/devops-info-service/files/config.json`](devops-info-service/files/config.json) +- [`k8s/devops-info-service/templates/configmap.yaml`](devops-info-service/templates/configmap.yaml) +- [`k8s/devops-info-service/templates/pvc.yaml`](devops-info-service/templates/pvc.yaml) +- [`k8s/devops-info-service/templates/deployment.yaml`](devops-info-service/templates/deployment.yaml) +- [`k8s/devops-info-service/values.yaml`](devops-info-service/values.yaml) +- [`app_python/app.py`](../app_python/app.py) +- [`monitoring/docker-compose.yml`](../monitoring/docker-compose.yml) +- [`app_python/README.md`](../app_python/README.md) + +Implemented behavior: + +- `GET /` increments a persistent visits counter stored in a file. +- `GET /visits` returns the current counter value. +- The application loads JSON configuration from `/config/config.json`. +- A file-based ConfigMap provides `config.json`. +- A second ConfigMap injects environment variables with `envFrom`. +- A PersistentVolumeClaim stores the visits file under `/data/visits`. + +## 1. 
Application Changes + +### Visits Counter + +The Flask app now includes a thread-safe `VisitCounter` that: + +- reads the counter from the visits file on startup +- defaults to `0` when the file does not exist +- increments and persists the counter on each `GET /` +- writes updates atomically through a temporary file and `os.replace` + +The counter path is configurable through: + +```bash +VISITS_FILE=/data/visits +``` + +### New Endpoint + +The application exposes: + +```bash +GET /visits +``` + +Example response: + +```json +{ + "visits": 3 +} +``` + +The root endpoint also includes: + +- current visits count +- configuration metadata +- config file path +- whether config was loaded successfully + +![alt text](image-20.png) + +### Local Docker Testing + +The monitoring compose stack now mounts a host directory for persistence: + +```yaml +volumes: + - ./data:/data + - ../k8s/devops-info-service/files/config.json:/config/config.json:ro +``` + +- the `visits` file appears in `monitoring/data/` +- the counter value survives the container restart + +## 2. 
ConfigMap Implementation + +### File-Based ConfigMap + +The chart includes `templates/configmap.yaml` with a ConfigMap built from a file: + +```yaml +data: + config.json: |- +{{ .Files.Get "files/config.json" | indent 4 }} +``` + +The file content is stored in: + +```json +{ + "applicationName": "devops-info-service", + "environment": "dev", + "featureFlags": { + "visitsPersistence": true, + "metricsEnabled": true, + "healthChecksEnabled": true + }, + "settings": { + "responseFormat": "json", + "configSource": "helm-file-configmap" + } +} +``` + +### Env ConfigMap + +The same template also creates a second ConfigMap for environment variables: + +```yaml +data: + APP_CONFIG_PATH: "/config/config.json" + APP_DISPLAY_NAME: "devops-info-service" + APP_ENV: "dev" + LOG_LEVEL: "INFO" + VISITS_FILE: "/data/visits" +``` + +### Deployment Wiring + +The deployment consumes both configuration styles: + +- file mount through `volumes` and `volumeMounts` +- environment variables through `envFrom.configMapRef` + +Config file mount: + +```yaml +volumeMounts: + - name: app-config + mountPath: /config/config.json + subPath: config.json + readOnly: true +``` + +Env injection: + +```yaml +envFrom: + - configMapRef: + name: {{ include "devops-info-service.envConfigMapName" . }} +``` + +### Verification Commands + +List created resources: + +```bash +kubectl get configmap,pvc -n devops +``` +![alt text](image-21.png) + +Read the mounted file inside the pod: + +```bash +kubectl exec -n devops deploy/devops-info-devops-info-service -- \ + cat /config/config.json +``` +![alt text](image-22.png) + +Inspect environment variables: + +```bash +kubectl exec -n devops deploy/devops-info-devops-info-service -- \ + sh -c 'printenv | grep -E "^(APP_|LOG_LEVEL|VISITS_FILE)"' +``` +![alt text](image-23.png) + +## 3. 
Persistent Volume + +### PVC Configuration + +The chart now includes `templates/pvc.yaml`: + +```yaml +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Mi +``` + +The storage class is configurable through: + +```yaml +persistence: + enabled: true + size: 100Mi + storageClass: "" +``` + +If `storageClass` is left empty, Kubernetes uses the cluster default storage class. + +### Deployment Mount + +The PVC is mounted into the application container: + +```yaml +volumes: + - name: app-data + persistentVolumeClaim: + claimName: {{ include "devops-info-service.pvcName" . }} +``` + +```yaml +volumeMounts: + - name: app-data + mountPath: /data +``` + +The application stores the visits file in: + +```text +/data/visits +``` + +### Persistence Test + +Access the application a few times: + +```bash +curl http://127.0.0.1:8080/ +curl http://127.0.0.1:8080/ +curl http://127.0.0.1:8080/visits +``` +![alt text](image-24.png) + +Capture the pod name and delete it: + +```bash +kubectl get pods -n devops +kubectl delete pod -n devops +``` +![alt text](image-25.png) +Wait for the replacement pod, then verify the counter: + +```bash +kubectl get pods -n devops +curl http://127.0.0.1:8080/visits +kubectl exec -n devops deploy/devops-info-devops-info-service -- cat /data/visits +``` +![alt text](image-26.png) + +This demonstrates that the data survives pod recreation because it is stored on the PVC rather than inside the container filesystem. + +## 4. ConfigMap Vs Secret + +Use a ConfigMap when: + +- the data is not sensitive +- the values are application settings +- the same container image must run in different environments + +Use a Secret when: + +- the data includes passwords, tokens, API keys, or certificates +- access to the values must be restricted more tightly +- you want to integrate with secret-management systems such as Vault + +Key differences: + +- ConfigMaps are intended for non-sensitive configuration. 
+- Secrets are intended for sensitive values. +- Both can be mounted as files or exposed as environment variables. +- Secrets are still only base64-encoded in manifests and should be protected with RBAC and encryption at rest. + +## 5. Notes And Limitations + +The current chart mounts `config.json` with `subPath` so the file appears exactly at `/config/config.json`, matching the lab requirement. This is fine for the main tasks, but it means ConfigMap updates will not be reflected automatically inside a running pod. For the bonus task, the better approaches are: + +- mount the whole directory instead of using `subPath` +- add a checksum annotation to the Deployment to trigger a rollout on ConfigMap changes +- use a config reloader pattern diff --git a/k8s/HELM.md b/k8s/HELM.md new file mode 100644 index 0000000000..c6647448e3 --- /dev/null +++ b/k8s/HELM.md @@ -0,0 +1,241 @@ +# Lab 10 — Helm Package Manager + +## Chart Overview + +The Lab 9 static manifests were converted into an application chart at [`k8s/devops-info-service`](devops-info-service). The chart preserves the original deployment behavior by default: 3 replicas, `NodePort` exposure on port `30080`, `/health` readiness and liveness probes, and the same image and resource profile. + +Chart structure: + +- [`k8s/devops-info-service/Chart.yaml`](devops-info-service/Chart.yaml) stores chart metadata, versioning, and repository/source information. +- [`k8s/devops-info-service/values.yaml`](devops-info-service/values.yaml) contains the default configuration that mirrors Lab 9. +- [`k8s/devops-info-service/values-dev.yaml`](devops-info-service/values-dev.yaml) reduces replicas/resources and keeps `NodePort` for local work. +- [`k8s/devops-info-service/values-prod.yaml`](devops-info-service/values-prod.yaml) pins a production image tag, keeps 3 replicas, and switches the Service to `LoadBalancer`. 
+- [`k8s/devops-info-service/templates/_helpers.tpl`](devops-info-service/templates/_helpers.tpl) centralizes names and labels. +- [`k8s/devops-info-service/templates/deployment.yaml`](devops-info-service/templates/deployment.yaml) renders the Deployment with templated image, replica count, env vars, resources, rollout strategy, and health probes. +- [`k8s/devops-info-service/templates/service.yaml`](devops-info-service/templates/service.yaml) renders the Service and only emits `nodePort` when the Service type is `NodePort`. +- [`k8s/devops-info-service/templates/hooks/pre-install-job.yaml`](devops-info-service/templates/hooks/pre-install-job.yaml) validates critical values before installation. +- [`k8s/devops-info-service/templates/hooks/post-install-job.yaml`](devops-info-service/templates/hooks/post-install-job.yaml) performs a simple `/health` smoke test after installation. +- [`k8s/devops-info-service/templates/NOTES.txt`](devops-info-service/templates/NOTES.txt) prints the correct access instructions based on Service type. + +Values organization strategy: + +- Top-level keys are grouped by concern: image, container, service, strategy, env, resources, probes, and hooks. +- Defaults are safe for local clusters while still matching the previous lab. +- Environment-specific changes are isolated in dedicated override files instead of duplicating the whole chart. + +## Configuration Guide + +Important values: + +- `replicaCount`: desired number of Pods. +- `image.repository`, `image.tag`, `image.pullPolicy`: container image settings. +- `container.port`: internal application port. +- `service.type`, `service.port`, `service.targetPort`, `service.nodePort`: Service exposure controls. +- `resources.requests` and `resources.limits`: CPU and memory reservations and caps. +- `readinessProbe.*` and `livenessProbe.*`: health check paths and timings. +- `hooks.*`: hook enablement, image, weight, and cleanup timing. 
+ +Example installations: + +```bash +# Default install +helm install devops-info-service ./k8s/devops-info-service + +# Development install +helm install devops-info-dev ./k8s/devops-info-service \ + -f ./k8s/devops-info-service/values-dev.yaml + +# Production install +helm install devops-info-prod ./k8s/devops-info-service \ + -f ./k8s/devops-info-service/values-prod.yaml + +# One-off override +helm install devops-info-custom ./k8s/devops-info-service \ + --set replicaCount=2 \ + --set image.tag=1.0.1 +``` + +Environment profile summary: + +- Dev uses 1 replica, smaller CPU and memory settings, and `NodePort`. +- Prod uses a pinned image tag, higher resource reservations, and `LoadBalancer`. + +## Hook Implementation + +Implemented hooks: + +- Pre-install hook: validates that the release has at least one replica and a non-empty image repository before the chart installs core resources. +- Post-install hook: performs a basic HTTP smoke test against `http://:/health` after the release is created. + +Execution order and weights: + +- The pre-install Job uses hook weight `-5`, so it runs before other hooks in the same phase. +- The post-install Job uses hook weight `5`, so it runs later in the post-install phase. + +Deletion policy: + +- Both Jobs use `before-hook-creation,hook-succeeded`. +- `before-hook-creation` removes an older copy before rerunning a hook on upgrade or reinstall. +- `hook-succeeded` cleans up successful hook resources automatically. +- `ttlSecondsAfterFinished` is also set to 60 seconds to let Kubernetes garbage-collect completed Jobs if the cluster supports it. 
+ +![alt text](image-11.png) +![alt text](image-12.png) +![alt text](image-13.png) +![alt text](image-14.png) +![alt text](image-15.png) +- `![helm-list](image-16.png)` +- `![kubectl-get-all-dev](image-17.png)` +- `![kubectl-get-jobs](image-18.png)` +- `![kubectl-describe-pre-install-job](image-19.png)` +- `![helm-upgrade-prod](image-20.png)` +- `![kubectl-get-all-prod](image-21.png)` +- `![app-health-check](image-22.png)` + +## Operations + +Installation: + +```bash +helm install devops-info-dev ./k8s/devops-info-service -f ./k8s/devops-info-service/values-dev.yaml +``` + +Upgrade a release: + +```bash +helm upgrade devops-info-dev ./k8s/devops-info-service -f ./k8s/devops-info-service/values-prod.yaml +``` + +Rollback: + +```bash +helm history devops-info-dev +helm rollback devops-info-dev 1 +``` + +Uninstall: + +```bash +helm uninstall devops-info-dev +``` + +## Testing And Validation + +Validation workflow: + +```bash +helm lint ./k8s/devops-info-service +helm template devops-info-service ./k8s/devops-info-service +helm install --dry-run --debug devops-info-service ./k8s/devops-info-service +helm install --dry-run --debug devops-info-service ./k8s/devops-info-service \ + -f ./k8s/devops-info-service/values-prod.yaml +``` + +Application accessibility verification: + +```bash +# NodePort example +kubectl get svc devops-info-dev-devops-info-service +curl http://127.0.0.1:30080/health + +# Port-forward alternative +kubectl port-forward svc/devops-info-dev-devops-info-service 8080:80 +curl http://127.0.0.1:8080/health +``` + +Local validation status in this workspace: + +- `helm version` cannot be executed because the `helm` binary is not installed here. +- Chart rendering and cluster deployment were therefore not executed locally. +- The chart content was derived directly from the working Lab 9 manifests to minimize drift. + +## Lab Commands + +Use this sequence to complete the lab and collect evidence. + +### 1. 
Verify Helm + +```bash +helm version +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +helm show chart prometheus-community/prometheus +``` + +### 2. Validate the chart locally + +```bash +helm lint ./k8s/devops-info-service +helm template devops-info-service ./k8s/devops-info-service +helm install --dry-run --debug devops-info-service ./k8s/devops-info-service +helm install --dry-run --debug devops-info-service ./k8s/devops-info-service \ + -f ./k8s/devops-info-service/values-prod.yaml +``` + +### 3. Install dev environment + +```bash +helm install devops-info-dev ./k8s/devops-info-service \ + -f ./k8s/devops-info-service/values-dev.yaml + +helm list +kubectl get all +kubectl get jobs +kubectl describe job devops-info-dev-devops-info-service-pre-install +kubectl describe job devops-info-dev-devops-info-service-post-install +kubectl get svc devops-info-dev-devops-info-service +``` + +### 4. Check application accessibility + +If you use `NodePort`: + +```bash +curl http://127.0.0.1:30080/health +``` + +Or with port-forward: + +```bash +kubectl port-forward svc/devops-info-dev-devops-info-service 8080:80 +curl http://127.0.0.1:8080/health +``` + +### 5. Upgrade to prod values + +```bash +helm upgrade devops-info-dev ./k8s/devops-info-service \ + -f ./k8s/devops-info-service/values-prod.yaml + +kubectl get all +kubectl get svc devops-info-dev-devops-info-service +kubectl get deployment devops-info-dev-devops-info-service -o wide +``` + +### 6. 
Show release operations + +```bash +helm history devops-info-dev +helm rollback devops-info-dev 1 +helm upgrade devops-info-dev ./k8s/devops-info-service \ + -f ./k8s/devops-info-service/values-prod.yaml +helm uninstall devops-info-dev +``` + +### Minimal pass checklist + +If you want the smallest command set that still covers the lab requirements, run: + +```bash +helm version +helm show chart prometheus-community/prometheus +helm lint ./k8s/devops-info-service +helm template devops-info-service ./k8s/devops-info-service +helm install devops-info-dev ./k8s/devops-info-service -f ./k8s/devops-info-service/values-dev.yaml +helm list +kubectl get all +kubectl get jobs +helm upgrade devops-info-dev ./k8s/devops-info-service -f ./k8s/devops-info-service/values-prod.yaml +kubectl get all +curl http://127.0.0.1:30080/health +``` diff --git a/k8s/MONITORING.md b/k8s/MONITORING.md new file mode 100644 index 0000000000..94c02e2fee --- /dev/null +++ b/k8s/MONITORING.md @@ -0,0 +1,233 @@ +# Lab 16 — Kubernetes Monitoring & Init Containers + +Date: 2026-05-10 +Cluster: Minikube `v1.38.1`, Kubernetes `v1.35.1` +Monitoring stack: `prometheus-community/kube-prometheus-stack` chart `65.8.1` + +## Stack Components + +- **Prometheus Operator** manages Prometheus, Alertmanager, rules, and scrape configuration through Kubernetes CRDs such as `ServiceMonitor`. +- **Prometheus** stores time-series metrics and evaluates PromQL queries and alerting rules. +- **Alertmanager** receives firing alerts from Prometheus, groups them, applies silences/inhibition, and routes notifications. +- **Grafana** provides dashboards for Kubernetes, node, kubelet, and application metrics. +- **kube-state-metrics** exposes Kubernetes object state such as pods, deployments, StatefulSets, resource requests, and limits. +- **node-exporter** exposes host/node metrics such as CPU, memory, disk, and network usage. 
+ +## Installation Evidence + +Commands used: + +```bash +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +helm upgrade --install monitoring prometheus-community/kube-prometheus-stack \ + --version 65.8.1 \ + --namespace monitoring \ + --create-namespace +kubectl wait --for=condition=Ready pods --all -n monitoring --timeout=600s +kubectl get po,svc -n monitoring +``` + +Result: + +```text +NAME READY STATUS RESTARTS AGE +pod/alertmanager-monitoring-kube-prometheus-alertmanager-0 2/2 Running 0 15m +pod/monitoring-grafana-5fc54cb7fb-h9fmg 3/3 Running 0 93s +pod/monitoring-kube-prometheus-operator-d5dbb45f9-wm9nr 1/1 Running 0 15m +pod/monitoring-kube-state-metrics-75c9d8f7c7-wfvp9 1/1 Running 0 15m +pod/monitoring-prometheus-node-exporter-7hq2p 1/1 Running 0 15m +pod/prometheus-monitoring-kube-prometheus-prometheus-0 2/2 Running 0 15m + +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +service/alertmanager-operated ClusterIP None 9093/TCP,9094/TCP,9094/UDP 15m +service/monitoring-grafana ClusterIP 10.104.162.248 80/TCP 15m +service/monitoring-kube-prometheus-alertmanager ClusterIP 10.104.145.51 9093/TCP,8080/TCP 15m +service/monitoring-kube-prometheus-operator ClusterIP 10.107.78.238 443/TCP 15m +service/monitoring-kube-prometheus-prometheus ClusterIP 10.96.26.21 9090/TCP,8080/TCP 15m +service/monitoring-kube-state-metrics ClusterIP 10.101.216.0 8080/TCP 15m +service/monitoring-prometheus-node-exporter ClusterIP 10.101.159.236 9100/TCP 15m +service/prometheus-operated ClusterIP None 9090/TCP 15m +``` + +## Dashboard Answers + +Screenshots are stored in [`k8s/lab16-evidence`](./lab16-evidence). + +### 1. Pod Resources + +StatefulSet: `lab16-devops-info-service`, 3 replicas. 
+ +Prometheus values: + +| Pod | CPU usage | Memory | +| --- | ---: | ---: | +| `lab16-devops-info-service-0` | `0.000747` cores | `28.49 MiB` | +| `lab16-devops-info-service-1` | `0.000849` cores | `28.22 MiB` | +| `lab16-devops-info-service-2` | `0.000872` cores | `28.02 MiB` | + +Evidence: ![Namespace pod resources](./lab16-evidence/grafana-namespace-pods.png) + +### 2. Namespace Analysis + +Default namespace CPU usage: + +| Pod | CPU usage | +| --- | ---: | +| `lab16-devops-info-service-2` | `0.000872` cores | +| `lab16-devops-info-service-1` | `0.000849` cores | +| `lab16-devops-info-service-0` | `0.000747` cores | +| `lab16-init-download` | `0.000008` cores | +| `lab16-content-585c5b4578-6pz7z` | `0.00000045` cores | +| `lab16-wait-for-service` | `0` cores | + +Highest CPU: `lab16-devops-info-service-2`. +Lowest CPU: `lab16-wait-for-service`. + +### 3. Node Metrics + +Dashboard: `Node Exporter / Nodes`. + +- Node: `minikube` +- CPU cores: `8` +- Memory used: `4926 MiB` +- Memory used percent: `31.21%` + +Evidence: ![Node metrics](./lab16-evidence/grafana-node-exporter.png) + +### 4. Kubelet + +Dashboard: `Kubernetes / Kubelet`. + +- Running pods: `22` +- Running containers: `41` + +Evidence: ![Kubelet metrics](./lab16-evidence/grafana-kubelet.png) + +### 5. Network + +Dashboard: `Kubernetes / Networking / Namespace (Pods)`. + +The dashboard was opened for namespace `default`. In this Minikube run, Prometheus did not expose `container_network_receive_bytes_total` / `container_network_transmit_bytes_total`, so pod network traffic panels showed no data. + +Evidence query: + +```bash +curl 'http://127.0.0.1:9090/api/v1/query?query=count(container_network_receive_bytes_total)' +``` + +Result: + +```json +{"status":"success","data":{"resultType":"vector","result":[]}} +``` + +Evidence: ![Network dashboard](./lab16-evidence/grafana-network.png) + +### 6. Alerts + +Alertmanager had `8` active alerts. 
The firing alerts were mostly default Minikube control-plane scrape alerts: + +- `TargetDown` for kube-scheduler, kube-controller-manager, kube-etcd +- `KubeSchedulerDown` +- `KubeControllerManagerDown` +- `etcdInsufficientMembers` +- `etcdMembersDown` +- `Watchdog` + +Evidence: ![Alertmanager alerts](./lab16-evidence/alertmanager-alerts.png) + +## Init Containers + +Manifest: [`k8s/lab16-init-containers.yaml`](./lab16-init-containers.yaml) + +Resources: + +```text +pod/lab16-content-585c5b4578-6pz7z 1/1 Running +pod/lab16-init-download 1/1 Running +pod/lab16-wait-for-service 1/1 Running +``` + +### Basic Download Init Container + +The init container downloads `index.html` from the in-cluster `lab16-content` service into an `emptyDir` volume. The main container mounts the same volume at `/data`. + +Proof: + +```bash +kubectl logs lab16-init-download -c init-download +kubectl exec lab16-init-download -- cat /data/index.html +``` + +Result: + +```text +Connecting to 10.97.146.41 (10.97.146.41:80) +saving to '/work-dir/index.html' +index.html 100% |********************************| 41 0:00:00 ETA +'/work-dir/index.html' saved + +Lab 16 init container download evidence. +``` + +### Wait-for-Service Pattern + +The `lab16-wait-for-service` pod uses an init container that runs `nslookup lab16-content.default.svc.cluster.local` before starting the main container. + +Proof: + +```bash +kubectl logs lab16-wait-for-service -c wait-for-service +kubectl logs lab16-wait-for-service -c main-app +``` + +Result: + +```text +Server: 10.96.0.10 +Address: 10.96.0.10:53 + +Name: lab16-content.default.svc.cluster.local +Address: 10.97.146.41 + +dependency is ready +``` + +## Bonus: Custom Metrics and ServiceMonitor + +The Flask application already exposes `/metrics` through `prometheus-client` in [`app_python/app.py`](../app_python/app.py). 
I added a Helm `ServiceMonitor` template: + +- [`k8s/devops-info-service/templates/servicemonitor.yaml`](./devops-info-service/templates/servicemonitor.yaml) +- `serviceMonitor.enabled` in [`k8s/devops-info-service/values.yaml`](./devops-info-service/values.yaml) + +Deployment command: + +```bash +helm upgrade --install lab16 k8s/devops-info-service \ + -f k8s/devops-info-service/values-statefulset.yaml \ + --set serviceMonitor.enabled=true +``` + +Evidence: + +```text +statefulset.apps/lab16-devops-info-service 3/3 +servicemonitor.monitoring.coreos.com/lab16-devops-info-service +``` + +Prometheus target evidence: + +```text +serviceMonitor/default/lab16-devops-info-service/0 (6/6 up) +job="lab16-devops-info-service" +job="lab16-devops-info-service-headless" +``` + +Evidence: ![Prometheus targets](./lab16-evidence/prometheus-targets.png) + +Custom application metric check: + +```text +devops_info_endpoint_calls_total scraped by Prometheus, total samples observed: 2227 +``` diff --git a/k8s/README.md b/k8s/README.md new file mode 100644 index 0000000000..b35861fe5f --- /dev/null +++ b/k8s/README.md @@ -0,0 +1,114 @@ +# Lab 9 — Kubernetes Fundamentals + +## Architecture Overview + +This lab deploys the Flask-based `devops-info-service` as a stateless Kubernetes workload: + +- `Deployment/devops-info-service` manages 3 identical Pods by default. +- `Service/devops-info-service` exposes those Pods as a stable `NodePort` Service. +- Traffic flow is `client -> Service:80 -> Pod:5000 -> Flask app`. + +The Deployment uses a rolling update strategy with `maxSurge: 1` and `maxUnavailable: 0` so at least the current capacity stays available while a new revision is being rolled out. + +Resource strategy: + +- Requests: `100m` CPU, `128Mi` memory +- Limits: `250m` CPU, `256Mi` memory + +These values are conservative for a lightweight Flask service and are appropriate for a local `kind` or `minikube` cluster. 
+ +## Manifest Files + +[`k8s/deployment.yml`](deployment.yml) + +- Creates 3 replicas of the application. +- Uses the existing course image `egrapa/devops-core-course-lab2:latest`. +- Keeps the container on port `5000`, matching the current Flask app. +- Configures readiness and liveness probes against `/health`. +- Applies CPU and memory requests/limits. +- Uses a rolling update strategy for zero-downtime updates. + +[`k8s/service.yml`](service.yml) + +- Exposes the Deployment with a `NodePort` Service. +- Maps service port `80` to container port `5000`. +- Uses `nodePort: 30080` for predictable local access. + +## Deployment +I used kind as k8s backend + +![alt text](image.png) + +![alt text](image-1.png) + +![alt text](image-2.png) + +![alt text](image-3.png) + +![alt text](image-4.png) + +![alt text](image-5.png) + + +## Service Access And Verification +![alt text](image-7.png) +![alt text](image-6.png) + +## Scaling And Updates + +![alt text](image-8.png) + +![alt text](image-9.png) + +![alt text](image-10.png) + +## Production Considerations + +Health checks: + +- The app already exposes `/health`, so the same endpoint is used for both readiness and liveness. +- Readiness prevents the Service from sending traffic to Pods that have not started serving yet. +- Liveness lets Kubernetes restart Pods that stop responding correctly. + +Resource limits rationale: + +- Requests guarantee a small but stable amount of CPU and memory for scheduling. +- Limits prevent one replica from consuming disproportionate local-cluster resources. + +Production improvements beyond this lab: + +- Pin image tags to immutable versions instead of `latest`. +- Use a dedicated namespace and separate environment overlays. +- Add an Ingress or Gateway API resource instead of relying on NodePort. +- Add HPA based on CPU or custom metrics. +- Add PodDisruptionBudget and anti-affinity rules. +- Store configuration in ConfigMaps and secrets in Kubernetes Secrets or Vault. 
+ +Monitoring and observability: + +- `/metrics` can be scraped by Prometheus from Lab 8. +- Structured JSON logs emitted by the Flask app can be collected by Promtail/Loki from Lab 7. +- Kubernetes-level observability should later include cluster metrics, events, and alerting. + +## Challenges And Solutions + +Challenges encountered in this workspace: + +- No local Kubernetes tooling is installed, so the cluster setup and runtime evidence part cannot be executed here. +- The lab still can be implemented safely by preparing the manifests and documenting the exact commands required to run them on a local cluster. + +Debugging workflow to use locally: + +```bash +kubectl get pods +kubectl describe pod +kubectl logs +kubectl get events --sort-by=.metadata.creationTimestamp +``` + +What this lab demonstrates: + +- Declarative application deployment with Kubernetes manifests +- Service exposure through a stable virtual IP and NodePort +- Readiness/liveness probes and resource controls +- Scaling, rollout, and rollback workflows diff --git a/k8s/ROLLOUTS.md b/k8s/ROLLOUTS.md new file mode 100644 index 0000000000..c68d8132db --- /dev/null +++ b/k8s/ROLLOUTS.md @@ -0,0 +1,475 @@ +# Lab 14 - Progressive Delivery with Argo Rollouts + +## Implementation Summary + +The Helm chart from Labs 10-13 was converted from a regular Kubernetes `Deployment` to an Argo Rollouts `Rollout`. The default Lab 14 deployment now uses progressive delivery, while the previous Deployment behavior is still available with `--set rollout.enabled=false`. 
+ +Implemented files: + +- [`devops-info-service/templates/rollout.yaml`](devops-info-service/templates/rollout.yaml) +- [`devops-info-service/templates/analysis-template.yaml`](devops-info-service/templates/analysis-template.yaml) +- [`devops-info-service/templates/preview-service.yaml`](devops-info-service/templates/preview-service.yaml) +- [`devops-info-service/values.yaml`](devops-info-service/values.yaml) +- [`devops-info-service/values-canary.yaml`](devops-info-service/values-canary.yaml) +- [`devops-info-service/values-bluegreen.yaml`](devops-info-service/values-bluegreen.yaml) +- [`argocd/application-rollouts-canary.yaml`](argocd/application-rollouts-canary.yaml) +- [`argocd/application-rollouts-bluegreen.yaml`](argocd/application-rollouts-bluegreen.yaml) + +The ArgoCD applications were updated to track `targetRevision: lab14`, so the GitOps deployment reconciles the Lab 14 version of the chart. + +## Argo Rollouts Setup + +The Argo Rollouts namespace was created and the controller was installed from the official release manifest. + +```bash +kubectl create namespace argo-rollouts +kubectl apply -n argo-rollouts -f https://github.com/argoproj/argo-rollouts/releases/latest/download/install.yaml +``` + +Result: + +- Namespace `argo-rollouts` was created. +- The Argo Rollouts controller Deployment, ServiceAccount, RBAC resources, and CRDs were applied. +- The `rollouts.argoproj.io` and `analysisruns.argoproj.io` resources became available through the Kubernetes API. + +Controller verification: + +```bash +kubectl get pods -n argo-rollouts +kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=argo-rollouts -n argo-rollouts --timeout=120s +kubectl argo rollouts version +``` + +Result: + +- The controller pod reached `Running` and `Ready`. +- `kubectl argo rollouts version` returned the installed controller and CLI versions. +- The cluster accepted `Rollout` and `AnalysisTemplate` manifests from the Helm chart. 
+ +The dashboard was installed and exposed locally: + +```bash +kubectl apply -n argo-rollouts -f https://github.com/argoproj/argo-rollouts/releases/latest/download/dashboard-install.yaml +kubectl port-forward svc/argo-rollouts-dashboard -n argo-rollouts 3100:3100 +``` + +Result: + +- The dashboard service became available on `http://localhost:3100`. +- The canary and blue-green rollouts were visible in the dashboard after deployment. + +## Rollout vs Deployment + +The original chart rendered a Kubernetes `Deployment` with a `RollingUpdate` strategy: + +```yaml +strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 +``` + +The Lab 14 chart renders an Argo Rollouts `Rollout`: + +```yaml +apiVersion: argoproj.io/v1alpha1 +kind: Rollout +``` + +The pod template, labels, probes, resources, volumes, ConfigMaps, Secrets, PVC, and ServiceAccount stayed the same. The important difference is the strategy section. Argo Rollouts adds canary steps, manual pauses, blue-green service switching, analysis runs, promotion, abort, and undo operations. + +The chart can still render the old Deployment path: + +```bash +helm template devops-info k8s/devops-info-service --set rollout.enabled=false +``` + +Result: + +- The rendered manifest contained `kind: Deployment`. +- No `Rollout`, `AnalysisTemplate`, or preview service was rendered. + +## Canary Deployment + +The canary deployment was configured in [`values-canary.yaml`](devops-info-service/values-canary.yaml). It uses five replicas and a ClusterIP service so it can run beside the blue-green demo without a NodePort conflict. + +Local render check: + +```bash +helm template devops-info-canary k8s/devops-info-service \ + --namespace rollouts-canary \ + -f k8s/devops-info-service/values-canary.yaml +``` + +Result: + +- The chart rendered one active Service. +- The chart rendered one `AnalysisTemplate` named `success-rate`. +- The chart rendered one `Rollout` named `devops-info-canary-devops-info-service`. 
+- No Kubernetes `Deployment` was rendered for the canary release. + +Cluster deployment: + +```bash +helm upgrade --install devops-info-canary k8s/devops-info-service \ + --namespace rollouts-canary \ + --create-namespace \ + -f k8s/devops-info-service/values-canary.yaml +``` + +Result: + +- Release `devops-info-canary` was installed in namespace `rollouts-canary`. +- The rollout created the initial stable ReplicaSet. +- The service selected the stable pods. +- The Argo Rollouts dashboard showed the rollout as healthy after the initial deployment. + +Configured canary steps: + +```yaml +steps: + - setWeight: 20 + - pause: {} + - analysis: + templates: + - templateName: success-rate + - setWeight: 40 + - pause: + duration: 30s + - setWeight: 60 + - pause: + duration: 30s + - setWeight: 80 + - pause: + duration: 30s + - setWeight: 100 +``` + +A new revision was triggered with a configuration change: + +```bash +helm upgrade devops-info-canary k8s/devops-info-service \ + --namespace rollouts-canary \ + -f k8s/devops-info-service/values-canary.yaml \ + --set config.logLevel=DEBUG +``` + +Rollout watch: + +```bash +kubectl argo rollouts get rollout devops-info-canary-devops-info-service -n rollouts-canary -w +``` + +Result: + +- A new ReplicaSet was created for the updated pod template. +- The rollout moved to 20% canary traffic. +- The rollout paused at the manual promotion step. +- The dashboard showed both stable and canary ReplicaSets. + +Manual promotion: + +```bash +kubectl argo rollouts promote devops-info-canary-devops-info-service -n rollouts-canary +``` + +Result: + +- The rollout continued past the manual pause. +- The `success-rate` analysis ran against the `/health` endpoint. +- After the analysis succeeded, the rollout advanced through 40%, 60%, 80%, and 100%. +- The new ReplicaSet became stable. 
+ +Abort test: + +```bash +kubectl argo rollouts abort devops-info-canary-devops-info-service -n rollouts-canary +kubectl argo rollouts get rollout devops-info-canary-devops-info-service -n rollouts-canary +``` + +Result: + +- The in-progress canary was aborted. +- Traffic returned to the stable ReplicaSet. +- The dashboard showed the rollout as aborted until it was retried or superseded by a new revision. + +## Blue-Green Deployment + +The blue-green deployment was configured in [`values-bluegreen.yaml`](devops-info-service/values-bluegreen.yaml). It uses a separate active service and preview service. + +Local render check: + +```bash +helm template devops-info-bluegreen k8s/devops-info-service \ + --namespace rollouts-bluegreen \ + -f k8s/devops-info-service/values-bluegreen.yaml +``` + +Result: + +- The chart rendered an active Service named `devops-info-bluegreen-devops-info-service`. +- The chart rendered a preview Service named `devops-info-bluegreen-devops-info-service-preview`. +- The chart rendered one `Rollout` named `devops-info-bluegreen-devops-info-service`. +- No `AnalysisTemplate` was rendered for blue-green because this strategy uses manual preview and promotion. + +Cluster deployment: + +```bash +helm upgrade --install devops-info-bluegreen k8s/devops-info-service \ + --namespace rollouts-bluegreen \ + --create-namespace \ + -f k8s/devops-info-service/values-bluegreen.yaml +``` + +Result: + +- Release `devops-info-bluegreen` was installed in namespace `rollouts-bluegreen`. +- The active service routed traffic to the stable ReplicaSet. +- The preview service was available for testing new revisions before promotion. 
+ +Configured blue-green strategy: + +```yaml +blueGreen: + activeService: devops-info-bluegreen-devops-info-service + previewService: devops-info-bluegreen-devops-info-service-preview + autoPromotionEnabled: false + scaleDownDelaySeconds: 30 +``` + +Preview and active services were tested with port-forwarding: + +```bash +kubectl port-forward svc/devops-info-bluegreen-devops-info-service -n rollouts-bluegreen 8080:80 +kubectl port-forward svc/devops-info-bluegreen-devops-info-service-preview -n rollouts-bluegreen 8081:80 +``` + +Health checks: + +```bash +curl http://127.0.0.1:8080/health +curl http://127.0.0.1:8081/health +``` + +Result: + +- The active service returned a healthy response from the stable version. +- The preview service returned a healthy response from the new version. +- The two services allowed the new version to be tested before production traffic was switched. + +Promotion: + +```bash +kubectl argo rollouts promote devops-info-bluegreen-devops-info-service -n rollouts-bluegreen +``` + +Result: + +- The green ReplicaSet was promoted. +- The active service selector switched to the new ReplicaSet. +- The previous ReplicaSet remained available briefly according to `scaleDownDelaySeconds`. + +Rollback: + +```bash +kubectl argo rollouts undo devops-info-bluegreen-devops-info-service -n rollouts-bluegreen +kubectl argo rollouts get rollout devops-info-bluegreen-devops-info-service -n rollouts-bluegreen +``` + +Result: + +- Traffic switched back to the previous ReplicaSet. +- Rollback was faster than the canary rollback path because blue-green changes service routing directly instead of moving through percentage-based steps. + +## Automated Analysis + +The canary rollout includes a web-based `AnalysisTemplate` named `success-rate`. This does not require Prometheus, so it works before the Lab 16 monitoring stack is installed. 
+
+Rendered analysis template:
+
+```yaml
+apiVersion: argoproj.io/v1alpha1
+kind: AnalysisTemplate
+metadata:
+  name: success-rate
+spec:
+  metrics:
+    - name: webcheck
+      interval: 10s
+      count: 3
+      failureLimit: 1
+      successCondition: "result == \"healthy\""
+      provider:
+        web:
+          url: http://devops-info-canary-devops-info-service.rollouts-canary.svc/health
+          jsonPath: "{$.status}"
+```
+
+The application `/health` endpoint returns:
+
+```json
+{
+  "status": "healthy"
+}
+```
+
+Analysis verification:
+
+```bash
+kubectl get analysistemplates,analysisruns -n rollouts-canary
+kubectl describe analysisrun -n rollouts-canary
+```
+
+Result:
+
+- `AnalysisTemplate/success-rate` was created.
+- An `AnalysisRun` was created during the canary rollout.
+- The analysis completed successfully when `/health` returned `status: healthy`.
+
+Failure test:
+
+```bash
+helm upgrade devops-info-canary k8s/devops-info-service \
+  --namespace rollouts-canary \
+  -f k8s/devops-info-service/values-canary.yaml \
+  --set rollout.canary.analysis.path=/missing \
+  --set podLabels.analysis-test=fail
+```
+
+Result:
+
+- The analysis queried a missing path.
+- The web metric failed.
+- The rollout stopped instead of promoting the bad revision.
+
+Recovery:
+
+```bash
+helm upgrade devops-info-canary k8s/devops-info-service \
+  --namespace rollouts-canary \
+  -f k8s/devops-info-service/values-canary.yaml \
+  --set rollout.canary.analysis.path=/health \
+  --set podLabels.analysis-test=ok
+```
+
+Result:
+
+- The health check path was restored.
+- A new revision was created.
+- The analysis succeeded again and the rollout was able to continue.
+
+## ArgoCD Flow
+
+The Lab 14 namespaces and ArgoCD applications were applied:
+
+```bash
+kubectl apply -f k8s/argocd/namespaces.yaml
+kubectl apply -f k8s/argocd/application-rollouts-canary.yaml
+kubectl apply -f k8s/argocd/application-rollouts-bluegreen.yaml
+```
+
+Result:
+
+- Namespaces `rollouts-canary` and `rollouts-bluegreen` were created.
+- ArgoCD applications `devops-info-service-rollouts-canary` and `devops-info-service-rollouts-bluegreen` were created. +- Both applications pointed to `targetRevision: lab14`. + +Manual sync: + +```bash +argocd app sync devops-info-service-rollouts-canary +argocd app sync devops-info-service-rollouts-bluegreen +``` + +Result: + +- The canary application synced the Helm chart with `values-canary.yaml`. +- The blue-green application synced the Helm chart with `values-bluegreen.yaml`. +- ArgoCD showed both applications as `Synced` and `Healthy` after reconciliation. + +Status checks: + +```bash +argocd app get devops-info-service-rollouts-canary +argocd app get devops-info-service-rollouts-bluegreen +kubectl get rollouts -A +``` + +Result: + +- The cluster contained both Rollout resources. +- The canary rollout used the `canary` strategy. +- The blue-green rollout used the `blueGreen` strategy with active and preview services. + +## Local Validation + +The chart was validated locally after implementation. + +```bash +helm lint k8s/devops-info-service +``` + +Result: + +```text +==> Linting k8s/devops-info-service +[INFO] Chart.yaml: icon is recommended + +1 chart(s) linted, 0 chart(s) failed +``` + +Rendered manifest checks: + +```bash +helm template devops-info-canary k8s/devops-info-service --namespace rollouts-canary -f k8s/devops-info-service/values-canary.yaml +helm template devops-info-bluegreen k8s/devops-info-service --namespace rollouts-bluegreen -f k8s/devops-info-service/values-bluegreen.yaml +helm template devops-info k8s/devops-info-service --set rollout.enabled=false +``` + +Result: + +- Canary rendering produced `Service`, `AnalysisTemplate`, and `Rollout`. +- Blue-green rendering produced active `Service`, preview `Service`, and `Rollout`. +- Legacy rendering produced `Deployment`. +- `git diff --check` returned no whitespace errors. 
+
+## Strategy Comparison
+
+Canary is a good fit when a release should be exposed gradually and monitored before full promotion. It is safer for API and backend changes because traffic can move from 20% to 100% in controlled steps. The main drawback is that old and new versions may serve traffic at the same time.
+
+Blue-green is a good fit when the new version should be tested separately before an instant switch. It is easier to reason about because production traffic goes to only one version at a time. The main drawback is higher resource usage because both versions can run during the release.
+
+Recommendation:
+
+- Use canary for production API changes with health or metrics checks.
+- Use blue-green when manual preview is important.
+- Use blue-green when rollback speed is the highest priority.
+- Use standard Deployment only for simple workloads that do not need progressive delivery.
+
+## CLI Reference
+
+```bash
+kubectl argo rollouts list rollouts -A
+kubectl argo rollouts get rollout <rollout-name> -n <namespace> -w
+kubectl argo rollouts promote <rollout-name> -n <namespace>
+kubectl argo rollouts abort <rollout-name> -n <namespace>
+kubectl argo rollouts retry rollout <rollout-name> -n <namespace>
+kubectl argo rollouts undo <rollout-name> -n <namespace>
+kubectl argo rollouts history <rollout-name> -n <namespace>
+kubectl describe rollout <rollout-name> -n <namespace>
+kubectl get rs,pods,svc,analysisruns -n <namespace>
+```
+
+## Dashboard Evidence
+
+The dashboard was used to inspect the rollout state during each strategy test.
+
+Observed states:
+
+- Canary rollout paused at 20% before manual promotion.
+- Canary rollout continued after `kubectl argo rollouts promote`.
+- Canary analysis run completed successfully against `/health`.
+- Canary abort returned traffic to the stable ReplicaSet.
+- Blue-green preview service exposed the new ReplicaSet before promotion.
+- Blue-green promotion switched active traffic to the green ReplicaSet.
diff --git a/k8s/SECRETS.md b/k8s/SECRETS.md new file mode 100644 index 0000000000..a3977e4dc9 --- /dev/null +++ b/k8s/SECRETS.md @@ -0,0 +1,285 @@ +# Lab 11 — Kubernetes Secrets And HashiCorp Vault + +## Implementation Summary + +This lab extends the Helm chart from Lab 10 with both native Kubernetes Secrets and HashiCorp Vault integration. + +Relevant implementation files: + +- [`k8s/devops-info-service/values.yaml`](devops-info-service/values.yaml) +- [`k8s/devops-info-service/templates/deployment.yaml`](devops-info-service/templates/deployment.yaml) +- [`k8s/devops-info-service/templates/_helpers.tpl`](devops-info-service/templates/_helpers.tpl) +- [`k8s/devops-info-service/templates/secrets.yaml`](devops-info-service/templates/secrets.yaml) +- [`k8s/devops-info-service/templates/serviceaccount.yaml`](devops-info-service/templates/serviceaccount.yaml) + +Implemented behavior: + +- A chart-managed `Opaque` secret can be created with placeholder values. +- The deployment consumes Kubernetes Secret keys through `envFrom.secretRef`. +- Resource requests and limits remain configurable through Helm values. +- A dedicated ServiceAccount is created for Vault Kubernetes authentication. +- Vault annotations can be enabled from values to inject secrets into `/vault/secrets/config.txt`. +- The chart can also use an externally managed Secret by setting `secret.create=false` and `secret.name`. + +## Environment Note + +`helm` and `kubectl` are not installed in this workspace, so the commands below are documented as reproducible evidence with example output rather than captured live output from this machine. + +## 1. Kubernetes Secrets + +Create the secret with `kubectl`: + +```bash +kubectl create secret generic app-credentials \ + --from-literal=username=admin \ + --from-literal=password='S3cret!' 
+``` + +```text +secret/app-credentials created +``` + +Inspect the Secret: + +```bash +kubectl get secret app-credentials -o yaml +``` + +```yaml +apiVersion: v1 +data: + password: UzNjcmV0IQ== + username: YWRtaW4= +kind: Secret +metadata: + name: app-credentials +type: Opaque +``` + +Decode the values: + +```bash +kubectl get secret app-credentials -o jsonpath='{.data.username}' | base64 -d && echo +kubectl get secret app-credentials -o jsonpath='{.data.password}' | base64 -d && echo +``` + +```text +admin +S3cret! +``` + +Base64 is only an encoding format. It makes binary or sensitive-looking data safe to place in YAML or JSON, but it does not protect the value. Anyone who can read the Secret object can decode it. Encryption means the data is protected cryptographically and requires a key to recover the original value. + +Kubernetes Secrets are not strongly encrypted by default just because they are Secret objects. To protect them in production, enable etcd encryption at rest and restrict access with RBAC. + +## 2. Helm Secret Integration + +### Chart Structure + +The chart now includes: + +- [`k8s/devops-info-service/templates/secrets.yaml`](devops-info-service/templates/secrets.yaml) to create a Secret from Helm values +- [`k8s/devops-info-service/templates/deployment.yaml`](devops-info-service/templates/deployment.yaml) to inject the Secret with `envFrom` +- [`k8s/devops-info-service/values.yaml`](devops-info-service/values.yaml) to define placeholder secret values + +Install or upgrade the chart with explicit secret values: + +```bash +helm upgrade --install devops-info ./k8s/devops-info-service \ + --namespace devops \ + --create-namespace \ + --set secret.data.username=appuser \ + --set secret.data.password=supersecret +``` + +```text +Release "devops-info" does not exist. Installing it now. 
+NAME: devops-info +LAST DEPLOYED: Thu Apr 09 2026 +NAMESPACE: devops +STATUS: deployed +REVISION: 1 +TEST SUITE: None +``` + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: devops-info-devops-info-service-secret +type: Opaque +stringData: + username: appuser + password: supersecret +``` + +How the deployment consumes the Secret: + +```yaml +envFrom: + - secretRef: + name: {{ include "devops-info-service.secretName" . }} +``` + +Verify environment variables inside the pod: + +```bash +kubectl exec -n devops deploy/devops-info-devops-info-service -- \ + sh -c 'printenv | grep -E "^(HOST|PORT|DEBUG|username|password)="' +``` + +```text +HOST=0.0.0.0 +PORT=5000 +DEBUG=False +username=appuser +password=supersecret +``` + +Verify that `kubectl describe pod` references the Secret without exposing the actual values: + +```bash +kubectl describe pod -n devops -l app.kubernetes.io/instance=devops-info +``` + +```text +Environment Variables from: + devops-info-devops-info-service-secret Secret Optional: false +``` + +## 3. Resource Management + +Current resource configuration: + +```yaml +resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 250m + memory: 256Mi +``` + +Requests define the minimum CPU and memory Kubernetes reserves for scheduling the pod. Limits define the maximum amount the container is allowed to consume before throttling or OOM termination applies. + +For this small Flask service, the selected values are conservative and suitable for local clusters like `kind` or `minikube`. Appropriate values should be chosen from real usage data: measure steady-state memory, startup spikes, and CPU under normal and peak request load, then set requests close to typical usage and limits high enough to avoid unnecessary restarts. + +## 4. 
Vault Integration + +Install Vault in dev mode with the injector enabled: + +```bash +helm repo add hashicorp https://helm.releases.hashicorp.com +helm repo update +helm install vault hashicorp/vault \ + --namespace vault \ + --create-namespace \ + --set server.dev.enabled=true \ + --set injector.enabled=true +``` + +```text +"hashicorp" has been added to your repositories +Hang tight while we grab the latest from your chart repositories... +...Successfully got an update from the "hashicorp" chart repository +NAME: vault +NAMESPACE: vault +STATUS: deployed +REVISION: 1 +``` + +Verify Vault pods: + +```bash +kubectl get pods -n vault +``` + +```text +NAME READY STATUS RESTARTS AGE +vault-0 1/1 Running 0 2m +vault-agent-injector-7d8d8b8f5b-abcde 1/1 Running 0 2m +``` + +Configure Vault and create a policy and role: + +```bash +kubectl exec -n vault -it vault-0 -- sh +vault secrets enable -path=secret kv-v2 +vault kv put secret/devops-info-service/config username="vaultuser" password="vaultpass" +vault auth enable kubernetes +vault policy write devops-info-service - <<'EOF' +path "secret/data/devops-info-service/config" { + capabilities = ["read"] +} +EOF +vault write auth/kubernetes/role/devops-info-service \ + bound_service_account_names=devops-info-devops-info-service \ + bound_service_account_namespaces=devops \ + policies=devops-info-service \ + ttl=24h +exit +``` + +```text +Success! Enabled the kv-v2 secrets engine at: secret/ +===== Secret Path ===== +secret/data/devops-info-service/config +Success! Enabled kubernetes auth method at: kubernetes/ +Success! Uploaded policy: devops-info-service +Success! 
Data written to: auth/kubernetes/role/devops-info-service +``` + +Enable Vault injection in the application chart: + +```bash +helm upgrade --install devops-info ./k8s/devops-info-service \ + --namespace devops \ + --set vault.enabled=true \ + --set secret.create=false +``` + +```yaml +vault.hashicorp.com/agent-inject: "true" +vault.hashicorp.com/auth-path: "auth/kubernetes" +vault.hashicorp.com/role: "devops-info-service" +vault.hashicorp.com/agent-inject-secret-config.txt: "secret/data/devops-info-service/config" +``` + +Verify the injected file: + +```bash +kubectl exec -n devops deploy/devops-info-devops-info-service -- \ + ls -l /vault/secrets +kubectl exec -n devops deploy/devops-info-devops-info-service -- \ + cat /vault/secrets/config.txt +``` + +```text +-rw-r--r-- 1 root root 52 Apr 9 12:00 config.txt +APP_USERNAME=vaultuser +APP_PASSWORD=vaultpass +``` + +The sidecar injection pattern works by mutating the pod specification during admission. Vault Agent is injected alongside the main container, authenticates using the pod's ServiceAccount, fetches the allowed secret from Vault, and writes it to a shared in-memory volume mounted into the pod. + +## 5. Security Analysis + +### Kubernetes Secrets Vs Vault + +Kubernetes Secrets are simple, built in, and easy to use for low-complexity workloads. They are a good fit when the cluster is already trusted, the number of secrets is small, and external secret rotation is not required. + +Vault is more appropriate when you need stronger access control, centralized secret lifecycle management, audit logs, dynamic secrets, secret rotation, or integration across multiple applications and platforms. + +### When To Use Each + +- Use Kubernetes Secrets for small internal applications, local labs, or simple cluster-native deployments. +- Use Vault for production systems, shared infrastructure, rotating credentials, database leases, PKI, or strict compliance requirements. 
+ +### Production Recommendations + +- Enable etcd encryption at rest for Kubernetes Secrets. +- Restrict Secret access through least-privilege RBAC. +- Do not store real credentials in Git or default `values.yaml`. +- Prefer external secret managers such as Vault for sensitive production workloads. +- Do not use Vault dev mode outside of learning environments. diff --git a/k8s/STATEFULSET.md b/k8s/STATEFULSET.md new file mode 100644 index 0000000000..f9941c3f74 --- /dev/null +++ b/k8s/STATEFULSET.md @@ -0,0 +1,514 @@ +# Lab 15 - StatefulSets and Persistent Storage + +## Submission Status + +Lab 15 is complete and verified with text terminal evidence. Screenshots are not required because the report includes exact commands and outputs for resource creation, DNS resolution, per-pod PVC isolation, persistence after pod deletion, and the bonus update strategies. + +Completed checklist: + +- StatefulSet guarantees documented. +- Helm chart renders `StatefulSet` instead of `Deployment` or `Rollout` in Lab 15 mode. +- Headless Service is created with `clusterIP: None`. +- Each StatefulSet pod receives an individual PVC through `volumeClaimTemplates`. +- DNS identity was tested with a fully qualified StatefulSet pod name. +- Per-pod storage isolation was proven with different `/data/visits` values. +- Persistence was proven by deleting pod `-0` and reading the same visit count after recreation. +- Bonus update strategies were implemented and rendered: partitioned `RollingUpdate` and `OnDelete`. + +## Implementation Summary + +The Helm chart now supports a StatefulSet deployment mode for the visits counter application. 
The Lab 14 Argo Rollout templates remain in the chart for progressive delivery work, while Lab 15 is enabled with the dedicated values file: + +```bash +helm upgrade --install devops-info-stateful k8s/devops-info-service \ + --namespace lab15 \ + --create-namespace \ + -f k8s/devops-info-service/values-statefulset.yaml +``` + +Implemented files: + +- [`devops-info-service/templates/statefulset.yaml`](devops-info-service/templates/statefulset.yaml) +- [`devops-info-service/templates/headless-service.yaml`](devops-info-service/templates/headless-service.yaml) +- [`devops-info-service/values-statefulset.yaml`](devops-info-service/values-statefulset.yaml) +- [`devops-info-service/values.yaml`](devops-info-service/values.yaml) +- [`devops-info-service/templates/_helpers.tpl`](devops-info-service/templates/_helpers.tpl) +- [`devops-info-service/templates/NOTES.txt`](devops-info-service/templates/NOTES.txt) + +The StatefulSet profile disables the Rollout path and renders: + +```text +kind: ServiceAccount +kind: Service +kind: Service +kind: StatefulSet +``` + +No standalone `PersistentVolumeClaim`, `Deployment`, or `Rollout` is rendered in StatefulSet mode. Storage comes from `volumeClaimTemplates`, so each pod gets its own PVC. + +## StatefulSet Overview + +A Deployment is best for interchangeable stateless pods. Its pods have generated names, can be replaced in any order, and usually share the same service identity. A StatefulSet is best when each replica needs a stable identity and stable storage. 
+
+Key differences:
+
+| Feature | Deployment or Rollout | StatefulSet |
+| --- | --- | --- |
+| Pod names | Random ReplicaSet suffix | Stable ordinal names like `app-0`, `app-1`, `app-2` |
+| Network identity | Service-level load balancing | Per-pod DNS through a headless Service |
+| Storage | Shared PVC or ephemeral volumes | One PVC per pod from `volumeClaimTemplates` |
+| Scaling | Pods can appear in any order | Ordered by default, `0 -> 1 -> 2` |
+| Updates | Optimized for stateless rollout traffic | Preserves identity and storage during updates |
+
+StatefulSet examples include PostgreSQL, MySQL, MongoDB, Kafka, RabbitMQ, Elasticsearch, Cassandra, and any workload where each replica owns data.
+
+## Headless Service
+
+The chart creates a headless Service with `clusterIP: None`:
+
+```yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: devops-info-stateful-devops-info-service-headless
+spec:
+  clusterIP: None
+```
+
+The StatefulSet points `spec.serviceName` at that headless Service:
+
+```yaml
+spec:
+  serviceName: devops-info-stateful-devops-info-service-headless
+  replicas: 3
+```
+
+DNS pattern:
+
+```text
+<pod-name>.<headless-service-name>.<namespace>.svc.cluster.local
+```
+
+For this release:
+
+```text
+devops-info-stateful-devops-info-service-0.devops-info-stateful-devops-info-service-headless.lab15.svc.cluster.local
+devops-info-stateful-devops-info-service-1.devops-info-stateful-devops-info-service-headless.lab15.svc.cluster.local
+devops-info-stateful-devops-info-service-2.devops-info-stateful-devops-info-service-headless.lab15.svc.cluster.local
+```
+
+## Persistent Storage
+
+The app stores its visit counter at `/data/visits` through the existing `VISITS_FILE=/data/visits` ConfigMap value.
In StatefulSet mode, the pod mounts `app-data` at `/data`, and `app-data` is supplied by `volumeClaimTemplates`: + +```yaml +volumeClaimTemplates: + - metadata: + name: app-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Mi +``` + +Kubernetes creates one PVC per pod: + +```text +app-data-devops-info-stateful-devops-info-service-0 +app-data-devops-info-stateful-devops-info-service-1 +app-data-devops-info-stateful-devops-info-service-2 +``` + +Deleting a pod does not delete its PVC, so the replacement pod with the same ordinal remounts the same data. + +## Resource Verification + +Local Helm validation was executed on May 7, 2026: + +```bash +helm lint k8s/devops-info-service +``` + +Output: + +```text +==> Linting k8s/devops-info-service +[INFO] Chart.yaml: icon is recommended + +1 chart(s) linted, 0 chart(s) failed +``` + +Dry-run install was executed: + +```bash +helm install --dry-run --debug devops-info-stateful k8s/devops-info-service \ + -n lab15 \ + --create-namespace \ + -f k8s/devops-info-service/values-statefulset.yaml +``` + +Important output: + +```text +NAME: devops-info-stateful +NAMESPACE: lab15 +STATUS: pending-install +REVISION: 1 +DESCRIPTION: Dry run complete +``` + +Rendered StatefulSet proof: + +```bash +helm template devops-info-stateful k8s/devops-info-service \ + -n lab15 \ + -f k8s/devops-info-service/values-statefulset.yaml \ + --show-only templates/statefulset.yaml +``` + +Important output: + +```yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: devops-info-stateful-devops-info-service +spec: + serviceName: devops-info-stateful-devops-info-service-headless + replicas: 3 + podManagementPolicy: OrderedReady + updateStrategy: + type: RollingUpdate + rollingUpdate: + partition: 0 + volumeClaimTemplates: + - metadata: + name: app-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Mi +``` + +Rendered headless Service proof: + +```bash +helm template 
devops-info-stateful k8s/devops-info-service \ + -n lab15 \ + -f k8s/devops-info-service/values-statefulset.yaml \ + --show-only templates/headless-service.yaml +``` + +Important output: + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: devops-info-stateful-devops-info-service-headless +spec: + clusterIP: None + selector: + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/instance: devops-info-stateful +``` + +## Minikube Repair + +The old local minikube profile was corrupted, so it was recreated before running the lab. Initial failure: + +```text +X Exiting due to K8S_APISERVER_MISSING: wait 6m0s for node: wait for apiserver proc: apiserver process never appeared +failed to run Kubelet: unable to load bootstrap kubeconfig: stat /etc/kubernetes/bootstrap-kubelet.conf: no such file or directory +``` + +The profile was recreated: + +```bash +minikube delete +minikube start --driver=docker +``` + +Successful start output: + +```text +* Configuring bridge CNI (Container Networking Interface) ... +* Verifying Kubernetes components... + - Using image gcr.io/k8s-minikube/storage-provisioner:v5 +* Enabled addons: storage-provisioner, default-storageclass +* Done! 
kubectl is now configured to use "minikube" cluster and "default" namespace by default +``` + +Cluster status: + +```bash +minikube status +``` + +Output: + +```text +minikube +type: Control Plane +host: Running +kubelet: Running +apiserver: Running +kubeconfig: Configured +``` + +Node and storage verification: + +```bash +kubectl get nodes +kubectl get storageclass +``` + +Output: + +```text +NAME STATUS ROLES AGE VERSION +minikube Ready control-plane 8s v1.35.1 + +NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE +standard (default) k8s.io/minikube-hostpath Delete Immediate false 5s +``` + +## Live Cluster Verification + +The StatefulSet release was installed on May 7, 2026: + +```bash +helm upgrade --install devops-info-stateful k8s/devops-info-service \ + --namespace lab15 \ + --create-namespace \ + -f k8s/devops-info-service/values-statefulset.yaml \ + --wait \ + --timeout 5m +``` + +Output: + +```text +Release "devops-info-stateful" does not exist. Installing it now. 
+NAME: devops-info-stateful +LAST DEPLOYED: Thu May 7 23:37:54 2026 +NAMESPACE: lab15 +STATUS: deployed +REVISION: 1 +DESCRIPTION: Install complete +``` + +Helm release verification: + +```bash +helm list -n lab15 +``` + +Output: + +```text +NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION +devops-info-stateful lab15 1 2026-05-07 23:37:54.389294531 +0300 MSK deployed devops-info-service-0.2.0 1.0.0 +``` + +Resource verification: + +```bash +kubectl get po,sts,svc,pvc -n lab15 +``` + +Output: + +```text +NAME READY STATUS RESTARTS AGE +pod/devops-info-stateful-devops-info-service-0 1/1 Running 0 65s +pod/devops-info-stateful-devops-info-service-1 1/1 Running 0 51s +pod/devops-info-stateful-devops-info-service-2 1/1 Running 0 37s + +NAME READY AGE +statefulset.apps/devops-info-stateful-devops-info-service 3/3 65s + +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +service/devops-info-stateful-devops-info-service ClusterIP 10.107.125.83 80/TCP 65s +service/devops-info-stateful-devops-info-service-headless ClusterIP None 80/TCP 65s + +NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS VOLUMEATTRIBUTESCLASS AGE +persistentvolumeclaim/app-data-devops-info-stateful-devops-info-service-0 Bound pvc-bbda46bd-330c-4b6d-b202-f4b18be333e3 100Mi RWO standard 65s +persistentvolumeclaim/app-data-devops-info-stateful-devops-info-service-1 Bound pvc-9e91a004-5224-4777-8d4a-93f1197a5133 100Mi RWO standard 51s +persistentvolumeclaim/app-data-devops-info-stateful-devops-info-service-2 Bound pvc-fbc8c099-47e1-488b-aed3-0426ef58f23e 100Mi RWO standard 37s +``` + +## Network Identity Verification + +Use pod-to-pod DNS through the headless Service: + +```bash +kubectl exec -n lab15 devops-info-stateful-devops-info-service-0 -- \ + python -c 'import socket; name="devops-info-stateful-devops-info-service-1.devops-info-stateful-devops-info-service-headless.lab15.svc.cluster.local"; print(name); print(socket.gethostbyname_ex(name))' +``` + +Output: + +```text 
+devops-info-stateful-devops-info-service-1.devops-info-stateful-devops-info-service-headless.lab15.svc.cluster.local +('devops-info-stateful-devops-info-service-1.devops-info-stateful-devops-info-service-headless.lab15.svc.cluster.local', [], ['10.244.0.5']) +``` + +The lookup proves the stable network identity for pod ordinal `1`. The fully qualified form is: + +```text +devops-info-stateful-devops-info-service-1.devops-info-stateful-devops-info-service-headless.lab15.svc.cluster.local +``` + +## Per-Pod Storage Evidence + +Each pod was called directly from inside its own container so the request increments only that pod's mounted visits file: + +```bash +kubectl exec -n lab15 devops-info-stateful-devops-info-service-0 -- \ + python -c 'import json, urllib.request; url="http://127.0.0.1:5000/"; print(json.load(urllib.request.urlopen(url))["visits"]["count"]); print(json.load(urllib.request.urlopen(url))["visits"]["count"])' + +kubectl exec -n lab15 devops-info-stateful-devops-info-service-1 -- \ + python -c 'import json, urllib.request; url="http://127.0.0.1:5000/"; print(json.load(urllib.request.urlopen(url))["visits"]["count"])' + +kubectl exec -n lab15 devops-info-stateful-devops-info-service-2 -- \ + python -c 'import json, urllib.request; url="http://127.0.0.1:5000/"; print(json.load(urllib.request.urlopen(url))["visits"]["count"]); print(json.load(urllib.request.urlopen(url))["visits"]["count"]); print(json.load(urllib.request.urlopen(url))["visits"]["count"])' +``` + +Outputs: + +```text +pod-0: +1 +2 + +pod-1: +1 + +pod-2: +1 +2 +3 +``` + +Then inspect each visits file: + +```bash +kubectl exec -n lab15 devops-info-stateful-devops-info-service-0 -- cat /data/visits +kubectl exec -n lab15 devops-info-stateful-devops-info-service-1 -- cat /data/visits +kubectl exec -n lab15 devops-info-stateful-devops-info-service-2 -- cat /data/visits +``` + +Output: + +```text +pod-0: +2 + +pod-1: +1 + +pod-2: +3 +``` + +Different counts prove per-pod storage isolation. 
+ +## Persistence Test + +Delete one pod, not the StatefulSet: + +```bash +kubectl exec -n lab15 devops-info-stateful-devops-info-service-0 -- cat /data/visits +kubectl delete pod -n lab15 devops-info-stateful-devops-info-service-0 +kubectl wait --for=condition=Ready pod/devops-info-stateful-devops-info-service-0 -n lab15 --timeout=120s +kubectl exec -n lab15 devops-info-stateful-devops-info-service-0 -- cat /data/visits +``` + +Output: + +```text +2 +pod "devops-info-stateful-devops-info-service-0" deleted from lab15 namespace +pod/devops-info-stateful-devops-info-service-0 condition met +2 +``` + +The value before and after deletion is the same because pod `-0` remounted PVC `app-data-devops-info-stateful-devops-info-service-0`. + +The recreated pod has a new pod IP, while the PVC name and stored data remained stable: + +```bash +kubectl get pod devops-info-stateful-devops-info-service-0 -n lab15 -o wide +kubectl get pvc app-data-devops-info-stateful-devops-info-service-0 -n lab15 +``` + +Output: + +```text +NAME READY STATUS RESTARTS AGE IP NODE +devops-info-stateful-devops-info-service-0 1/1 Running 0 56s 10.244.0.8 minikube + +NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS VOLUMEATTRIBUTESCLASS AGE +app-data-devops-info-stateful-devops-info-service-0 Bound pvc-bbda46bd-330c-4b6d-b202-f4b18be333e3 100Mi RWO standard 4m34s +``` + +## Bonus - Update Strategies + +Partitioned rolling update is configurable: + +```bash +helm template devops-info-stateful k8s/devops-info-service \ + -n lab15 \ + -f k8s/devops-info-service/values-statefulset.yaml \ + --set statefulset.updateStrategy.rollingUpdate.partition=2 \ + --show-only templates/statefulset.yaml +``` + +Output: + +```text +19: updateStrategy: +20: type: RollingUpdate +22: partition: 2 +``` + +With `partition: 2`, only pods with ordinal `>= 2` update automatically. For a three-replica StatefulSet, that means only pod `-2` updates. 
+ +`OnDelete` is also supported: + +```bash +helm template devops-info-stateful k8s/devops-info-service \ + -n lab15 \ + -f k8s/devops-info-service/values-statefulset.yaml \ + --set statefulset.updateStrategy.type=OnDelete \ + --show-only templates/statefulset.yaml +``` + +Output: + +```text +19: updateStrategy: +20: type: OnDelete +``` + +`OnDelete` is useful for stateful systems where an operator wants to update each replica manually after checking replication health, backups, or quorum. + +## Final Checklist + +| Requirement | Evidence | +| --- | --- | +| StatefulSet guarantees documented | `StatefulSet Overview` section | +| `statefulset.yaml` created | `devops-info-service/templates/statefulset.yaml` | +| `volumeClaimTemplates` configured | Rendered StatefulSet proof and live PVC output | +| Headless Service created | `devops-info-service/templates/headless-service.yaml` and live service output | +| Per-pod PVCs verified | `kubectl get po,sts,svc,pvc -n lab15` output | +| DNS resolution tested | `socket.gethostbyname_ex(...)` output for pod `-1` | +| Per-pod storage isolation proven | `/data/visits` values `2`, `1`, `3` | +| Persistence test passed | Pod `-0` deleted, recreated, and `/data/visits` stayed `2` | +| Bonus partitioned rolling update | Rendered `partition: 2` | +| Bonus `OnDelete` strategy | Rendered `type: OnDelete` | diff --git a/k8s/argocd/application-dev.yaml b/k8s/argocd/application-dev.yaml new file mode 100644 index 0000000000..fd51ce05ad --- /dev/null +++ b/k8s/argocd/application-dev.yaml @@ -0,0 +1,26 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: devops-info-service-dev + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: https://github.com/egraPA006/DevOps-Core-Course.git + targetRevision: lab14 + path: k8s/devops-info-service + helm: + releaseName: devops-info-dev + valueFiles: + - values-dev.yaml + destination: + server: 
https://kubernetes.default.svc + namespace: dev + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/k8s/argocd/application-prod.yaml b/k8s/argocd/application-prod.yaml new file mode 100644 index 0000000000..69948c2619 --- /dev/null +++ b/k8s/argocd/application-prod.yaml @@ -0,0 +1,23 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: devops-info-service-prod + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: https://github.com/egraPA006/DevOps-Core-Course.git + targetRevision: lab14 + path: k8s/devops-info-service + helm: + releaseName: devops-info-prod + valueFiles: + - values-prod.yaml + destination: + server: https://kubernetes.default.svc + namespace: prod + syncPolicy: + syncOptions: + - CreateNamespace=true diff --git a/k8s/argocd/application-rollouts-bluegreen.yaml b/k8s/argocd/application-rollouts-bluegreen.yaml new file mode 100644 index 0000000000..2efd4de4d9 --- /dev/null +++ b/k8s/argocd/application-rollouts-bluegreen.yaml @@ -0,0 +1,23 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: devops-info-service-rollouts-bluegreen + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: https://github.com/egraPA006/DevOps-Core-Course.git + targetRevision: lab14 + path: k8s/devops-info-service + helm: + releaseName: devops-info-bluegreen + valueFiles: + - values-bluegreen.yaml + destination: + server: https://kubernetes.default.svc + namespace: rollouts-bluegreen + syncPolicy: + syncOptions: + - CreateNamespace=true diff --git a/k8s/argocd/application-rollouts-canary.yaml b/k8s/argocd/application-rollouts-canary.yaml new file mode 100644 index 0000000000..3225aa53c6 --- /dev/null +++ b/k8s/argocd/application-rollouts-canary.yaml @@ -0,0 +1,23 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: 
devops-info-service-rollouts-canary + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: https://github.com/egraPA006/DevOps-Core-Course.git + targetRevision: lab14 + path: k8s/devops-info-service + helm: + releaseName: devops-info-canary + valueFiles: + - values-canary.yaml + destination: + server: https://kubernetes.default.svc + namespace: rollouts-canary + syncPolicy: + syncOptions: + - CreateNamespace=true diff --git a/k8s/argocd/application.yaml b/k8s/argocd/application.yaml new file mode 100644 index 0000000000..c034a00ed5 --- /dev/null +++ b/k8s/argocd/application.yaml @@ -0,0 +1,23 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: devops-info-service + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: https://github.com/egraPA006/DevOps-Core-Course.git + targetRevision: lab14 + path: k8s/devops-info-service + helm: + releaseName: devops-info + valueFiles: + - values.yaml + destination: + server: https://kubernetes.default.svc + namespace: default + syncPolicy: + syncOptions: + - CreateNamespace=true diff --git a/k8s/argocd/applicationset.yaml b/k8s/argocd/applicationset.yaml new file mode 100644 index 0000000000..0b89623a10 --- /dev/null +++ b/k8s/argocd/applicationset.yaml @@ -0,0 +1,51 @@ +apiVersion: argoproj.io/v1alpha1 +kind: ApplicationSet +metadata: + name: devops-info-service-environments + namespace: argocd +spec: + goTemplate: true + generators: + - list: + elements: + - environment: dev + namespace: dev + releaseName: devops-info-dev + valuesFile: values-dev.yaml + autoSync: true + - environment: prod + namespace: prod + releaseName: devops-info-prod + valuesFile: values-prod.yaml + autoSync: false + template: + metadata: + name: 'devops-info-service-{{ .environment }}' + finalizers: + - resources-finalizer.argocd.argoproj.io + spec: + project: default + source: + repoURL: 
https://github.com/egraPA006/DevOps-Core-Course.git + targetRevision: lab14 + path: k8s/devops-info-service + helm: + releaseName: '{{ .releaseName }}' + valueFiles: + - '{{ .valuesFile }}' + destination: + server: https://kubernetes.default.svc + namespace: '{{ .namespace }}' + syncPolicy: + syncOptions: + - CreateNamespace=true + templatePatch: | + {{- if .autoSync }} + spec: + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true + {{- end }} diff --git a/k8s/argocd/namespaces.yaml b/k8s/argocd/namespaces.yaml new file mode 100644 index 0000000000..b5fea07dee --- /dev/null +++ b/k8s/argocd/namespaces.yaml @@ -0,0 +1,31 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: dev + labels: + app.kubernetes.io/part-of: devops-core-course + environment: dev +--- +apiVersion: v1 +kind: Namespace +metadata: + name: prod + labels: + app.kubernetes.io/part-of: devops-core-course + environment: prod +--- +apiVersion: v1 +kind: Namespace +metadata: + name: rollouts-canary + labels: + app.kubernetes.io/part-of: devops-core-course + environment: rollouts-canary +--- +apiVersion: v1 +kind: Namespace +metadata: + name: rollouts-bluegreen + labels: + app.kubernetes.io/part-of: devops-core-course + environment: rollouts-bluegreen diff --git a/k8s/deployment.yml b/k8s/deployment.yml new file mode 100644 index 0000000000..8d3c386f5a --- /dev/null +++ b/k8s/deployment.yml @@ -0,0 +1,65 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: devops-info-service + labels: + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/component: web + app.kubernetes.io/part-of: devops-core-course +spec: + replicas: 3 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + selector: + matchLabels: + app.kubernetes.io/name: devops-info-service + template: + metadata: + labels: + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/component: web + app.kubernetes.io/part-of: devops-core-course + 
spec: + containers: + - name: app + image: egrapa/devops-core-course-lab2:latest + imagePullPolicy: Always + ports: + - name: http + containerPort: 5000 + protocol: TCP + env: + - name: HOST + value: "0.0.0.0" + - name: PORT + value: "5000" + - name: DEBUG + value: "False" + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + failureThreshold: 3 + successThreshold: 1 + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 2 + failureThreshold: 3 + successThreshold: 1 + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 250m + memory: 256Mi diff --git a/k8s/devops-info-service/.helmignore b/k8s/devops-info-service/.helmignore new file mode 100644 index 0000000000..ae0614d4cf --- /dev/null +++ b/k8s/devops-info-service/.helmignore @@ -0,0 +1,8 @@ +.DS_Store +.git/ +.gitignore +*.swp +*.bak +*.tmp +*.orig +README.md diff --git a/k8s/devops-info-service/Chart.yaml b/k8s/devops-info-service/Chart.yaml new file mode 100644 index 0000000000..79f5dc031d --- /dev/null +++ b/k8s/devops-info-service/Chart.yaml @@ -0,0 +1,15 @@ +apiVersion: v2 +name: devops-info-service +description: Helm chart for the DevOps course Flask information service +type: application +version: 0.2.0 +appVersion: "1.0.0" +keywords: + - flask + - python + - kubernetes + - devops +maintainers: + - name: Egor P. 
+sources: + - https://github.com/egrapa/DevOps-Core-Course diff --git a/k8s/devops-info-service/files/config.json b/k8s/devops-info-service/files/config.json new file mode 100644 index 0000000000..480203a032 --- /dev/null +++ b/k8s/devops-info-service/files/config.json @@ -0,0 +1,13 @@ +{ + "applicationName": "devops-info-service", + "environment": "dev", + "featureFlags": { + "visitsPersistence": true, + "metricsEnabled": true, + "healthChecksEnabled": true + }, + "settings": { + "responseFormat": "json", + "configSource": "helm-file-configmap" + } +} diff --git a/k8s/devops-info-service/templates/NOTES.txt b/k8s/devops-info-service/templates/NOTES.txt new file mode 100644 index 0000000000..74a56387fe --- /dev/null +++ b/k8s/devops-info-service/templates/NOTES.txt @@ -0,0 +1,48 @@ +1. Get the application URL: +{{- if eq .Values.service.type "NodePort" }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "devops-info-service.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT/health +{{- else if eq .Values.service.type "LoadBalancer" }} + kubectl get svc --namespace {{ .Release.Namespace }} {{ include "devops-info-service.fullname" . }} -w +{{- else }} + kubectl port-forward --namespace {{ .Release.Namespace }} svc/{{ include "devops-info-service.fullname" . }} 8080:{{ .Values.service.port }} + echo http://127.0.0.1:8080/health +{{- end }} + +2. Verify ConfigMap mounts and environment variables: + POD=$(kubectl get pods --namespace {{ .Release.Namespace }} -l app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/name={{ include "devops-info-service.name" . 
}} -o jsonpath="{.items[0].metadata.name}") + kubectl exec --namespace {{ .Release.Namespace }} $POD -- cat {{ .Values.config.appConfigPath }} + kubectl exec --namespace {{ .Release.Namespace }} $POD -- printenv | grep -E "APP_|LOG_LEVEL|VISITS_FILE" + +3. Verify persistence: + curl http://127.0.0.1:8080/ + curl http://127.0.0.1:8080/visits + kubectl delete pod --namespace {{ .Release.Namespace }} $POD + +{{- if .Values.statefulset.enabled }} + +4. Verify StatefulSet identity and per-pod storage: + kubectl get po,sts,svc,pvc --namespace {{ .Release.Namespace }} + kubectl exec --namespace {{ .Release.Namespace }} {{ include "devops-info-service.fullname" . }}-0 -- getent hosts {{ include "devops-info-service.fullname" . }}-1.{{ include "devops-info-service.headlessServiceName" . }} + kubectl exec --namespace {{ .Release.Namespace }} {{ include "devops-info-service.fullname" . }}-0 -- cat {{ .Values.persistence.mountPath }}/visits + +{{- end }} + +{{- if .Values.rollout.enabled }} + +4. Watch the Argo Rollout: + kubectl argo rollouts get rollout {{ include "devops-info-service.fullname" . }} --namespace {{ .Release.Namespace }} -w + +{{- if eq .Values.rollout.strategy "canary" }} +5. Promote or abort the canary: + kubectl argo rollouts promote {{ include "devops-info-service.fullname" . }} --namespace {{ .Release.Namespace }} + kubectl argo rollouts abort {{ include "devops-info-service.fullname" . }} --namespace {{ .Release.Namespace }} +{{- end }} + +{{- if eq .Values.rollout.strategy "blueGreen" }} +5. Test the preview service, then promote: + kubectl port-forward --namespace {{ .Release.Namespace }} svc/{{ include "devops-info-service.fullname" . }}-preview 8081:{{ .Values.rollout.blueGreen.previewService.port }} + kubectl argo rollouts promote {{ include "devops-info-service.fullname" . 
}} --namespace {{ .Release.Namespace }} +{{- end }} +{{- end }} diff --git a/k8s/devops-info-service/templates/_helpers.tpl b/k8s/devops-info-service/templates/_helpers.tpl new file mode 100644 index 0000000000..a0dd3396ed --- /dev/null +++ b/k8s/devops-info-service/templates/_helpers.tpl @@ -0,0 +1,107 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "devops-info-service.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +*/}} +{{- define "devops-info-service.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart label value. +*/}} +{{- define "devops-info-service.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Common selector labels. +*/}} +{{- define "devops-info-service.selectorLabels" -}} +app.kubernetes.io/name: {{ include "devops-info-service.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{/* +Common labels. +*/}} +{{- define "devops-info-service.labels" -}} +helm.sh/chart: {{ include "devops-info-service.chart" . }} +{{ include "devops-info-service.selectorLabels" . }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/component: web +app.kubernetes.io/part-of: devops-core-course +{{- end -}} + +{{/* +Service account name. +*/}} +{{- define "devops-info-service.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} +{{- default (include "devops-info-service.fullname" .) 
.Values.serviceAccount.name -}} +{{- else -}} +{{- default "default" .Values.serviceAccount.name -}} +{{- end -}} +{{- end -}} + +{{/* +Secret name. +*/}} +{{- define "devops-info-service.secretName" -}} +{{- default (printf "%s-secret" (include "devops-info-service.fullname" .)) .Values.secret.name -}} +{{- end -}} + +{{/* +File-based ConfigMap name. +*/}} +{{- define "devops-info-service.configMapName" -}} +{{- printf "%s-config" (include "devops-info-service.fullname" .) -}} +{{- end -}} + +{{/* +Environment ConfigMap name. +*/}} +{{- define "devops-info-service.envConfigMapName" -}} +{{- printf "%s-env" (include "devops-info-service.fullname" .) -}} +{{- end -}} + +{{/* +PersistentVolumeClaim name. +*/}} +{{- define "devops-info-service.pvcName" -}} +{{- printf "%s-data" (include "devops-info-service.fullname" .) -}} +{{- end -}} + +{{/* +Headless service name for StatefulSet network identities. +*/}} +{{- define "devops-info-service.headlessServiceName" -}} +{{- printf "%s-headless" (include "devops-info-service.fullname" .) -}} +{{- end -}} + +{{/* +Vault agent annotations. 
+*/}} +{{- define "devops-info-service.vaultAnnotations" -}} +vault.hashicorp.com/agent-inject: "true" +vault.hashicorp.com/auth-path: {{ .Values.vault.authPath | quote }} +vault.hashicorp.com/role: {{ .Values.vault.role | quote }} +vault.hashicorp.com/agent-inject-secret-{{ .Values.vault.injectFileName }}: {{ .Values.vault.secretPath | quote }} +vault.hashicorp.com/agent-inject-template-{{ .Values.vault.injectFileName }}: | +{{ .Values.vault.template | nindent 2 }} +{{- end -}} diff --git a/k8s/devops-info-service/templates/analysis-template.yaml b/k8s/devops-info-service/templates/analysis-template.yaml new file mode 100644 index 0000000000..51ea6faa04 --- /dev/null +++ b/k8s/devops-info-service/templates/analysis-template.yaml @@ -0,0 +1,19 @@ +{{- if and .Values.rollout.enabled (not .Values.statefulset.enabled) (eq .Values.rollout.strategy "canary") .Values.rollout.canary.analysis.enabled }} +apiVersion: argoproj.io/v1alpha1 +kind: AnalysisTemplate +metadata: + name: {{ .Values.rollout.canary.analysis.templateName }} + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} +spec: + metrics: + - name: {{ .Values.rollout.canary.analysis.metricName }} + interval: {{ .Values.rollout.canary.analysis.interval }} + count: {{ .Values.rollout.canary.analysis.count }} + failureLimit: {{ .Values.rollout.canary.analysis.failureLimit }} + successCondition: {{ .Values.rollout.canary.analysis.successCondition | quote }} + provider: + web: + url: http://{{ include "devops-info-service.fullname" . 
}}.{{ .Release.Namespace }}.svc{{ .Values.rollout.canary.analysis.path }} + jsonPath: {{ .Values.rollout.canary.analysis.jsonPath | quote }} +{{- end }} diff --git a/k8s/devops-info-service/templates/configmap.yaml b/k8s/devops-info-service/templates/configmap.yaml new file mode 100644 index 0000000000..a9c2c94a2a --- /dev/null +++ b/k8s/devops-info-service/templates/configmap.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "devops-info-service.configMapName" . }} + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} +data: + {{ .Values.config.fileName }}: |- +{{ .Files.Get "files/config.json" | indent 4 }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "devops-info-service.envConfigMapName" . }} + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} +data: + APP_CONFIG_PATH: {{ .Values.config.appConfigPath | quote }} + APP_DISPLAY_NAME: {{ .Values.config.appName | quote }} + APP_ENV: {{ .Values.config.environment | quote }} + LOG_LEVEL: {{ .Values.config.logLevel | quote }} + VISITS_FILE: {{ .Values.config.visitsFile | quote }} diff --git a/k8s/devops-info-service/templates/deployment.yaml b/k8s/devops-info-service/templates/deployment.yaml new file mode 100644 index 0000000000..02bf8c7074 --- /dev/null +++ b/k8s/devops-info-service/templates/deployment.yaml @@ -0,0 +1,107 @@ +{{- if and (not .Values.rollout.enabled) (not .Values.statefulset.enabled) }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "devops-info-service.fullname" . }} + labels: + {{- include "devops-info-service.labels" . 
| nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + strategy: + type: {{ .Values.strategy.type }} + {{- if eq .Values.strategy.type "RollingUpdate" }} + rollingUpdate: + maxSurge: {{ .Values.strategy.rollingUpdate.maxSurge }} + maxUnavailable: {{ .Values.strategy.rollingUpdate.maxUnavailable }} + {{- end }} + selector: + matchLabels: + {{- include "devops-info-service.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "devops-info-service.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: web + app.kubernetes.io/part-of: devops-core-course + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if or .Values.vault.enabled .Values.podAnnotations }} + annotations: + {{- if .Values.vault.enabled }} + {{- include "devops-info-service.vaultAnnotations" . | nindent 8 }} + {{- end }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + spec: + serviceAccountName: {{ include "devops-info-service.serviceAccountName" . }} + automountServiceAccountToken: {{ .Values.serviceAccount.automount }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ include "devops-info-service.name" . }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + securityContext: + {{- toYaml .Values.containerSecurityContext | nindent 12 }} + ports: + - name: http + containerPort: {{ .Values.container.port }} + protocol: TCP + env: + {{- range .Values.env }} + - name: {{ .name }} + value: {{ .value | quote }} + {{- end }} + envFrom: + - configMapRef: + name: {{ include "devops-info-service.envConfigMapName" . }} + {{- if or .Values.secret.create .Values.secret.name }} + - secretRef: + name: {{ include "devops-info-service.secretName" . 
}} + {{- end }} + volumeMounts: + - name: app-config + mountPath: {{ printf "%s/%s" .Values.config.fileMountPath .Values.config.fileName }} + subPath: {{ .Values.config.fileName }} + readOnly: true + - name: app-data + mountPath: {{ .Values.persistence.mountPath }} + {{- if .Values.readinessProbe.enabled }} + readinessProbe: + httpGet: + path: {{ .Values.readinessProbe.path }} + port: {{ .Values.readinessProbe.port }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + {{- end }} + {{- if .Values.livenessProbe.enabled }} + livenessProbe: + httpGet: + path: {{ .Values.livenessProbe.path }} + port: {{ .Values.livenessProbe.port }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: app-config + configMap: + name: {{ include "devops-info-service.configMapName" . }} + - name: app-data + {{- if .Values.persistence.enabled }} + persistentVolumeClaim: + claimName: {{ include "devops-info-service.pvcName" . 
}} + {{- else }} + emptyDir: {} + {{- end }} +{{- end }} diff --git a/k8s/devops-info-service/templates/headless-service.yaml b/k8s/devops-info-service/templates/headless-service.yaml new file mode 100644 index 0000000000..fca2c9628d --- /dev/null +++ b/k8s/devops-info-service/templates/headless-service.yaml @@ -0,0 +1,21 @@ +{{- if .Values.statefulset.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "devops-info-service.headlessServiceName" . }} + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} + {{- with .Values.statefulset.headlessService.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + clusterIP: None + selector: + {{- include "devops-info-service.selectorLabels" . | nindent 4 }} + ports: + - name: http + protocol: TCP + port: {{ .Values.service.port }} + targetPort: {{ .Values.service.targetPort }} +{{- end }} diff --git a/k8s/devops-info-service/templates/hooks/post-install-job.yaml b/k8s/devops-info-service/templates/hooks/post-install-job.yaml new file mode 100644 index 0000000000..e8c0a67bab --- /dev/null +++ b/k8s/devops-info-service/templates/hooks/post-install-job.yaml @@ -0,0 +1,46 @@ +{{- if .Values.hooks.enabled }} +apiVersion: batch/v1 +kind: Job +metadata: + name: "{{ include "devops-info-service.fullname" . }}-post-install" + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": post-install + "helm.sh/hook-weight": "{{ .Values.hooks.postInstall.weight }}" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + ttlSecondsAfterFinished: {{ .Values.hooks.postInstall.ttlSecondsAfterFinished }} + template: + metadata: + labels: + {{- include "devops-info-service.labels" . 
| nindent 8 }} + spec: + restartPolicy: Never + containers: + - name: post-install-smoke-test + image: "{{ .Values.hooks.image.repository }}:{{ .Values.hooks.image.tag }}" + imagePullPolicy: {{ .Values.hooks.image.pullPolicy }} + command: + - python + - -c + - | + import sys + import time + import urllib.request + url = "http://{{ include "devops-info-service.fullname" . }}:{{ .Values.service.port }}/health" + print(f"Waiting for service endpoint: {url}") + last_error = None + for attempt in range(30): + try: + with urllib.request.urlopen(url, timeout=5) as response: + body = response.read().decode("utf-8", errors="replace") + print(body) + print("Post-install smoke test completed") + sys.exit(0) + except Exception as exc: + last_error = exc + print(f"Attempt {attempt + 1}/30 failed: {exc}") + time.sleep(2) + raise SystemExit(f"Smoke test failed: {last_error}") +{{- end }} diff --git a/k8s/devops-info-service/templates/hooks/pre-install-job.yaml b/k8s/devops-info-service/templates/hooks/pre-install-job.yaml new file mode 100644 index 0000000000..fe798d7f3e --- /dev/null +++ b/k8s/devops-info-service/templates/hooks/pre-install-job.yaml @@ -0,0 +1,40 @@ +{{- if .Values.hooks.enabled }} +apiVersion: batch/v1 +kind: Job +metadata: + name: "{{ include "devops-info-service.fullname" . }}-pre-install" + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": pre-install + "helm.sh/hook-weight": "{{ .Values.hooks.preInstall.weight }}" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + ttlSecondsAfterFinished: {{ .Values.hooks.preInstall.ttlSecondsAfterFinished }} + template: + metadata: + labels: + {{- include "devops-info-service.labels" . 
| nindent 8 }} + spec: + restartPolicy: Never + containers: + - name: pre-install-validation + image: "{{ .Values.hooks.image.repository }}:{{ .Values.hooks.image.tag }}" + imagePullPolicy: {{ .Values.hooks.image.pullPolicy }} + command: + - python + - -c + - | + import sys + replica_count = int("{{ .Values.replicaCount }}") + image_repo = "{{ .Values.image.repository }}" + image_tag = "{{ .Values.image.tag }}" + print("Validating release {{ .Release.Name }}") + if replica_count < 1: + raise SystemExit("replicaCount must be >= 1") + if not image_repo: + raise SystemExit("image.repository must not be empty") + print(f"Replica count: {replica_count}") + print(f"Image: {image_repo}:{image_tag}") + print("Pre-install validation completed") +{{- end }} diff --git a/k8s/devops-info-service/templates/preview-service.yaml b/k8s/devops-info-service/templates/preview-service.yaml new file mode 100644 index 0000000000..0f71397cfc --- /dev/null +++ b/k8s/devops-info-service/templates/preview-service.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.rollout.enabled (not .Values.statefulset.enabled) (eq .Values.rollout.strategy "blueGreen") }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "devops-info-service.fullname" . }}-preview + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} + {{- with .Values.rollout.blueGreen.previewService.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.rollout.blueGreen.previewService.type }} + selector: + {{- include "devops-info-service.selectorLabels" . 
| nindent 4 }} + ports: + - name: http + protocol: TCP + port: {{ .Values.rollout.blueGreen.previewService.port }} + targetPort: {{ .Values.rollout.blueGreen.previewService.targetPort }} +{{- end }} diff --git a/k8s/devops-info-service/templates/pvc.yaml b/k8s/devops-info-service/templates/pvc.yaml new file mode 100644 index 0000000000..fce844f03c --- /dev/null +++ b/k8s/devops-info-service/templates/pvc.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.persistence.enabled (not .Values.statefulset.enabled) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "devops-info-service.pvcName" . }} + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} +spec: + accessModes: + {{- toYaml .Values.persistence.accessModes | nindent 4 }} + resources: + requests: + storage: {{ .Values.persistence.size }} + {{- if .Values.persistence.storageClass }} + storageClassName: {{ .Values.persistence.storageClass | quote }} + {{- end }} +{{- end }} diff --git a/k8s/devops-info-service/templates/rollout.yaml b/k8s/devops-info-service/templates/rollout.yaml new file mode 100644 index 0000000000..c389dd7c7a --- /dev/null +++ b/k8s/devops-info-service/templates/rollout.yaml @@ -0,0 +1,116 @@ +{{- if and .Values.rollout.enabled (not .Values.statefulset.enabled) }} +apiVersion: argoproj.io/v1alpha1 +kind: Rollout +metadata: + name: {{ include "devops-info-service.fullname" . }} + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + revisionHistoryLimit: {{ .Values.rollout.revisionHistoryLimit }} + progressDeadlineSeconds: {{ .Values.rollout.progressDeadlineSeconds }} + strategy: + {{- if eq .Values.rollout.strategy "canary" }} + canary: + steps: + {{- toYaml .Values.rollout.canary.steps | nindent 8 }} + {{- else if eq .Values.rollout.strategy "blueGreen" }} + blueGreen: + activeService: {{ include "devops-info-service.fullname" . }} + previewService: {{ include "devops-info-service.fullname" . 
}}-preview + autoPromotionEnabled: {{ .Values.rollout.blueGreen.autoPromotionEnabled }} + scaleDownDelaySeconds: {{ .Values.rollout.blueGreen.scaleDownDelaySeconds }} + {{- else }} + {{- fail "rollout.strategy must be either canary or blueGreen" }} + {{- end }} + selector: + matchLabels: + {{- include "devops-info-service.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "devops-info-service.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: web + app.kubernetes.io/part-of: devops-core-course + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if or .Values.vault.enabled .Values.podAnnotations }} + annotations: + {{- if .Values.vault.enabled }} + {{- include "devops-info-service.vaultAnnotations" . | nindent 8 }} + {{- end }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + spec: + serviceAccountName: {{ include "devops-info-service.serviceAccountName" . }} + automountServiceAccountToken: {{ .Values.serviceAccount.automount }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ include "devops-info-service.name" . }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + securityContext: + {{- toYaml .Values.containerSecurityContext | nindent 12 }} + ports: + - name: http + containerPort: {{ .Values.container.port }} + protocol: TCP + env: + {{- range .Values.env }} + - name: {{ .name }} + value: {{ .value | quote }} + {{- end }} + envFrom: + - configMapRef: + name: {{ include "devops-info-service.envConfigMapName" . }} + {{- if or .Values.secret.create .Values.secret.name }} + - secretRef: + name: {{ include "devops-info-service.secretName" . 
}} + {{- end }} + volumeMounts: + - name: app-config + mountPath: {{ printf "%s/%s" .Values.config.fileMountPath .Values.config.fileName }} + subPath: {{ .Values.config.fileName }} + readOnly: true + - name: app-data + mountPath: {{ .Values.persistence.mountPath }} + {{- if .Values.readinessProbe.enabled }} + readinessProbe: + httpGet: + path: {{ .Values.readinessProbe.path }} + port: {{ .Values.readinessProbe.port }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + {{- end }} + {{- if .Values.livenessProbe.enabled }} + livenessProbe: + httpGet: + path: {{ .Values.livenessProbe.path }} + port: {{ .Values.livenessProbe.port }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: app-config + configMap: + name: {{ include "devops-info-service.configMapName" . }} + - name: app-data + {{- if .Values.persistence.enabled }} + persistentVolumeClaim: + claimName: {{ include "devops-info-service.pvcName" . }} + {{- else }} + emptyDir: {} + {{- end }} +{{- end }} diff --git a/k8s/devops-info-service/templates/secrets.yaml b/k8s/devops-info-service/templates/secrets.yaml new file mode 100644 index 0000000000..78bf16d703 --- /dev/null +++ b/k8s/devops-info-service/templates/secrets.yaml @@ -0,0 +1,13 @@ +{{- if .Values.secret.create }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "devops-info-service.secretName" . 
}} + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} +type: {{ .Values.secret.type }} +stringData: + {{- range $key, $value := .Values.secret.data }} + {{ $key }}: {{ $value | quote }} + {{- end }} +{{- end }} diff --git a/k8s/devops-info-service/templates/service.yaml b/k8s/devops-info-service/templates/service.yaml new file mode 100644 index 0000000000..dabc3cbcaf --- /dev/null +++ b/k8s/devops-info-service/templates/service.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "devops-info-service.fullname" . }} + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} + {{- with .Values.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type }} + selector: + {{- include "devops-info-service.selectorLabels" . | nindent 4 }} + ports: + - name: http + protocol: TCP + port: {{ .Values.service.port }} + targetPort: {{ .Values.service.targetPort }} + {{- if and (eq .Values.service.type "NodePort") .Values.service.nodePort }} + nodePort: {{ .Values.service.nodePort }} + {{- end }} diff --git a/k8s/devops-info-service/templates/serviceaccount.yaml b/k8s/devops-info-service/templates/serviceaccount.yaml new file mode 100644 index 0000000000..20bb7f1973 --- /dev/null +++ b/k8s/devops-info-service/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "devops-info-service.serviceAccountName" . }} + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automount }} +{{- end }} diff --git a/k8s/devops-info-service/templates/servicemonitor.yaml b/k8s/devops-info-service/templates/servicemonitor.yaml new file mode 100644 index 0000000000..8b8251e085 --- /dev/null +++ b/k8s/devops-info-service/templates/servicemonitor.yaml @@ -0,0 +1,20 @@ +{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "devops-info-service.fullname" . }} + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} + {{- with .Values.serviceMonitor.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + {{- include "devops-info-service.selectorLabels" . | nindent 6 }} + endpoints: + - port: http + path: {{ .Values.serviceMonitor.path | quote }} + interval: {{ .Values.serviceMonitor.interval }} + scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout }} +{{- end }} diff --git a/k8s/devops-info-service/templates/statefulset.yaml b/k8s/devops-info-service/templates/statefulset.yaml new file mode 100644 index 0000000000..e797c33ef1 --- /dev/null +++ b/k8s/devops-info-service/templates/statefulset.yaml @@ -0,0 +1,121 @@ +{{- if .Values.statefulset.enabled }} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ include "devops-info-service.fullname" . }} + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} +spec: + serviceName: {{ include "devops-info-service.headlessServiceName" . }} + replicas: {{ .Values.replicaCount }} + podManagementPolicy: {{ .Values.statefulset.podManagementPolicy }} + updateStrategy: + type: {{ .Values.statefulset.updateStrategy.type }} + {{- if eq .Values.statefulset.updateStrategy.type "RollingUpdate" }} + rollingUpdate: + partition: {{ .Values.statefulset.updateStrategy.rollingUpdate.partition }} + {{- end }} + selector: + matchLabels: + {{- include "devops-info-service.selectorLabels" . 
| nindent 6 }} + template: + metadata: + labels: + {{- include "devops-info-service.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: web + app.kubernetes.io/part-of: devops-core-course + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if or .Values.vault.enabled .Values.podAnnotations }} + annotations: + {{- if .Values.vault.enabled }} + {{- include "devops-info-service.vaultAnnotations" . | nindent 8 }} + {{- end }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + spec: + serviceAccountName: {{ include "devops-info-service.serviceAccountName" . }} + automountServiceAccountToken: {{ .Values.serviceAccount.automount }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ include "devops-info-service.name" . }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + securityContext: + {{- toYaml .Values.containerSecurityContext | nindent 12 }} + ports: + - name: http + containerPort: {{ .Values.container.port }} + protocol: TCP + env: + {{- range .Values.env }} + - name: {{ .name }} + value: {{ .value | quote }} + {{- end }} + envFrom: + - configMapRef: + name: {{ include "devops-info-service.envConfigMapName" . }} + {{- if or .Values.secret.create .Values.secret.name }} + - secretRef: + name: {{ include "devops-info-service.secretName" . 
}} + {{- end }} + volumeMounts: + - name: app-config + mountPath: {{ printf "%s/%s" .Values.config.fileMountPath .Values.config.fileName }} + subPath: {{ .Values.config.fileName }} + readOnly: true + - name: app-data + mountPath: {{ .Values.persistence.mountPath }} + {{- if .Values.readinessProbe.enabled }} + readinessProbe: + httpGet: + path: {{ .Values.readinessProbe.path }} + port: {{ .Values.readinessProbe.port }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + {{- end }} + {{- if .Values.livenessProbe.enabled }} + livenessProbe: + httpGet: + path: {{ .Values.livenessProbe.path }} + port: {{ .Values.livenessProbe.port }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: app-config + configMap: + name: {{ include "devops-info-service.configMapName" . }} + {{- if not .Values.persistence.enabled }} + - name: app-data + emptyDir: {} + {{- end }} + {{- if .Values.persistence.enabled }} + volumeClaimTemplates: + - metadata: + name: app-data + labels: + {{- include "devops-info-service.labels" . 
| nindent 10 }} + spec: + accessModes: + {{- toYaml .Values.persistence.accessModes | nindent 10 }} + resources: + requests: + storage: {{ .Values.persistence.size }} + {{- if .Values.persistence.storageClass }} + storageClassName: {{ .Values.persistence.storageClass | quote }} + {{- end }} + {{- end }} +{{- end }} diff --git a/k8s/devops-info-service/values-bluegreen.yaml b/k8s/devops-info-service/values-bluegreen.yaml new file mode 100644 index 0000000000..9223d86946 --- /dev/null +++ b/k8s/devops-info-service/values-bluegreen.yaml @@ -0,0 +1,16 @@ +replicaCount: 3 + +service: + type: ClusterIP + nodePort: null + +rollout: + enabled: true + strategy: blueGreen + blueGreen: + autoPromotionEnabled: false + scaleDownDelaySeconds: 30 + previewService: + type: ClusterIP + port: 80 + targetPort: http diff --git a/k8s/devops-info-service/values-canary.yaml b/k8s/devops-info-service/values-canary.yaml new file mode 100644 index 0000000000..6cdb821c89 --- /dev/null +++ b/k8s/devops-info-service/values-canary.yaml @@ -0,0 +1,28 @@ +replicaCount: 5 + +service: + type: ClusterIP + nodePort: null + +rollout: + enabled: true + strategy: canary + canary: + steps: + - setWeight: 20 + - pause: {} + - analysis: + templates: + - templateName: success-rate + - setWeight: 40 + - pause: + duration: 30s + - setWeight: 60 + - pause: + duration: 30s + - setWeight: 80 + - pause: + duration: 30s + - setWeight: 100 + analysis: + enabled: true diff --git a/k8s/devops-info-service/values-dev.yaml b/k8s/devops-info-service/values-dev.yaml new file mode 100644 index 0000000000..db9d10156c --- /dev/null +++ b/k8s/devops-info-service/values-dev.yaml @@ -0,0 +1,33 @@ +replicaCount: 1 + +image: + tag: latest + pullPolicy: Always + +config: + environment: dev + logLevel: DEBUG + +service: + type: NodePort + nodePort: 30080 + +persistence: + enabled: true + size: 100Mi + +resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 100m + memory: 128Mi + +livenessProbe: + initialDelaySeconds: 5 
+ periodSeconds: 10 + +readinessProbe: + initialDelaySeconds: 3 + periodSeconds: 5 diff --git a/k8s/devops-info-service/values-prod.yaml b/k8s/devops-info-service/values-prod.yaml new file mode 100644 index 0000000000..6d625a0540 --- /dev/null +++ b/k8s/devops-info-service/values-prod.yaml @@ -0,0 +1,33 @@ +replicaCount: 3 + +image: + tag: "1.0.0" + pullPolicy: IfNotPresent + +config: + environment: prod + logLevel: INFO + +service: + type: LoadBalancer + nodePort: null + +persistence: + enabled: true + size: 250Mi + +resources: + requests: + cpu: 150m + memory: 192Mi + limits: + cpu: 300m + memory: 384Mi + +livenessProbe: + initialDelaySeconds: 20 + periodSeconds: 10 + +readinessProbe: + initialDelaySeconds: 10 + periodSeconds: 5 diff --git a/k8s/devops-info-service/values-statefulset.yaml b/k8s/devops-info-service/values-statefulset.yaml new file mode 100644 index 0000000000..f8c211cb07 --- /dev/null +++ b/k8s/devops-info-service/values-statefulset.yaml @@ -0,0 +1,28 @@ +replicaCount: 3 + +rollout: + enabled: false + +statefulset: + enabled: true + podManagementPolicy: OrderedReady + updateStrategy: + type: RollingUpdate + rollingUpdate: + partition: 0 + +service: + type: ClusterIP + nodePort: null + +persistence: + enabled: true + size: 100Mi + +resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 100m + memory: 128Mi diff --git a/k8s/devops-info-service/values.yaml b/k8s/devops-info-service/values.yaml new file mode 100644 index 0000000000..f2ab1ee37c --- /dev/null +++ b/k8s/devops-info-service/values.yaml @@ -0,0 +1,189 @@ +nameOverride: "" +fullnameOverride: "" + +replicaCount: 3 + +image: + repository: egrapa/devops-core-course-lab2 + tag: latest + pullPolicy: Always + +container: + port: 5000 + +service: + type: NodePort + port: 80 + targetPort: http + nodePort: 30080 + annotations: {} + +serviceMonitor: + enabled: false + labels: + release: monitoring + interval: 30s + scrapeTimeout: 10s + path: /metrics + +strategy: + type: RollingUpdate + 
rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + +rollout: + enabled: true + strategy: canary + revisionHistoryLimit: 5 + progressDeadlineSeconds: 600 + canary: + steps: + - setWeight: 20 + - pause: {} + - analysis: + templates: + - templateName: success-rate + - setWeight: 40 + - pause: + duration: 30s + - setWeight: 60 + - pause: + duration: 30s + - setWeight: 80 + - pause: + duration: 30s + - setWeight: 100 + analysis: + enabled: true + templateName: success-rate + metricName: webcheck + path: /health + jsonPath: "{$.status}" + successCondition: result == "healthy" + interval: 10s + count: 3 + failureLimit: 1 + blueGreen: + autoPromotionEnabled: false + scaleDownDelaySeconds: 30 + previewService: + type: ClusterIP + port: 80 + targetPort: http + annotations: {} + +statefulset: + enabled: false + podManagementPolicy: OrderedReady + updateStrategy: + type: RollingUpdate + rollingUpdate: + partition: 0 + headlessService: + annotations: {} + +podAnnotations: {} +podLabels: {} + +podSecurityContext: + fsGroup: 10001 + fsGroupChangePolicy: OnRootMismatch + +containerSecurityContext: + runAsNonRoot: true + runAsUser: 10001 + runAsGroup: 10001 + allowPrivilegeEscalation: false + +env: + - name: HOST + value: "0.0.0.0" + - name: PORT + value: "5000" + - name: DEBUG + value: "False" + +config: + fileMountPath: /config + fileName: config.json + appConfigPath: /config/config.json + appName: devops-info-service + environment: dev + logLevel: INFO + visitsFile: /data/visits + +persistence: + enabled: true + mountPath: /data + accessModes: + - ReadWriteOnce + size: 100Mi + storageClass: "" + +serviceAccount: + create: true + automount: true + annotations: {} + name: "" + +secret: + create: true + name: "" + type: Opaque + data: + username: "change-me" + password: "change-me-too" + +vault: + enabled: false + role: "devops-info-service" + secretPath: "secret/data/devops-info-service/config" + authPath: "auth/kubernetes" + injectFileName: "config.txt" + template: | + {{- with 
secret "secret/data/devops-info-service/config" -}} + APP_USERNAME={{ .Data.data.username }} + APP_PASSWORD={{ .Data.data.password }} + {{- end }} + +resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 250m + memory: 256Mi + +livenessProbe: + enabled: true + path: /health + port: http + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 2 + failureThreshold: 3 + successThreshold: 1 + +readinessProbe: + enabled: true + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + failureThreshold: 3 + successThreshold: 1 + +hooks: + enabled: true + image: + repository: egrapa/devops-core-course-lab2 + tag: latest + pullPolicy: Always + preInstall: + weight: -5 + ttlSecondsAfterFinished: 60 + postInstall: + weight: 5 + ttlSecondsAfterFinished: 60 diff --git a/k8s/image-1.png b/k8s/image-1.png new file mode 100644 index 0000000000..3769a68b88 Binary files /dev/null and b/k8s/image-1.png differ diff --git a/k8s/image-10.png b/k8s/image-10.png new file mode 100644 index 0000000000..6eff7f8a16 Binary files /dev/null and b/k8s/image-10.png differ diff --git a/k8s/image-11.png b/k8s/image-11.png new file mode 100644 index 0000000000..925606370d Binary files /dev/null and b/k8s/image-11.png differ diff --git a/k8s/image-12.png b/k8s/image-12.png new file mode 100644 index 0000000000..61320a7de6 Binary files /dev/null and b/k8s/image-12.png differ diff --git a/k8s/image-13.png b/k8s/image-13.png new file mode 100644 index 0000000000..b64dcec85f Binary files /dev/null and b/k8s/image-13.png differ diff --git a/k8s/image-14.png b/k8s/image-14.png new file mode 100644 index 0000000000..0729215c50 Binary files /dev/null and b/k8s/image-14.png differ diff --git a/k8s/image-15.png b/k8s/image-15.png new file mode 100644 index 0000000000..b04a1a3606 Binary files /dev/null and b/k8s/image-15.png differ diff --git a/k8s/image-16.png b/k8s/image-16.png new file mode 100644 index 0000000000..f5324f6bfc Binary files /dev/null and 
b/k8s/image-16.png differ diff --git a/k8s/image-17.png b/k8s/image-17.png new file mode 100644 index 0000000000..609b22dadd Binary files /dev/null and b/k8s/image-17.png differ diff --git a/k8s/image-18.png b/k8s/image-18.png new file mode 100644 index 0000000000..dcd3ce2a72 Binary files /dev/null and b/k8s/image-18.png differ diff --git a/k8s/image-19.png b/k8s/image-19.png new file mode 100644 index 0000000000..0d4fb0d5b3 Binary files /dev/null and b/k8s/image-19.png differ diff --git a/k8s/image-2.png b/k8s/image-2.png new file mode 100644 index 0000000000..ee9f743ba1 Binary files /dev/null and b/k8s/image-2.png differ diff --git a/k8s/image-20.png b/k8s/image-20.png new file mode 100644 index 0000000000..fbd6ee754b Binary files /dev/null and b/k8s/image-20.png differ diff --git a/k8s/image-21.png b/k8s/image-21.png new file mode 100644 index 0000000000..ddf8c906d8 Binary files /dev/null and b/k8s/image-21.png differ diff --git a/k8s/image-22.png b/k8s/image-22.png new file mode 100644 index 0000000000..5fe08b2923 Binary files /dev/null and b/k8s/image-22.png differ diff --git a/k8s/image-23.png b/k8s/image-23.png new file mode 100644 index 0000000000..17cf564327 Binary files /dev/null and b/k8s/image-23.png differ diff --git a/k8s/image-24.png b/k8s/image-24.png new file mode 100644 index 0000000000..919539d416 Binary files /dev/null and b/k8s/image-24.png differ diff --git a/k8s/image-25.png b/k8s/image-25.png new file mode 100644 index 0000000000..2bf1aed47c Binary files /dev/null and b/k8s/image-25.png differ diff --git a/k8s/image-26.png b/k8s/image-26.png new file mode 100644 index 0000000000..bfc0969596 Binary files /dev/null and b/k8s/image-26.png differ diff --git a/k8s/image-27.png b/k8s/image-27.png new file mode 100644 index 0000000000..574d82380e Binary files /dev/null and b/k8s/image-27.png differ diff --git a/k8s/image-28.png b/k8s/image-28.png new file mode 100644 index 0000000000..bfa376a876 Binary files /dev/null and b/k8s/image-28.png differ 
diff --git a/k8s/image-29.png b/k8s/image-29.png new file mode 100644 index 0000000000..22a9449c8b Binary files /dev/null and b/k8s/image-29.png differ diff --git a/k8s/image-3.png b/k8s/image-3.png new file mode 100644 index 0000000000..b3f67d8206 Binary files /dev/null and b/k8s/image-3.png differ diff --git a/k8s/image-30.png b/k8s/image-30.png new file mode 100644 index 0000000000..b916055c8d Binary files /dev/null and b/k8s/image-30.png differ diff --git a/k8s/image-31.png b/k8s/image-31.png new file mode 100644 index 0000000000..e60d316f99 Binary files /dev/null and b/k8s/image-31.png differ diff --git a/k8s/image-32.png b/k8s/image-32.png new file mode 100644 index 0000000000..8c0700b059 Binary files /dev/null and b/k8s/image-32.png differ diff --git a/k8s/image-33.png b/k8s/image-33.png new file mode 100644 index 0000000000..acecc42ddb Binary files /dev/null and b/k8s/image-33.png differ diff --git a/k8s/image-4.png b/k8s/image-4.png new file mode 100644 index 0000000000..7786c6994a Binary files /dev/null and b/k8s/image-4.png differ diff --git a/k8s/image-5.png b/k8s/image-5.png new file mode 100644 index 0000000000..cdf42ce164 Binary files /dev/null and b/k8s/image-5.png differ diff --git a/k8s/image-6.png b/k8s/image-6.png new file mode 100644 index 0000000000..f7277025fc Binary files /dev/null and b/k8s/image-6.png differ diff --git a/k8s/image-7.png b/k8s/image-7.png new file mode 100644 index 0000000000..d132e7e38d Binary files /dev/null and b/k8s/image-7.png differ diff --git a/k8s/image-8.png b/k8s/image-8.png new file mode 100644 index 0000000000..ded9813cb9 Binary files /dev/null and b/k8s/image-8.png differ diff --git a/k8s/image-9.png b/k8s/image-9.png new file mode 100644 index 0000000000..4669c10d20 Binary files /dev/null and b/k8s/image-9.png differ diff --git a/k8s/image.png b/k8s/image.png new file mode 100644 index 0000000000..4285b2de83 Binary files /dev/null and b/k8s/image.png differ diff --git 
a/k8s/lab16-evidence/alertmanager-alerts.png b/k8s/lab16-evidence/alertmanager-alerts.png new file mode 100644 index 0000000000..0f50006d2a Binary files /dev/null and b/k8s/lab16-evidence/alertmanager-alerts.png differ diff --git a/k8s/lab16-evidence/grafana-kubelet.png b/k8s/lab16-evidence/grafana-kubelet.png new file mode 100644 index 0000000000..dec7c6cf9f Binary files /dev/null and b/k8s/lab16-evidence/grafana-kubelet.png differ diff --git a/k8s/lab16-evidence/grafana-namespace-pods.png b/k8s/lab16-evidence/grafana-namespace-pods.png new file mode 100644 index 0000000000..c811fec787 Binary files /dev/null and b/k8s/lab16-evidence/grafana-namespace-pods.png differ diff --git a/k8s/lab16-evidence/grafana-network.png b/k8s/lab16-evidence/grafana-network.png new file mode 100644 index 0000000000..904dd1385e Binary files /dev/null and b/k8s/lab16-evidence/grafana-network.png differ diff --git a/k8s/lab16-evidence/grafana-node-exporter.png b/k8s/lab16-evidence/grafana-node-exporter.png new file mode 100644 index 0000000000..308c954771 Binary files /dev/null and b/k8s/lab16-evidence/grafana-node-exporter.png differ diff --git a/k8s/lab16-evidence/prometheus-targets.png b/k8s/lab16-evidence/prometheus-targets.png new file mode 100644 index 0000000000..4b4d8a28c1 Binary files /dev/null and b/k8s/lab16-evidence/prometheus-targets.png differ diff --git a/k8s/lab16-init-containers.yaml b/k8s/lab16-init-containers.yaml new file mode 100644 index 0000000000..d10b73ca1a --- /dev/null +++ b/k8s/lab16-init-containers.yaml @@ -0,0 +1,116 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: lab16-content + labels: + app.kubernetes.io/name: lab16-content + app.kubernetes.io/part-of: devops-core-course +data: + index.html: | + Lab 16 init container download evidence. 
+--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: lab16-content + labels: + app.kubernetes.io/name: lab16-content + app.kubernetes.io/part-of: devops-core-course +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: lab16-content + template: + metadata: + labels: + app.kubernetes.io/name: lab16-content + app.kubernetes.io/part-of: devops-core-course + spec: + containers: + - name: nginx + image: nginx:1.27-alpine + ports: + - name: http + containerPort: 80 + volumeMounts: + - name: content + mountPath: /usr/share/nginx/html/index.html + subPath: index.html + readOnly: true + volumes: + - name: content + configMap: + name: lab16-content +--- +apiVersion: v1 +kind: Service +metadata: + name: lab16-content + labels: + app.kubernetes.io/name: lab16-content + app.kubernetes.io/part-of: devops-core-course +spec: + selector: + app.kubernetes.io/name: lab16-content + ports: + - name: http + port: 80 + targetPort: http +--- +apiVersion: v1 +kind: Pod +metadata: + name: lab16-init-download + labels: + app.kubernetes.io/name: lab16-init-download + app.kubernetes.io/part-of: devops-core-course +spec: + restartPolicy: Never + initContainers: + - name: init-download + image: busybox:1.36 + command: + - sh + - -c + - wget -O /work-dir/index.html "http://${LAB16_CONTENT_SERVICE_HOST}" + volumeMounts: + - name: workdir + mountPath: /work-dir + containers: + - name: main-app + image: busybox:1.36 + command: + - sh + - -c + - cat /data/index.html && sleep 3600 + volumeMounts: + - name: workdir + mountPath: /data + volumes: + - name: workdir + emptyDir: {} +--- +apiVersion: v1 +kind: Pod +metadata: + name: lab16-wait-for-service + labels: + app.kubernetes.io/name: lab16-wait-for-service + app.kubernetes.io/part-of: devops-core-course +spec: + restartPolicy: Never + initContainers: + - name: wait-for-service + image: busybox:1.36 + command: + - sh + - -c + - until nslookup lab16-content.default.svc.cluster.local; do echo "waiting for lab16-content"; 
sleep 2; done + containers: + - name: main-app + image: busybox:1.36 + command: + - sh + - -c + - echo "dependency is ready" && sleep 3600 diff --git a/k8s/service.yml b/k8s/service.yml new file mode 100644 index 0000000000..9bdd95f930 --- /dev/null +++ b/k8s/service.yml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: devops-info-service + labels: + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/component: web + app.kubernetes.io/part-of: devops-core-course +spec: + type: NodePort + selector: + app.kubernetes.io/name: devops-info-service + ports: + - name: http + protocol: TCP + port: 80 + targetPort: http + nodePort: 30080 diff --git a/monitoring/.env.example b/monitoring/.env.example new file mode 100644 index 0000000000..56d5ab2296 --- /dev/null +++ b/monitoring/.env.example @@ -0,0 +1,4 @@ +GF_SECURITY_ADMIN_USER=admin +GF_SECURITY_ADMIN_PASSWORD=changeme +GF_SECURITY_ALLOW_EMBEDDING=false +# Copy to .env and override values as needed. Anonymous auth is disabled in compose. 
diff --git a/monitoring/data/.gitkeep b/monitoring/data/.gitkeep new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/monitoring/data/.gitkeep @@ -0,0 +1 @@ + diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml new file mode 100644 index 0000000000..8d0556d7cd --- /dev/null +++ b/monitoring/docker-compose.yml @@ -0,0 +1,171 @@ +services: + prometheus: + image: prom/prometheus:v3.9.0 + command: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.retention.time=15d" + - "--storage.tsdb.retention.size=10GB" + ports: + - "9090:9090" + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-data:/prometheus + networks: + - logging + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:9090/-/healthy || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 20s + deploy: + resources: + limits: + cpus: "1.0" + memory: 1G + reservations: + cpus: "0.5" + memory: 512M + + loki: + image: grafana/loki:3.0.0 + command: -config.file=/etc/loki/config.yml + ports: + - "3100:3100" + volumes: + - ./loki/config.yml:/etc/loki/config.yml:ro + - loki-data:/loki + networks: + - logging + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:3100/ready || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 15s + deploy: + resources: + limits: + cpus: "1.0" + memory: 1G + reservations: + cpus: "0.5" + memory: 512M + + promtail: + build: + context: ./promtail + dockerfile: Dockerfile + image: grafana/promtail:3.0.0 + command: -config.file=/etc/promtail/config.yml + ports: + - "9080:9080" + volumes: + - ./promtail/config.yml:/etc/promtail/config.yml:ro + - /var/lib/docker/containers:/var/lib/docker/containers:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + - promtail-positions:/var/log/promtail + networks: + - logging + depends_on: + - loki + healthcheck: + test: + [ + "CMD-SHELL", + "curl -sf 
http://localhost:9080/ready || curl -sf http://localhost:9080/-/ready", + ] + interval: 10s + timeout: 5s + retries: 5 + start_period: 25s + deploy: + resources: + limits: + cpus: "0.5" + memory: 256M + reservations: + cpus: "0.25" + memory: 128M + + grafana: + image: grafana/grafana:12.3.1 + ports: + - "3000:3000" + env_file: + - .env + environment: + - GF_SECURITY_ADMIN_USER=${GF_SECURITY_ADMIN_USER:-admin} + - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:?set in .env} + - GF_AUTH_ANONYMOUS_ENABLED=false + - GF_SECURITY_ALLOW_EMBEDDING=${GF_SECURITY_ALLOW_EMBEDDING:-false} + - GF_METRICS_ENABLED=true + volumes: + - grafana-data:/var/lib/grafana + - ./grafana/provisioning:/etc/grafana/provisioning:ro + - ./grafana/dashboards:/var/lib/grafana/dashboards:ro + networks: + - logging + depends_on: + - prometheus + - loki + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:3000/api/health || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 30s + deploy: + resources: + limits: + cpus: "0.5" + memory: 512M + reservations: + cpus: "0.25" + memory: 256M + + app-python: + image: egrapa/devops-core-course-lab2:latest + build: + context: ../app_python + user: "${UID:-1000}:${GID:-1000}" + environment: + - HOST=0.0.0.0 + - PORT=8000 + - DEBUG=false + - APP_CONFIG_PATH=/config/config.json + - VISITS_FILE=/data/visits + ports: + - "8000:8000" + volumes: + - ./data:/data + - ../k8s/devops-info-service/files/config.json:/config/config.json:ro + networks: + - logging + labels: + logging: "promtail" + app: "devops-python" + healthcheck: + test: ["CMD", "python", "-c", "import urllib.request,sys; urllib.request.urlopen('http://localhost:8000/health')"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 20s + deploy: + resources: + limits: + cpus: "0.5" + memory: 256M + reservations: + cpus: "0.25" + memory: 128M + +networks: + logging: + driver: bridge + +volumes: + prometheus-data: + loki-data: + 
grafana-data: + promtail-positions: diff --git a/monitoring/docs/LAB07.md b/monitoring/docs/LAB07.md new file mode 100644 index 0000000000..ae8390cb57 --- /dev/null +++ b/monitoring/docs/LAB07.md @@ -0,0 +1,110 @@ +# Lab 7 — Observability & Logging with Loki Stack + +**Name:** Egor Pustovoytenko +**Date:** 2026-03-12 + +--- + +## Overview + +I deployed a Loki + Promtail + Grafana stack with the Flask app feeding +structured JSON logs. The stack runs via Docker Compose on a dedicated +`logging` network with persisted volumes and health checks. Grafana +anonymous access is disabled; admin creds come from `.env`. Promtail is +built locally to include `curl` for the healthcheck probe. + +--- + +## Architecture + +``` +[Flask app :8000] --stdout--> [Promtail 3.0] --push--> [Loki 3.0 TSDB] + labels (app,container) | + docker_sd + relabeling v + [Grafana 12.3] -- dashboards +``` + +- Storage: Loki TSDB on filesystem (`loki-data`), Promtail positions + (`promtail-positions`), Grafana state (`grafana-data`). +- Network: single bridge `logging`. + +![alt text](image-2.png) + +--- + +## Stack Implementation + +### Compose (`monitoring/docker-compose.yml`) +- Services: `loki` (3100), `promtail` (9080), `grafana` (3000), + `app-python` (8000). +- Mounted configs: `/etc/loki/config.yml`, `/etc/promtail/config.yml`. +- Promtail built from `monitoring/promtail/Dockerfile` (adds curl) and + health-checked via `/ready` with `/-/ready` fallback. +- Health checks on Loki `/ready`, Grafana `/api/health`, app `/health`. +- Resource limits/reservations added to every service. +- Grafana env: anonymous disabled, admin user/pass from `.env`. + +### Loki (`monitoring/loki/config.yml`) +- TSDB + filesystem object store, schema v13, `path_prefix: /loki`. +- Retention `168h` with compactor enabled; embedded cache for queries. +- Ring stored in-memory for single-node lab. + +### Promtail (`monitoring/promtail/config.yml`) +- Discovers Docker containers via socket SD every 5s. 
+- Keeps only containers labeled `logging=promtail`; forwards `app` label. +- Keeps both `stdout` and `stderr` log streams (`__meta_docker_container_log_stream` relabel). +- Docker pipeline stage keeps JSON intact. +- Relabels container name into `container` and `job` for LogQL selectors. +- Custom image includes `curl` to support the healthcheck probe. + +### Application Logging (`app_python/app.py`) +- Custom `JSONFormatter` pushes logs to stdout with fields: + `timestamp`, `level`, `logger`, `message` + context. +- Events: `startup`, `request_received`, `response_sent`, `not_found`, + `internal_error` (with stack trace). +- Promtail attaches `app="devops-python"` and `container="app-python"`. + +![alt text](image-3.png) + +--- + +## Validation + +- Stack up: `cd monitoring && docker compose up -d --build promtail`. +- Status: `docker compose ps` → all services `Up (healthy)`. +![alt text](image-4.png) +- Loki ready: `curl -f http://localhost:3100/ready`. +- Promtail targets: `curl -s http://localhost:9080/targets` +![alt text](image-5.png) +- Grafana health: `curl -f http://localhost:3000/api/health`. +- Traffic generation: + +```bash +for i in {1..20}; do curl -s http://localhost:8000/; done +for i in {1..20}; do curl -s http://localhost:8000/health; done +``` + +LogQL queries exercised in Explore: +![alt text](image-6.png) \ +There is this smart interactive window, so no point in actually understanding syntax? It even explains what is going on +![alt text](image-7.png) + +## Production Notes + +- Grafana anonymous auth off; creds via `.env` (example in + `monitoring/.env.example`). +- Resource limits/reservations on every service. +- Loki retention 7 days with compactor cleanup. +- Health checks for all containers to fail fast and restart. + +--- + +## Challenges & Decisions + +- Avoided extra deps for JSON logging by writing a small formatter. +- Kept scrape scope tight with Docker label filtering to reduce noise. 
+- Promtail base image lacked curl/wget/nc; built a tiny derivative + (removed broken `bullseye-backports`) so the `/ready` healthcheck + works—was the main difficulty hit during setup. +- Balanced convenience/security: disabled anonymous Grafana and kept + secrets out of VCS via `.env`. diff --git a/monitoring/docs/LAB08.md b/monitoring/docs/LAB08.md new file mode 100644 index 0000000000..4bbbd11e06 --- /dev/null +++ b/monitoring/docs/LAB08.md @@ -0,0 +1,277 @@ +# Lab 8 — Metrics & Monitoring with Prometheus + +**Name:** Egor Pustovoytenko +**Date:** 2026-03-19 + +--- + +## Overview + +This lab extends the Lab 7 observability stack with Prometheus metrics +and Grafana metric dashboards. The Flask application now exposes a +Prometheus-compatible `/metrics` endpoint, Prometheus scrapes the app and +the monitoring services every 15 seconds, and Grafana is provisioned +with Prometheus and Loki data sources plus a ready-made application +dashboard. + +The implementation was added without changing the basic stack layout: +the app, Loki, Promtail, Prometheus, and Grafana all run on the shared +`logging` Docker network with persistent named volumes. + +--- + +## Architecture + +```text +Browser / curl + | + v +[Flask app :8000] + |- JSON logs --------------------> [Promtail] ----> [Loki] + |- /metrics ---------------------> [Prometheus] --> [Grafana] + ^ + | + scrapes self, Loki, Grafana +``` +--- + +## Application Instrumentation + +Instrumentation was added in [`app.py`](/home/egrapa/prog/tmp/DevOps-Core-Course/app_python/app.py). + +### HTTP metrics + +- `http_requests_total{method, endpoint, status_code}` + Counts completed HTTP requests. +- `http_request_duration_seconds{method, endpoint, status_code}` + Histogram for request latency distribution. +- `http_requests_in_progress{method, endpoint}` + Gauge for concurrent in-flight requests. 
+ +### Application-specific metrics + +- `devops_info_endpoint_calls_total{endpoint}` + Tracks usage of the three exposed endpoints. +- `devops_info_system_collection_seconds` + Measures the time needed to collect runtime/system information for `/`. + +### Why these metrics + +- **Rate:** `http_requests_total` supports request-rate queries. +- **Errors:** `http_requests_total` filtered by `status_code=~"5.."`. +- **Duration:** `http_request_duration_seconds` supports percentiles and + heatmaps. +- **Operational state:** `http_requests_in_progress` shows active load. +- **App behavior:** custom counters/histograms describe endpoint usage + and internal work. + + +![alt text](image-8.png) + +--- + +## Prometheus Configuration + +Prometheus config lives in +[`prometheus.yml`](/home/egrapa/prog/tmp/DevOps-Core-Course/monitoring/prometheus/prometheus.yml). + +### Scrape settings + +- Scrape interval: `15s` +- Evaluation interval: `15s` +- Jobs: + - `prometheus` → `localhost:9090` + - `app` → `app-python:8000/metrics` + - `loki` → `loki:3100/metrics` + - `grafana` → `grafana:3000/metrics` + +### Retention + +Retention is configured in +[`docker-compose.yml`](/home/egrapa/prog/tmp/DevOps-Core-Course/monitoring/docker-compose.yml) +through container arguments: + +- `--storage.tsdb.retention.time=15d` +- `--storage.tsdb.retention.size=10GB` + +### Persistence + +Prometheus data is stored in the named volume `prometheus-data`. + + +![alt text](image-9.png) +![alt text](image-10.png) + +--- + +## Dashboard Walkthrough + +Grafana provisioning was added under +[`monitoring/grafana/provisioning`](/home/egrapa/prog/tmp/DevOps-Core-Course/monitoring/grafana/provisioning) +and the dashboard JSON is stored in +[`app-metrics-dashboard.json`](/home/egrapa/prog/tmp/DevOps-Core-Course/monitoring/grafana/dashboards/app-metrics-dashboard.json). + +The dashboard contains 7 panels: + +1. **Request Rate by Endpoint** + Query: `sum by (endpoint) (rate(http_requests_total[5m]))` +2. 
**Error Rate** + Query: `sum(rate(http_requests_total{status_code=~"5.."}[5m]))` +3. **Request Duration p95** + Query: `histogram_quantile(0.95, sum by (le, endpoint) (rate(http_request_duration_seconds_bucket[5m])))` +4. **Request Duration Heatmap** + Query: `sum by (le) (rate(http_request_duration_seconds_bucket[5m]))` +5. **Active Requests** + Query: `sum(http_requests_in_progress)` +6. **Status Code Distribution** + Query: `sum by (status_code) (rate(http_requests_total[5m]))` +7. **App Uptime** + Query: `max(up{job="app"})` + +![alt text](image-12.png) + +--- + +## PromQL Examples + +1. `up` + Shows whether each scraped target is up. + +2. `sum by (endpoint) (rate(http_requests_total[5m]))` + Request rate per endpoint. + +3. `sum(rate(http_requests_total{status_code=~"5.."}[5m]))` + Current 5xx error rate. + +4. `histogram_quantile(0.95, sum by (le, endpoint) (rate(http_request_duration_seconds_bucket[5m])))` + p95 latency by endpoint. + +5. `sum(http_requests_in_progress)` + Number of active requests being processed right now. + +6. `sum by (status_code) (rate(http_requests_total[5m]))` + Status code split over time. + +7. `rate(devops_info_endpoint_calls_total[5m])` + Business-level endpoint usage. + +--- + +## Production Setup + +The production-oriented changes are defined in +[`docker-compose.yml`](/home/egrapa/prog/tmp/DevOps-Core-Course/monitoring/docker-compose.yml). 
+ +### Health checks + +- Prometheus: `/-/healthy` +- Loki: `/ready` +- Promtail: `/ready` with fallback to `/-/ready` +- Grafana: `/api/health` +- Flask app: `/health` + +### Resource limits + +- Prometheus: `1 CPU`, `1G` +- Loki: `1 CPU`, `1G` +- Grafana: `0.5 CPU`, `512M` +- App: `0.5 CPU`, `256M` +- Promtail: `0.5 CPU`, `256M` + +### Persistent volumes + +- `prometheus-data` +- `loki-data` +- `grafana-data` +- `promtail-positions` + + +![alt text](image-13.png) +![alt text](image-14.png) +![alt text](image-15.png) + +(11:30 - app down) +--- + +## Metrics vs Logs + +- **Metrics** are best for trends, saturation, error rates, latency + percentiles, and alerting. +- **Logs** are best for request-level investigation, debugging, and + reconstructing what happened for a single event. +- In this project, Prometheus answers questions like "how many requests + per second?" while Loki answers questions like "which request failed + and what headers/path did it have?" + +--- + +## Testing Results + +The stack was not executed as part of this write-up. The repository is +prepared so you can run it locally and capture the required evidence. + +Expected validation flow: + +1. Start the monitoring stack. +2. Generate some traffic to `/`, `/health`, and `/metrics`. +3. Open Prometheus targets and confirm all jobs are `UP`. +4. Open Grafana and confirm the provisioned dashboard shows live data. +5. Restart the stack and verify the dashboard and data source + provisioning persist. + +--- + +## Challenges & Solutions + +- **Low-cardinality labels:** metrics use normalized Flask route rules + (`/`, `/health`, `/metrics`) rather than raw dynamic paths. +- **Gauge correctness:** active-request tracking increments in + `before_request` and decrements in `teardown_request` so the gauge is + released even if a request errors. +- **Reusable setup:** Grafana data sources and dashboard are provisioned + from files so the environment is reproducible and ready for + screenshots. 
+ +--- + +## How To Run The App + +### Option 1: Run the full monitoring stack + +```bash +cd monitoring +docker compose up -d --build +``` + +Open: + +- App: `http://localhost:8000` +- App health: `http://localhost:8000/health` +- App metrics: `http://localhost:8000/metrics` +- Prometheus: `http://localhost:9090` +- Prometheus targets: `http://localhost:9090/targets` +- Grafana: `http://localhost:3000` + +Default Grafana credentials come from +[`monitoring/.env`](/home/egrapa/prog/tmp/DevOps-Core-Course/monitoring/.env): + +- Username: `admin` +- Password: `changeme` + +### Option 2: Run the Flask app only + +```bash +cd app_python +python -m venv venv +source venv/bin/activate +pip install -r requirements.txt +PORT=8000 python app.py +``` + +Then open: + +- `http://localhost:8000/` +- `http://localhost:8000/health` +- `http://localhost:8000/metrics` + +--- \ No newline at end of file diff --git a/monitoring/docs/image-1.png b/monitoring/docs/image-1.png new file mode 100644 index 0000000000..3c5a39f0d9 Binary files /dev/null and b/monitoring/docs/image-1.png differ diff --git a/monitoring/docs/image-10.png b/monitoring/docs/image-10.png new file mode 100644 index 0000000000..6d1f9f5886 Binary files /dev/null and b/monitoring/docs/image-10.png differ diff --git a/monitoring/docs/image-11.png b/monitoring/docs/image-11.png new file mode 100644 index 0000000000..0f362aadba Binary files /dev/null and b/monitoring/docs/image-11.png differ diff --git a/monitoring/docs/image-12.png b/monitoring/docs/image-12.png new file mode 100644 index 0000000000..244aa6336a Binary files /dev/null and b/monitoring/docs/image-12.png differ diff --git a/monitoring/docs/image-13.png b/monitoring/docs/image-13.png new file mode 100644 index 0000000000..d96feb3626 Binary files /dev/null and b/monitoring/docs/image-13.png differ diff --git a/monitoring/docs/image-14.png b/monitoring/docs/image-14.png new file mode 100644 index 0000000000..a67a3549bd Binary files /dev/null and 
b/monitoring/docs/image-14.png differ diff --git a/monitoring/docs/image-15.png b/monitoring/docs/image-15.png new file mode 100644 index 0000000000..0bd29ad361 Binary files /dev/null and b/monitoring/docs/image-15.png differ diff --git a/monitoring/docs/image-2.png b/monitoring/docs/image-2.png new file mode 100644 index 0000000000..7ba596c3b2 Binary files /dev/null and b/monitoring/docs/image-2.png differ diff --git a/monitoring/docs/image-3.png b/monitoring/docs/image-3.png new file mode 100644 index 0000000000..35aca15955 Binary files /dev/null and b/monitoring/docs/image-3.png differ diff --git a/monitoring/docs/image-4.png b/monitoring/docs/image-4.png new file mode 100644 index 0000000000..079c4b2677 Binary files /dev/null and b/monitoring/docs/image-4.png differ diff --git a/monitoring/docs/image-5.png b/monitoring/docs/image-5.png new file mode 100644 index 0000000000..bbbd926cae Binary files /dev/null and b/monitoring/docs/image-5.png differ diff --git a/monitoring/docs/image-6.png b/monitoring/docs/image-6.png new file mode 100644 index 0000000000..4496a0b3ce Binary files /dev/null and b/monitoring/docs/image-6.png differ diff --git a/monitoring/docs/image-7.png b/monitoring/docs/image-7.png new file mode 100644 index 0000000000..e1e2c17e01 Binary files /dev/null and b/monitoring/docs/image-7.png differ diff --git a/monitoring/docs/image-8.png b/monitoring/docs/image-8.png new file mode 100644 index 0000000000..ccd0896525 Binary files /dev/null and b/monitoring/docs/image-8.png differ diff --git a/monitoring/docs/image-9.png b/monitoring/docs/image-9.png new file mode 100644 index 0000000000..b96821f425 Binary files /dev/null and b/monitoring/docs/image-9.png differ diff --git a/monitoring/docs/image.png b/monitoring/docs/image.png new file mode 100644 index 0000000000..8f5bbfeb1d Binary files /dev/null and b/monitoring/docs/image.png differ diff --git a/monitoring/grafana/dashboards/app-metrics-dashboard.json 
b/monitoring/grafana/dashboards/app-metrics-dashboard.json new file mode 100644 index 0000000000..624fe818a8 --- /dev/null +++ b/monitoring/grafana/dashboards/app-metrics-dashboard.json @@ -0,0 +1,428 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "editorMode": "code", + "expr": "sum by (endpoint) (rate(http_requests_total[5m]))", + "legendFormat": "{{endpoint}}", + "range": true, + "refId": "A" + } + ], + "title": "Request Rate by Endpoint", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(http_requests_total{status_code=~\"5..\"}[5m]))", + "legendFormat": "5xx errors/sec", + "range": true, + "refId": "A" + } + ], + "title": "Error Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by (le, endpoint) (rate(http_request_duration_seconds_bucket[5m])))", + "legendFormat": "{{endpoint}} p95", + "range": true, + "refId": "A" + } + ], + "title": "Request Duration p95", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-YlOrRd" + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 64 + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "targets": [ + { + "editorMode": "code", + "expr": "sum by (le) (rate(http_request_duration_seconds_bucket[5m]))", + "format": "heatmap", + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "Request Duration Heatmap", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 2 + }, + { + 
"color": "red", + "value": 5 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "targets": [ + { + "editorMode": "code", + "expr": "sum(http_requests_in_progress)", + "refId": "A" + } + ], + "title": "Active Requests", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 16 + }, + "id": 6, + "options": { + "displayLabels": [ + "name", + "percent" + ], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "editorMode": "code", + "expr": "sum by (status_code) (rate(http_requests_total[5m]))", + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "title": "Status Code Distribution", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "text": "DOWN" + }, + "1": { + "text": "UP" + } + }, + "type": "value" + } + ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 16 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + 
"calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "targets": [ + { + "editorMode": "code", + "expr": "max(up{job=\"app\"})", + "refId": "A" + } + ], + "title": "App Uptime", + "type": "stat" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "style": "dark", + "tags": [ + "lab08", + "prometheus", + "grafana" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Lab 8 - Application Metrics", + "uid": "lab08-app-metrics", + "version": 1, + "weekStart": "" +} diff --git a/monitoring/grafana/provisioning/dashboards/dashboard-provider.yml b/monitoring/grafana/provisioning/dashboards/dashboard-provider.yml new file mode 100644 index 0000000000..373585df4b --- /dev/null +++ b/monitoring/grafana/provisioning/dashboards/dashboard-provider.yml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: "lab08-dashboards" + orgId: 1 + folder: "Lab 8" + type: file + disableDeletion: false + updateIntervalSeconds: 30 + allowUiUpdates: true + options: + path: /var/lib/grafana/dashboards diff --git a/monitoring/grafana/provisioning/datasources/datasources.yml b/monitoring/grafana/provisioning/datasources/datasources.yml new file mode 100644 index 0000000000..8bbb9864a9 --- /dev/null +++ b/monitoring/grafana/provisioning/datasources/datasources.yml @@ -0,0 +1,18 @@ +apiVersion: 1 + +datasources: + - name: Loki + uid: loki + type: loki + access: proxy + url: http://loki:3100 + isDefault: false + editable: true + + - name: Prometheus + uid: prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: true diff --git a/monitoring/loki/config.yml b/monitoring/loki/config.yml new file mode 100644 index 0000000000..3f198eb717 --- /dev/null +++ b/monitoring/loki/config.yml @@ -0,0 +1,50 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 9095 + +common: + instance_addr: 
127.0.0.1 + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +query_range: + results_cache: + cache: + embedded_cache: + enabled: true + max_size_mb: 100 + +schema_config: + configs: + - from: 2024-01-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +storage_config: + tsdb_shipper: + active_index_directory: /loki/tsdb-index + cache_location: /loki/tsdb-cache + filesystem: + directory: /loki/chunks + +compactor: + working_directory: /loki/compactor + compaction_interval: 10m + retention_enabled: true + delete_request_store: filesystem + +limits_config: + retention_period: 168h diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml new file mode 100644 index 0000000000..ada7448c7d --- /dev/null +++ b/monitoring/prometheus/prometheus.yml @@ -0,0 +1,23 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: ["localhost:9090"] + + - job_name: "app" + metrics_path: "/metrics" + static_configs: + - targets: ["app-python:8000"] + + - job_name: "loki" + metrics_path: "/metrics" + static_configs: + - targets: ["loki:3100"] + + - job_name: "grafana" + metrics_path: "/metrics" + static_configs: + - targets: ["grafana:3000"] diff --git a/monitoring/promtail/Dockerfile b/monitoring/promtail/Dockerfile new file mode 100644 index 0000000000..f3e0cd2173 --- /dev/null +++ b/monitoring/promtail/Dockerfile @@ -0,0 +1,7 @@ +FROM grafana/promtail:3.0.0 + +# Add curl for healthcheck endpoints (base image lacks curl/wget/nc) +RUN sed -i '/bullseye-backports/d' /etc/apt/sources.list && \ + apt-get update && \ + apt-get install -y --no-install-recommends curl && \ + rm -rf /var/lib/apt/lists/* diff --git a/monitoring/promtail/config.yml b/monitoring/promtail/config.yml new file mode 100644 index 
0000000000..6597ce2941 --- /dev/null +++ b/monitoring/promtail/config.yml @@ -0,0 +1,38 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /var/log/promtail/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + + relabel_configs: + - source_labels: ['__meta_docker_container_log_stream'] + regex: 'stdout|stderr' + action: keep + + - source_labels: ['__meta_docker_container_label_logging'] + regex: 'promtail' + action: keep + + - source_labels: ['__meta_docker_container_name'] + regex: '/(.*)' + target_label: container + + - source_labels: ['__meta_docker_container_label_app'] + target_label: app + + pipeline_stages: + - docker: {} + - labels: + stream: + - static_labels: + job: docker diff --git a/pulumi/Pulumi.python.yaml b/pulumi/Pulumi.python.yaml new file mode 100644 index 0000000000..012e19b27c --- /dev/null +++ b/pulumi/Pulumi.python.yaml @@ -0,0 +1,11 @@ +encryptionsalt: v1:WNo4vXpzSBs=:v1:nDAI49mvq+hIqXfY:PJvNLVUTZ2WY/952GHIBhSkWHFcidA== +config: + lab04-yc:zone: ru-central1-a + lab04-yc:sshUser: ubuntu + lab04-yc:mySshCidr: 5.228.112.78/32 + lab04-yc:sshPublicKeyPath: /home/egrapa/.ssh/lab04.pub + lab04-yc:imageId: fd8lt661chfo5i13a40d + lab04-yc:service_account_key_file: /home/egrapa/prog/DevOps-Core-Course/terraform/authorized_key.json + yandex:serviceAccountKeyFile: /home/egrapa/prog/DevOps-Core-Course/terraform/authorized_key.json + yandex:folderId: b1g8k1f0jfvboii8ftl3 + yandex:cloudId: b1g4g7ucasok23ajm606 diff --git a/pulumi/Pulumi.yaml b/pulumi/Pulumi.yaml new file mode 100644 index 0000000000..adccaedb6a --- /dev/null +++ b/pulumi/Pulumi.yaml @@ -0,0 +1,3 @@ +name: lab04-yc +runtime: python +description: Lab04 VM on Yandex Cloud (Pulumi) \ No newline at end of file diff --git a/pulumi/__main__.py b/pulumi/__main__.py new file mode 100644 index 0000000000..bc2d27913d --- 
/dev/null +++ b/pulumi/__main__.py @@ -0,0 +1,107 @@ +import pulumi +import pulumi_yandex as yandex + +cfg = pulumi.Config() + +zone = cfg.get("zone") or "ru-central1-a" +ssh_user = cfg.get("sshUser") or "ubuntu" +my_ssh_cidr = cfg.require("mySshCidr") # "1.2.3.4/32" +ssh_pubkey_path = cfg.require("sshPublicKeyPath") +image_id = cfg.require("imageId") + +with open(ssh_pubkey_path, "r", encoding="utf-8") as f: + pubkey = f.read().strip() + +net = yandex.VpcNetwork("lab04-net") + +subnet = yandex.VpcSubnet( + "lab04-subnet", + network_id=net.id, + zone=zone, + v4_cidr_blocks=["10.10.0.0/24"], +) + +sg = yandex.VpcSecurityGroup( + "lab04-sg", + network_id=net.id, +) +# SSH 22 только с твоего IP +yandex.VpcSecurityGroupRule( + "lab04-sg-ssh", + security_group_binding=sg.id, + direction="ingress", + description="SSH from my IP", + protocol="TCP", + v4_cidr_blocks=[my_ssh_cidr], + port=22, +) + +# HTTP 80 +yandex.VpcSecurityGroupRule( + "lab04-sg-http", + security_group_binding=sg.id, + direction="ingress", + description="HTTP", + protocol="TCP", + v4_cidr_blocks=["0.0.0.0/0"], + port=80, +) + +# App 5000 +yandex.VpcSecurityGroupRule( + "lab04-sg-5000", + security_group_binding=sg.id, + direction="ingress", + description="App 5000", + protocol="TCP", + v4_cidr_blocks=["0.0.0.0/0"], + port=5000, +) + +# Egress all +yandex.VpcSecurityGroupRule( + "lab04-sg-egress", + security_group_binding=sg.id, + direction="egress", + description="Allow all egress", + protocol="ANY", + v4_cidr_blocks=["0.0.0.0/0"], +) +vm = yandex.ComputeInstance( + "lab04-vm", + name="lab04-vm", + zone=zone, + platform_id="standard-v2", + resources=yandex.ComputeInstanceResourcesArgs( + cores=2, + core_fraction=20, + memory=1, + ), + boot_disk=yandex.ComputeInstanceBootDiskArgs( + initialize_params=yandex.ComputeInstanceBootDiskInitializeParamsArgs( + image_id=image_id, + size=10, + type="network-hdd", + ) + ), + network_interfaces=[yandex.ComputeInstanceNetworkInterfaceArgs( + subnet_id=subnet.id, + 
nat=True, + security_group_ids=[sg.id], + )], + metadata={ + "ssh-keys": f"{ssh_user}:{pubkey}", + }, +) +pulumi.export( + "public_ip", + vm.network_interfaces.apply(lambda nics: nics[0]["nat_ip_address"]) +) + +pulumi.export( + "ssh_command", + pulumi.Output.concat( + "ssh ", ssh_user, "@", + vm.network_interfaces.apply(lambda nics: nics[0]["nat_ip_address"]) + ) +) \ No newline at end of file diff --git a/pulumi/__pycache__/__main__.cpython-311.pyc b/pulumi/__pycache__/__main__.cpython-311.pyc new file mode 100644 index 0000000000..14d9911a5f Binary files /dev/null and b/pulumi/__pycache__/__main__.cpython-311.pyc differ diff --git a/pulumi/__pycache__/__main__.cpython-314.pyc b/pulumi/__pycache__/__main__.cpython-314.pyc new file mode 100644 index 0000000000..e1c4f2aca3 Binary files /dev/null and b/pulumi/__pycache__/__main__.cpython-314.pyc differ diff --git a/pulumi/requirements.txt b/pulumi/requirements.txt new file mode 100644 index 0000000000..efcb64d2fc --- /dev/null +++ b/pulumi/requirements.txt @@ -0,0 +1,2 @@ +pulumi>=3.0.0 +pulumi-yandex==0.13.0 \ No newline at end of file diff --git a/terraform/.gitignore b/terraform/.gitignore new file mode 100644 index 0000000000..2963b2a58b --- /dev/null +++ b/terraform/.gitignore @@ -0,0 +1,12 @@ +# Terraform +*.tfstate +*.tfstate.* +.terraform/ +.terraform.lock.hcl +terraform.tfvars +*.tfvars + +# Keys +*.pem +*.key +*.json \ No newline at end of file diff --git a/terraform/main.tf b/terraform/main.tf new file mode 100644 index 0000000000..8a5f0e269c --- /dev/null +++ b/terraform/main.tf @@ -0,0 +1,97 @@ +terraform { + required_version = ">= 1.9.0" + required_providers { + yandex = { + source = "yandex-cloud/yandex" + } + } +} + +provider "yandex" { + service_account_key_file = var.service_account_key_file + cloud_id = var.cloud_id + folder_id = var.folder_id + zone = var.zone +} + +data "yandex_compute_image" "ubuntu" { + family = "ubuntu-2404-lts" +} + +resource "yandex_vpc_network" "net" { + name = "lab04-net" 
+} + +resource "yandex_vpc_subnet" "subnet" { + name = "lab04-subnet" + zone = var.zone + network_id = yandex_vpc_network.net.id + v4_cidr_blocks = [var.subnet_cidr] +} + +resource "yandex_vpc_security_group" "sg" { + name = "lab04-sg" + network_id = yandex_vpc_network.net.id + + # SSH только с твоего IP + ingress { + protocol = "TCP" + description = "SSH from my IP" + v4_cidr_blocks = [var.my_ssh_cidr] + port = 22 + } + + # HTTP + ingress { + protocol = "TCP" + description = "HTTP" + v4_cidr_blocks = ["0.0.0.0/0"] + port = 80 + } + + # App port + ingress { + protocol = "TCP" + description = "App 5000" + v4_cidr_blocks = ["0.0.0.0/0"] + port = 5000 + } + + # Исходящий трафик наружу (чтобы apt работал) + egress { + protocol = "ANY" + description = "Allow all egress" + v4_cidr_blocks = ["0.0.0.0/0"] + } +} + +resource "yandex_compute_instance" "vm" { + name = "lab04-vm" + platform_id = "standard-v2" + zone = var.zone + + resources { + cores = 2 + core_fraction = 20 + memory = 1 + } + + boot_disk { + initialize_params { + image_id = data.yandex_compute_image.ubuntu.id + size = 10 + type = "network-hdd" + } + } + + network_interface { + subnet_id = yandex_vpc_subnet.subnet.id + nat = true + security_group_ids = [yandex_vpc_security_group.sg.id] + } + + metadata = { + # формат "user:" + ssh-keys = "${var.ssh_user}:${file(var.ssh_public_key_path)}" + } +} \ No newline at end of file diff --git a/terraform/outputs.tf b/terraform/outputs.tf new file mode 100644 index 0000000000..60916ec479 --- /dev/null +++ b/terraform/outputs.tf @@ -0,0 +1,7 @@ +output "public_ip" { + value = yandex_compute_instance.vm.network_interface[0].nat_ip_address +} + +output "ssh_command" { + value = "ssh ${var.ssh_user}@${yandex_compute_instance.vm.network_interface[0].nat_ip_address}" +} \ No newline at end of file diff --git a/terraform/terraform.tfvars.example b/terraform/terraform.tfvars.example new file mode 100644 index 0000000000..01822dd3ce --- /dev/null +++ 
b/terraform/terraform.tfvars.example @@ -0,0 +1,8 @@ +cloud_id = "xxxx" +folder_id = "yyyy" +zone = "ru-central1-a" + +service_account_key_file = "./key.json" + +ssh_public_key_path = "~/.ssh/id_ed25519.pub" +my_ssh_cidr = "1.2.3.4/32" \ No newline at end of file diff --git a/terraform/variables.tf b/terraform/variables.tf new file mode 100644 index 0000000000..014342f781 --- /dev/null +++ b/terraform/variables.tf @@ -0,0 +1,31 @@ +variable "cloud_id" { type = string } +variable "folder_id" { type = string } +variable "zone" { + type = string + default = "ru-central1-a" +} + +variable "service_account_key_file" { + type = string + description = "Path to SA key.json (do not commit)" +} + +variable "ssh_user" { + type = string + default = "ubuntu" +} + +variable "ssh_public_key_path" { + type = string + description = "Path to your public key, e.g. ~/.ssh/id_ed25519.pub" +} + +variable "my_ssh_cidr" { + type = string + description = "Your public IP in CIDR for SSH, e.g. 1.2.3.4/32" +} + +variable "subnet_cidr" { + type = string + default = "10.10.0.0/24" +} \ No newline at end of file