diff --git a/.github/workflows/ansible-deploy.yml b/.github/workflows/ansible-deploy.yml deleted file mode 100644 index 6951f935f1..0000000000 --- a/.github/workflows/ansible-deploy.yml +++ /dev/null @@ -1,69 +0,0 @@ -name: Ansible Deployment - -on: - push: - paths: - - 'ansible/**' # Ansible code - - '!ansible/docs/**' # Exclude docs - - '!ansible/README.md' - - '.github/workflows/ansible-deploy.yml' # Workflow changes - pull_request: - paths: - - 'ansible/**' # Ansible code - - '!ansible/docs/**' # Exclude docs - - '!ansible/README.md' - - '.github/workflows/ansible-deploy.yml' # Workflow changes - -jobs: - lint: - name: Ansible Lint - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.14' - - - name: Install dependencies - run: | - pip install ansible ansible-lint - - - name: Run ansible-lint - run: | - cd ansible - ansible-lint playbooks/*.yml - - deploy: - name: Deploy Application - needs: lint - runs-on: self-hosted - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.14' - - - name: Install dependencies - run: | - pip install ansible - - - name: Run playbook - run: | - cd ansible - touch /tmp/vaultpass.txt - chmod 600 /tmp/vaultpass.txt - echo "${{ secrets.ANSIBLE_VAULT_PASSWORD }}" > /tmp/vaultpass.txt - ansible-playbook playbooks/deploy.yml -i inventory/hosts-runner.ini --vault-password-file /tmp/vaultpass.txt - rm /tmp/vaultpass.txt - - - name: Verify deployment - run: | - sleep 10 # Wait for app to start - curl -f http://${{ secrets.VM_HOST }}:5000 || exit 1 - curl -f http://${{ secrets.VM_HOST }}:5000/health || exit 1 diff --git a/app_python/app.py b/app_python/app.py index 06194342e7..77e4af2f8a 100644 --- a/app_python/app.py +++ b/app_python/app.py @@ -2,21 +2,38 @@ DevOps Info Service Main application module """ +import json from flask 
import Flask, jsonify, request from datetime import datetime, timezone import logging import os import platform import socket + HOST = os.getenv('HOST', '0.0.0.0') PORT = int(os.getenv('PORT', 5000)) DEBUG = os.getenv('DEBUG', 'False').lower() == 'true' -logging.basicConfig( - level=logging.INFO if not DEBUG else logging.DEBUG, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) -logger = logging.getLogger(__name__) + +class JSONFormatter(logging.Formatter): + def __init__(self): + super().__init__() + + def format(self, record: logging.LogRecord) -> str: + rec = { + "timestamp": datetime.now().isoformat(), + "level": record.levelname, + "message": record.getMessage(), + } + if "http" in record.__dict__: + rec["http"] = record.http + return json.dumps(rec) + + +logger = logging.Logger(__name__, logging.INFO if not DEBUG else logging.DEBUG) +stderrhandler = logging.StreamHandler() +logger.addHandler(stderrhandler) +stderrhandler.setFormatter(JSONFormatter()) app: Flask = Flask(__name__) @@ -64,10 +81,20 @@ def get_request_info() -> dict[str, str | None]: } +def get_http_extra_info(): + return { + "http": { + "location": request.path, + "method": request.method, + "ip": request.remote_addr, + } + } + + @app.route('/') def index(): - logger.debug(f'Request: {request.method} {request.path}') """Main endpoint - service and system information.""" + logger.debug(f'Request: {request.method} {request.path}', extra=get_http_extra_info()) return jsonify({ 'service': { 'name': 'devops-info-service', @@ -87,7 +114,7 @@ def index(): @app.route('/health') def health(): - logger.debug(f'Request: {request.method} {request.path}') + logger.debug(f'Request: {request.method} {request.path}', extra=get_http_extra_info()) return jsonify({ 'status': 'healthy', 'timestamp': datetime.now(timezone.utc).isoformat(), @@ -96,7 +123,8 @@ def health(): @app.errorhandler(404) -def not_found(error): +def notfound_handler(e): + logger.info('A 404 Not Found error occured', 
extra=get_http_extra_info()) return jsonify({ 'error': 'Not Found', 'message': 'Endpoint does not exist' @@ -104,7 +132,8 @@ def not_found(error): @app.errorhandler(500) -def internal_error(error): +def internal_error(e): + logger.error('Internal Server Error 500', extra=get_http_extra_info()) return jsonify({ 'error': 'Internal Server Error', 'message': 'An unexpected error occurred' @@ -112,6 +141,6 @@ def internal_error(error): START_TIME = datetime.now(timezone.utc) -logger.info('Application starting...') +logger.info('Application starting... Configured with log level=%s', 'DEBUG' if DEBUG else 'INFO') if __name__ == '__main__': app.run(host=HOST, port=PORT, debug=DEBUG) diff --git a/monitoring/.gitignore b/monitoring/.gitignore new file mode 100644 index 0000000000..4c49bd78f1 --- /dev/null +++ b/monitoring/.gitignore @@ -0,0 +1 @@ +.env diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml new file mode 100644 index 0000000000..df98f69d30 --- /dev/null +++ b/monitoring/docker-compose.yml @@ -0,0 +1,87 @@ +services: + loki: + image: grafana/loki:3.0.0 + ports: + - 3100:3100 + volumes: + - ${PWD}/loki/config.yml:/etc/loki/config.yml:ro + - loki-data:/loki + command: -config.file=/etc/loki/config.yml + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:3100/ready || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s # Grace period for startup + deploy: + resources: + limits: + cpus: '1.0' + memory: 3G + reservations: + cpus: '1' + memory: 1G + + promtail: + image: grafana/promtail:3.0.0 + volumes: + - ${PWD}/promtail/config.yml:/etc/promtail/config.yml:ro + - /var/lib/docker/containers:/var/lib/docker/containers:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + command: -config.file=/etc/promtail/config.yml + deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + reservations: + cpus: '1' + memory: 500M + + grafana: + image: grafana/grafana:12.3.1 + ports: + - 3000:3000 + 
environment: + - GF_AUTH_ANONYMOUS_ENABLED=false + - GF_SECURITY_ALLOW_EMBEDDING=false + - GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASS} + - GF_SECURITY_ADMIN_EMAIL=${ADMIN_LOGIN} + volumes: + - grafana-data:/var/lib/grafana + deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + reservations: + cpus: '1' + memory: 500M + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:3000/api/health || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s # Grace period for startup + + devops-infoservice: + build: ${PWD}/../app_python + ports: + - 5000:5000 + environment: + - DEBUG=true + labels: + logging: "promtail" + app: "devops-infoservice" + deploy: + resources: + limits: + cpus: '3.0' + memory: 4G + reservations: + cpus: '3' + memory: 1G + +volumes: + loki-data: + grafana-data: diff --git a/monitoring/docs/L7t1_grafana.png b/monitoring/docs/L7t1_grafana.png new file mode 100644 index 0000000000..4532864adf Binary files /dev/null and b/monitoring/docs/L7t1_grafana.png differ diff --git a/monitoring/docs/L7t1_infoservice.png b/monitoring/docs/L7t1_infoservice.png new file mode 100644 index 0000000000..c1d1bcb839 Binary files /dev/null and b/monitoring/docs/L7t1_infoservice.png differ diff --git a/monitoring/docs/L7t1_loki.png b/monitoring/docs/L7t1_loki.png new file mode 100644 index 0000000000..409f6e7d16 Binary files /dev/null and b/monitoring/docs/L7t1_loki.png differ diff --git a/monitoring/docs/L7t1_promtail.png b/monitoring/docs/L7t1_promtail.png new file mode 100644 index 0000000000..e2402e4b20 Binary files /dev/null and b/monitoring/docs/L7t1_promtail.png differ diff --git a/monitoring/docs/L7t2_grafana.png b/monitoring/docs/L7t2_grafana.png new file mode 100644 index 0000000000..fa66aad643 Binary files /dev/null and b/monitoring/docs/L7t2_grafana.png differ diff --git a/monitoring/docs/L7t2_terminalout.png b/monitoring/docs/L7t2_terminalout.png new file mode 100644 index 0000000000..ea8013ee2a Binary files 
/dev/null and b/monitoring/docs/L7t2_terminalout.png differ diff --git a/monitoring/docs/L7t3_panels.png b/monitoring/docs/L7t3_panels.png new file mode 100644 index 0000000000..e293b44ee2 Binary files /dev/null and b/monitoring/docs/L7t3_panels.png differ diff --git a/monitoring/docs/L7t4_compose.png b/monitoring/docs/L7t4_compose.png new file mode 100644 index 0000000000..b322296f47 Binary files /dev/null and b/monitoring/docs/L7t4_compose.png differ diff --git a/monitoring/docs/L7t4_login.png b/monitoring/docs/L7t4_login.png new file mode 100644 index 0000000000..b2daf9e676 Binary files /dev/null and b/monitoring/docs/L7t4_login.png differ diff --git a/monitoring/docs/LAB07.md b/monitoring/docs/LAB07.md new file mode 100644 index 0000000000..9dfdb62041 --- /dev/null +++ b/monitoring/docs/LAB07.md @@ -0,0 +1,101 @@ +# Task 1 + +## Evidence + +#### infoservice +![infoservice](/monitoring/docs/L7t1_infoservice.png) + +#### grafana +![grafana](/monitoring/docs/L7t1_grafana.png) + +#### loki +![loki](/monitoring/docs/L7t1_loki.png) + +#### promtail +![promtail](/monitoring/docs/L7t1_promtail.png) + +# Task 2 + +#### terminal output + +![terminalout](/monitoring/docs/L7t2_terminalout.png) + +#### grafana + +![grafana](/monitoring/docs/L7t2_grafana.png) + +# Task 3 + +#### panels + +![panels](/monitoring/docs/L7t3_panels.png) + +# Task 4 + +#### login page + +![login page](/monitoring/docs/L7t4_login.png) + +#### docker compose output + +![docker compose output](/monitoring/docs/L7t4_compose.png) + +# Task 5 + + +## Architecture + +```text +devops-infoservice + ^ + | + v +promtail -> loki <-> grafana +``` + +## Setup Guide + +1. Run `docker compose up -d`. +2. Open the Grafana web UI (published on port 3000) and set up the panels described in the Dashboard section below. + +## Configuration + +The configuration files are self-explanatory: +- promtail discovers Docker containers, keeps only those labelled `logging=promtail` (the infoservice), and labels their logs with the container name.
+- loki stores the logs efficiently on the local filesystem with a TSDB index, rotates the index every 24 hours, and is configured with a 7-day retention period (`retention_period: 168h`). + +## Application Logging + +I derived a `JSONFormatter` class from `logging.Formatter` and attached it to the application's log handler, so every log record is emitted as a single JSON object. + +## Dashboard + +1. **Logs Table** (Logs visualization) + - Shows recent logs from all apps + - Query: `{app=~"devops-.*"}` + +2. **Request Rate** (Time series graph) + - Shows logs per second by app + - Query: `sum by (app) (rate({app=~"devops-.*"} [1m]))` + +3. **Error Logs** (Logs visualization) + - Shows only ERROR level logs + - Query: `{app=~"devops-.*"} | json | level="ERROR"` + +4. **Log Level Distribution** (Stat or Pie chart) + - Count logs by level (INFO, ERROR, etc.) + - Query: `sum by (level) (count_over_time({app=~"devops-.*"} | json [5m]))` + +## Production Config + +The most important configuration is setting the security environment variables. Also, I allocated several GiB of memory +and several cores of CPU to the services in total. + +## Testing + +Run `docker compose ps` to confirm that the containers are running, or check the logs in Grafana. + +## Challenges + +The stack was very difficult to configure. Luckily, the lecture provided a basic example.
+ diff --git a/monitoring/loki/config.yml b/monitoring/loki/config.yml new file mode 100644 index 0000000000..24eaed5982 --- /dev/null +++ b/monitoring/loki/config.yml @@ -0,0 +1,29 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + +schema_config: + configs: + - from: 2024-01-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +common: + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + instance_addr: 127.0.0.1 + kvstore: + store: inmemory + +limits_config: + retention_period: 168h diff --git a/monitoring/promtail/config.yml b/monitoring/promtail/config.yml new file mode 100644 index 0000000000..7520b10e31 --- /dev/null +++ b/monitoring/promtail/config.yml @@ -0,0 +1,21 @@ +server: + http_listen_port: 9080 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + filters: + - name: label + values: ["logging=promtail"] + relabel_configs: + - source_labels: ['__meta_docker_container_name'] + regex: '/(.*)' + target_label: 'container'