diff --git a/Cargo.lock b/Cargo.lock index 45f54db2..409112aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2321,6 +2321,7 @@ dependencies = [ "sha2 0.10.9", "shared_child", "smallvec", + "subtle", "tdx-attest", "tempfile", "tokio", diff --git a/Cargo.toml b/Cargo.toml index 01c20ecd..c712629e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -192,6 +192,7 @@ rustls-webpki = "0.103.10" schnorrkel = "0.11.4" sha2 = { version = "0.10.8", default-features = false } sha3 = "0.10.8" +subtle = "2" blake2 = "0.10.6" tokio-rustls = { version = "0.26.2", features = ["ring"] } x25519-dalek = { version = "2.0.1", features = ["static_secrets"] } diff --git a/gateway/Cargo.toml b/gateway/Cargo.toml index a1aefe17..af522ec4 100644 --- a/gateway/Cargo.toml +++ b/gateway/Cargo.toml @@ -62,6 +62,7 @@ uuid = { workspace = true, features = ["v4"] } rmp-serde.workspace = true or-panic.workspace = true base64.workspace = true +subtle.workspace = true [target.'cfg(unix)'.dependencies] nix = { workspace = true, features = ["resource"] } diff --git a/gateway/docs/cluster-deployment.md b/gateway/docs/cluster-deployment.md index 9441e625..191d4c0e 100644 --- a/gateway/docs/cluster-deployment.md +++ b/gateway/docs/cluster-deployment.md @@ -289,12 +289,18 @@ Important: ### 2.7 Verify Cluster Sync +The admin API requires a bearer token (see `core.admin.admin_token` in `gateway.toml`, +or the `ADMIN_API_TOKEN` env injected by `deploy-to-vmm.sh`). Export it once: + ```bash +export ADMIN_API_TOKEN=... # value from .env or gateway.toml +ADMIN_AUTH=(-H "Authorization: Bearer $ADMIN_API_TOKEN") + # Check sync status on any node (replace port with your admin port) -curl -s http://localhost:9016/prpc/WaveKvStatus | jq . +curl -s "${ADMIN_AUTH[@]}" http://localhost:9016/prpc/WaveKvStatus | jq . 
# List known cluster nodes -curl -s http://localhost:9016/prpc/Status | jq '.nodes' +curl -s "${ADMIN_AUTH[@]}" http://localhost:9016/prpc/Status | jq '.nodes' ``` A healthy cluster sync shows: @@ -567,7 +573,8 @@ $CLI info Check that the gateway sees the new app: ```bash -curl -s http://localhost:/prpc/Status | jq '.hosts' +curl -s -H "Authorization: Bearer $ADMIN_API_TOKEN" \ + http://localhost:/prpc/Status | jq '.hosts' ``` Expected output should include an entry with the app's `instance_id` and an assigned WireGuard IP: @@ -619,8 +626,11 @@ Gateway supports automatic TLS certificate management via the ACME protocol. Con ### 6.1 Configure ACME Service ```bash +ADMIN_AUTH=(-H "Authorization: Bearer $ADMIN_API_TOKEN") + # Set ACME URL (Let's Encrypt production) -curl -X POST "http://localhost:9016/prpc/SetCertbotConfig" \ +curl -X POST "${ADMIN_AUTH[@]}" \ + "http://localhost:9016/prpc/SetCertbotConfig" \ -H "Content-Type: application/json" \ -d '{"acme_url": "https://acme-v02.api.letsencrypt.org/directory"}' @@ -637,7 +647,8 @@ The Cloudflare API token needs the **DNS:Edit** permission on the target zone. 
C Cloudflare example: ```bash -curl -X POST "http://localhost:9016/prpc/CreateDnsCredential" \ +curl -X POST "${ADMIN_AUTH[@]}" \ + "http://localhost:9016/prpc/CreateDnsCredential" \ -H "Content-Type: application/json" \ -d '{ "name": "cloudflare-prod", @@ -669,7 +680,8 @@ Parameter description: Basic usage (using default DNS credential): ```bash -curl -X POST "http://localhost:9016/prpc/AddZtDomain" \ +curl -X POST "${ADMIN_AUTH[@]}" \ + "http://localhost:9016/prpc/AddZtDomain" \ -H "Content-Type: application/json" \ -d '{"domain": "example.com", "port": 443}' ``` @@ -677,7 +689,8 @@ curl -X POST "http://localhost:9016/prpc/AddZtDomain" \ Specifying DNS credential and node binding: ```bash -curl -X POST "http://localhost:9016/prpc/AddZtDomain" \ +curl -X POST "${ADMIN_AUTH[@]}" \ + "http://localhost:9016/prpc/AddZtDomain" \ -H "Content-Type: application/json" \ -d '{ "domain": "internal.example.com", @@ -711,7 +724,8 @@ Note: After adding a domain, the certificate is not issued immediately. Gateway ### 6.4 Manually Trigger Certificate Renewal ```bash -curl -X POST "http://localhost:9016/prpc/RenewZtDomainCert" \ +curl -X POST "${ADMIN_AUTH[@]}" \ + "http://localhost:9016/prpc/RenewZtDomainCert" \ -H "Content-Type: application/json" \ -d '{"domain": "example.com", "force": true}' ``` @@ -719,7 +733,7 @@ curl -X POST "http://localhost:9016/prpc/RenewZtDomainCert" \ ### 6.5 Check Certificate Status ```bash -curl -s http://localhost:9016/prpc/ListZtDomains | jq . +curl -s "${ADMIN_AUTH[@]}" http://localhost:9016/prpc/ListZtDomains | jq . 
``` A healthy certificate shows `has_cert: true` and `loaded_in_memory: true`: diff --git a/gateway/dstack-app/bootstrap-cluster.sh b/gateway/dstack-app/bootstrap-cluster.sh index 06bae6da..f1ad6b48 100755 --- a/gateway/dstack-app/bootstrap-cluster.sh +++ b/gateway/dstack-app/bootstrap-cluster.sh @@ -14,17 +14,26 @@ # Load .env if present if [ -f ".env" ]; then set -a + # shellcheck source=/dev/null source .env set +a fi ADMIN_ADDR="${1:-${GATEWAY_ADMIN_RPC_ADDR:-127.0.0.1:9203}}" +# bootstrap-cluster.sh authenticates to the admin API as an operator. The token +# is generated by deploy-to-vmm.sh and persisted in .env. +if [ -z "${ADMIN_API_TOKEN:-}" ]; then + echo "ERROR: ADMIN_API_TOKEN must be set (check .env)" >&2 + exit 1 +fi +AUTH_HEADER=(-H "Authorization: Bearer $ADMIN_API_TOKEN") + echo "Waiting for gateway admin API at $ADMIN_ADDR..." max_retries=60 retry=0 while [ $retry -lt $max_retries ]; do - if curl -sf "http://$ADMIN_ADDR/prpc/Status" >/dev/null 2>&1; then + if curl -sf "${AUTH_HEADER[@]}" "http://$ADMIN_ADDR/prpc/Status" >/dev/null 2>&1; then break fi retry=$((retry + 1)) @@ -47,19 +56,19 @@ else fi echo "Setting certbot config (ACME URL: $ACME_URL)..." 
-curl -sf -X POST "http://$ADMIN_ADDR/prpc/SetCertbotConfig" \ +curl -sf -X POST "${AUTH_HEADER[@]}" "http://$ADMIN_ADDR/prpc/SetCertbotConfig" \ -H "Content-Type: application/json" \ -d '{"acme_url":"'"$ACME_URL"'","renew_interval_secs":3600,"renew_before_expiration_secs":864000,"renew_timeout_secs":300}' >/dev/null \ && echo " Certbot config set" || echo " WARN: failed to set certbot config" # Create DNS credential if CF_API_TOKEN is provided and no credentials exist yet if [ -n "$CF_API_TOKEN" ]; then - existing=$(curl -sf "http://$ADMIN_ADDR/prpc/ListDnsCredentials" 2>/dev/null) + existing=$(curl -sf "${AUTH_HEADER[@]}" "http://$ADMIN_ADDR/prpc/ListDnsCredentials" 2>/dev/null) cred_count=$(echo "$existing" | jq -r '.credentials | length' 2>/dev/null || echo "0") if [ "$cred_count" = "0" ]; then echo "Creating default DNS credential..." - curl -sf -X POST "http://$ADMIN_ADDR/prpc/CreateDnsCredential" \ + curl -sf -X POST "${AUTH_HEADER[@]}" "http://$ADMIN_ADDR/prpc/CreateDnsCredential" \ -H "Content-Type: application/json" \ -d '{"name":"cloudflare","provider_type":"cloudflare","cf_api_token":"'"$CF_API_TOKEN"'","set_as_default":true}' >/dev/null \ && echo " DNS credential created" || echo " WARN: failed to create DNS credential" @@ -72,12 +81,12 @@ fi # Add ZT-Domain if SRV_DOMAIN is provided and domain doesn't exist yet if [ -n "$SRV_DOMAIN" ]; then - existing=$(curl -sf "http://$ADMIN_ADDR/prpc/ListZtDomains" 2>/dev/null) + existing=$(curl -sf "${AUTH_HEADER[@]}" "http://$ADMIN_ADDR/prpc/ListZtDomains" 2>/dev/null) has_domain=$(echo "$existing" | jq -r '.domains[]? | select(.domain=="'"$SRV_DOMAIN"'") | .domain' 2>/dev/null) if [ -z "$has_domain" ]; then echo "Adding ZT-Domain: $SRV_DOMAIN..." 
- curl -sf -X POST "http://$ADMIN_ADDR/prpc/AddZtDomain" \ + curl -sf -X POST "${AUTH_HEADER[@]}" "http://$ADMIN_ADDR/prpc/AddZtDomain" \ -H "Content-Type: application/json" \ -d '{"domain":"'"$SRV_DOMAIN"'","port":443,"priority":100}' >/dev/null \ && echo " ZT-Domain added" || echo " WARN: failed to add ZT-Domain" diff --git a/gateway/dstack-app/builder/entrypoint.sh b/gateway/dstack-app/builder/entrypoint.sh index 39182d10..1a3811ad 100755 --- a/gateway/dstack-app/builder/entrypoint.sh +++ b/gateway/dstack-app/builder/entrypoint.sh @@ -36,6 +36,12 @@ validate_env "$NODE_ID" validate_env "$WG_IP" validate_env "$WG_RESERVED_NET" validate_env "$WG_CLIENT_RANGE" +validate_env "$ADMIN_API_TOKEN" + +if [ -z "$ADMIN_API_TOKEN" ]; then + echo "ADMIN_API_TOKEN must be set when admin API is enabled" + exit 1 +fi # Validate $NODE_ID, must be a number if [[ ! "$NODE_ID" =~ ^[0-9]+$ ]]; then @@ -89,6 +95,7 @@ sync_connections_interval = "${SYNC_CONNECTIONS_INTERVAL:-30s}" enabled = true address = "${ADMIN_LISTEN_ADDR:-0.0.0.0}" port = ${ADMIN_LISTEN_PORT:-8001} +admin_token = "${ADMIN_API_TOKEN}" [core.wg] public_key = "$PUBLIC_KEY" diff --git a/gateway/dstack-app/deploy-to-vmm.sh b/gateway/dstack-app/deploy-to-vmm.sh index 51ee1c16..5ae76a87 100755 --- a/gateway/dstack-app/deploy-to-vmm.sh +++ b/gateway/dstack-app/deploy-to-vmm.sh @@ -95,6 +95,11 @@ WG_ADDR=0.0.0.0:9202 # The token used to launch the App APP_LAUNCH_TOKEN=$(tr -dc 'a-zA-Z0-9' < /dev/urandom | fold -w 32 | head -n 1) +# Bearer token required by the gateway admin API. Used by bootstrap-cluster.sh +# and any operator who calls the admin API. Persisted into .env so cluster +# bootstrap can reach the API after deploy. +ADMIN_API_TOKEN=$(tr -dc 'a-zA-Z0-9' < /dev/urandom | fold -w 48 | head -n 1) + # PROXY protocol: read v1/v2 header from inbound connections (e.g. when this # gateway sits behind a PP-aware L4 LB such as Cloudflare Spectrum or haproxy # with send-proxy). 
Set to "true" only if the upstream LB is configured to @@ -117,6 +122,7 @@ required_env_vars=( "GATEWAY_APP_ID" "MY_URL" "APP_LAUNCH_TOKEN" + "ADMIN_API_TOKEN" "NODE_ID" "KMS_URL" # "BOOTNODE_URL" @@ -180,6 +186,7 @@ WG_IP=$WG_IP WG_RESERVED_NET=$WG_RESERVED_NET WG_CLIENT_RANGE=$WG_CLIENT_RANGE APP_LAUNCH_TOKEN=$APP_LAUNCH_TOKEN +ADMIN_API_TOKEN=$ADMIN_API_TOKEN RPC_DOMAIN=$RPC_DOMAIN NODE_ID=$NODE_ID PROXY_LISTEN_PORT=$PROXY_LISTEN_PORT diff --git a/gateway/dstack-app/docker-compose.yaml b/gateway/dstack-app/docker-compose.yaml index 7ec32af3..e48c231b 100644 --- a/gateway/dstack-app/docker-compose.yaml +++ b/gateway/dstack-app/docker-compose.yaml @@ -41,6 +41,7 @@ services: - TIMEOUT_TOTAL=${TIMEOUT_TOTAL:-5h} - ADMIN_LISTEN_ADDR=${ADMIN_LISTEN_ADDR:-0.0.0.0} - ADMIN_LISTEN_PORT=${ADMIN_LISTEN_PORT:-8001} + - ADMIN_API_TOKEN=${ADMIN_API_TOKEN:-} - INBOUND_PP_ENABLED=${INBOUND_PP_ENABLED:-false} - TIMEOUT_PP_HEADER=${TIMEOUT_PP_HEADER:-5s} - PORT_POLICY_FETCH_TIMEOUT=${PORT_POLICY_FETCH_TIMEOUT:-10s} diff --git a/gateway/gateway.toml b/gateway/gateway.toml index 6d4151e0..a7383abb 100644 --- a/gateway/gateway.toml +++ b/gateway/gateway.toml @@ -24,6 +24,15 @@ timeout = "5s" [core.admin] enabled = false address = "127.0.0.1:8011" +# Shared secret required by every admin endpoint (RPC + dashboard). Can also +# be supplied via the `DSTACK_GATEWAY_ADMIN_TOKEN` or `ADMIN_API_TOKEN` env +# vars. Clients send it as `Authorization: Bearer <token>`, `X-Admin-Token`, +# or (GET only, for dashboard links) `?token=...`. Required unless +# `insecure_no_auth = true`. +admin_token = "" +# Development/testing escape hatch only. Never enable this on an admin +# interface that is reachable from the network. 
+insecure_no_auth = false [core.debug] insecure_enable_debug_rpc = false diff --git a/gateway/src/admin_auth.rs b/gateway/src/admin_auth.rs new file mode 100644 index 00000000..27fa3025 --- /dev/null +++ b/gateway/src/admin_auth.rs @@ -0,0 +1,558 @@ +// SPDX-FileCopyrightText: © 2025 Phala Network +// +// SPDX-License-Identifier: Apache-2.0 + +//! Admin server authentication. +//! +//! Attaches to the admin Rocket instance and rejects requests that do not +//! present the configured shared secret. The token is accepted via, in order: +//! 1. `X-Admin-Token` header (any method) +//! 2. `Authorization: Bearer <token>` header (any method) +//! 3. `Authorization: Basic <base64(user:pass)>` (any method; token may be +//! in either the user or password field — needed so plain browsers can +//! authenticate to the dashboard via the native HTTP-auth prompt) +//! 4. `?token=<token>` query parameter (GET only, for dashboard links) +//! +//! For (4), the `token` query parameter is stripped from the request URI after +//! successful validation so it doesn't propagate to access logs, downstream +//! handlers, or the Referer header. +//! +//! Rejected requests are forwarded to a sentinel route that returns HTTP 401 +//! with `WWW-Authenticate: Basic realm="dstack-gateway admin"` so browsers +//! show the native login prompt. All admin routes (prpc-generated and +//! dashboard) are protected by this single fairing attachment without +//! modifying the route declarations. +//! +//! The token is only ever held in memory as its SHA-256 hash; the configured +//! plaintext is dropped right after the fairing is constructed. 
+ +use anyhow::{bail, Result}; +use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _}; +use rocket::{ + fairing::{Fairing, Info, Kind}, + http::{uri::Origin, Header, Method, Status}, + response::Responder, + Data, Request, Response, Route, +}; +use sha2::{Digest, Sha256}; +use subtle::ConstantTimeEq; + +use crate::config::AdminConfig; + +const UNAUTH_URI: &str = "/__admin_unauthorized"; +const HEADER_NAME: &str = "X-Admin-Token"; +const QUERY_PARAM: &str = "token"; +const ENV_ADMIN_TOKEN: &str = "DSTACK_GATEWAY_ADMIN_TOKEN"; +const ENV_ADMIN_TOKEN_COMPAT: &str = "ADMIN_API_TOKEN"; +const BASIC_REALM: &str = "dstack-gateway admin"; + +pub struct AdminAuthFairing { + /// SHA-256 of the configured token. `None` = auth disabled (insecure mode). + token_hash: Option<[u8; 32]>, +} + +impl AdminAuthFairing { + /// Build a fairing from a resolved plaintext token. Empty disables auth. + pub fn new(token: &str) -> Self { + Self { + token_hash: (!token.is_empty()).then(|| sha256(token.as_bytes())), + } + } + + /// Resolve a token from config + env, applying the auth policy: + /// - `insecure_no_auth = true` → disabled (caller is expected to warn) + /// - else require a non-empty token from `admin_token`, + /// `DSTACK_GATEWAY_ADMIN_TOKEN`, or `ADMIN_API_TOKEN`. 
+ pub fn from_config(config: &AdminConfig) -> Result<Self> { + if config.insecure_no_auth { + return Ok(Self { token_hash: None }); + } + let token = if !config.admin_token.is_empty() { + config.admin_token.clone() + } else { + std::env::var(ENV_ADMIN_TOKEN) + .or_else(|_| std::env::var(ENV_ADMIN_TOKEN_COMPAT)) + .unwrap_or_default() + }; + let token = token.trim(); + if token.is_empty() { + bail!( + "admin API is enabled but no admin_token is configured; \ + set core.admin.admin_token, {ENV_ADMIN_TOKEN}, or {ENV_ADMIN_TOKEN_COMPAT}, \ + or set core.admin.insecure_no_auth = true (testing only)" + ); + } + Ok(Self::new(token)) + } + + fn extract_token(req: &Request<'_>) -> Option<String> { + if let Some(t) = req.headers().get_one(HEADER_NAME) { + return Some(t.to_string()); + } + if let Some(auth) = req.headers().get_one("Authorization") { + if let Some(t) = auth.strip_prefix("Bearer ") { + return Some(t.trim().to_string()); + } + if let Some(b64) = auth.strip_prefix("Basic ") { + if let Some(t) = basic_auth_token(b64.trim()) { + return Some(t); + } + } + } + // Query token is intended for browser links to the dashboard, so only + // accept it on GET to avoid leaking via mutating request URIs. + if req.method() == Method::Get { + for field in req.query_fields() { + if field.name.key_lossy().as_str() == QUERY_PARAM { + return Some(field.value.to_string()); + } + } + } + None + } +} + +fn sha256(bytes: &[u8]) -> [u8; 32] { + Sha256::digest(bytes).into() +} + +/// Decode a `Basic` credential and return whichever of user/password is +/// non-empty (we accept either so the browser prompt's two fields are +/// interchangeable for the operator). 
+fn basic_auth_token(b64: &str) -> Option<String> { + let decoded = BASE64.decode(b64).ok()?; + let text = std::str::from_utf8(&decoded).ok()?; + let (user, pass) = text.split_once(':').unwrap_or((text, "")); + if !pass.is_empty() { + return Some(pass.to_string()); + } + if !user.is_empty() { + return Some(user.to_string()); + } + None +} + +/// 401 response that triggers the browser's native HTTP-auth prompt. +struct Unauthorized; + +impl<'r> Responder<'r, 'static> for Unauthorized { + fn respond_to(self, _req: &'r Request<'_>) -> rocket::response::Result<'static> { + Response::build() + .status(Status::Unauthorized) + .header(Header::new( + "WWW-Authenticate", + format!("Basic realm=\"{BASIC_REALM}\""), + )) + .ok() + } +} + +/// Rebuild the request URI without the `token` query parameter, if present. +/// Returns `None` when there is nothing to strip. +fn strip_token_query(uri: &Origin<'_>) -> Option<Origin<'static>> { + let query = uri.query()?.as_str(); + let mut kept = Vec::new(); + let mut found = false; + for pair in query.split('&') { + let key = pair.split('=').next().unwrap_or(""); + if key == QUERY_PARAM { + found = true; + } else if !pair.is_empty() { + kept.push(pair); + } + } + if !found { + return None; + } + let path = uri.path().as_str(); + let new_uri = if kept.is_empty() { + path.to_string() + } else { + format!("{}?{}", path, kept.join("&")) + }; + Origin::parse_owned(new_uri).ok() +} + +#[rocket::async_trait] +impl Fairing for AdminAuthFairing { + fn info(&self) -> Info { + Info { + name: "admin auth", + kind: Kind::Request, + } + } + + async fn on_request(&self, req: &mut Request<'_>, _: &mut Data<'_>) { + let Some(expected_hash) = self.token_hash.as_ref() else { + return; + }; + // Avoid infinite re-routing if the fairing fires on the sentinel itself. 
+ if req.uri().path() == UNAUTH_URI { + return; + } + let provided = Self::extract_token(req).unwrap_or_default(); + let provided_hash = sha256(provided.as_bytes()); + let matches: bool = provided_hash.ct_eq(expected_hash).into(); + if !matches { + if let Ok(origin) = Origin::parse_owned(UNAUTH_URI.to_string()) { + req.set_uri(origin); + } + return; + } + // Authorized — strip ?token=... so it doesn't propagate to logs/handlers. + if let Some(stripped) = strip_token_query(req.uri()) { + req.set_uri(stripped); + } + } +} + +// Sentinel 401 handlers for every HTTP method Rocket can dispatch. We have to +// enumerate them because Rocket doesn't support a method-agnostic route. + +#[rocket::get("/__admin_unauthorized")] +fn unauth_get() -> Unauthorized { + Unauthorized +} + +#[rocket::post("/__admin_unauthorized", data = "<_data>")] +fn unauth_post(_data: Data<'_>) -> Unauthorized { + Unauthorized +} + +#[rocket::put("/__admin_unauthorized", data = "<_data>")] +fn unauth_put(_data: Data<'_>) -> Unauthorized { + Unauthorized +} + +#[rocket::patch("/__admin_unauthorized", data = "<_data>")] +fn unauth_patch(_data: Data<'_>) -> Unauthorized { + Unauthorized +} + +#[rocket::delete("/__admin_unauthorized")] +fn unauth_delete() -> Unauthorized { + Unauthorized +} + +#[rocket::options("/__admin_unauthorized")] +fn unauth_options() -> Unauthorized { + Unauthorized +} + +#[rocket::head("/__admin_unauthorized")] +fn unauth_head() -> Unauthorized { + Unauthorized +} + +pub fn routes() -> Vec<Route> { + rocket::routes![ + unauth_get, + unauth_post, + unauth_put, + unauth_patch, + unauth_delete, + unauth_options, + unauth_head, + ] +} + +#[cfg(test)] +mod tests { + use super::*; + use rocket::http::{ContentType, Header, Status}; + use rocket::local::asynchronous::Client; + + #[rocket::get("/protected")] + fn protected_get() -> &'static str { + "ok" + } + + #[rocket::post("/protected", data = "<_data>")] + fn protected_post(_data: Data<'_>) -> &'static str { + "ok" + } + + 
#[rocket::get("/echo?<token>&<other>")] + fn echo(token: Option<&str>, other: Option<&str>) -> String { + format!( + "token={} other={}", + token.unwrap_or(""), + other.unwrap_or("") + ) + } + + async fn make_client(token: &str) -> Client { + let r = rocket::build() + .attach(AdminAuthFairing::new(token)) + .mount("/", routes()) + .mount("/", rocket::routes![protected_get, protected_post, echo]); + Client::tracked(r).await.unwrap() + } + + #[rocket::async_test] + async fn empty_token_disables_auth() { + let client = make_client("").await; + let resp = client.get("/protected").dispatch().await; + assert_eq!(resp.status(), Status::Ok); + let resp = client.post("/protected").dispatch().await; + assert_eq!(resp.status(), Status::Ok); + } + + #[rocket::async_test] + async fn missing_token_returns_401() { + let client = make_client("s3cret").await; + let resp = client.get("/protected").dispatch().await; + assert_eq!(resp.status(), Status::Unauthorized); + let resp = client.post("/protected").dispatch().await; + assert_eq!(resp.status(), Status::Unauthorized); + } + + #[rocket::async_test] + async fn header_token_accepted() { + let client = make_client("s3cret").await; + let resp = client + .get("/protected") + .header(Header::new(HEADER_NAME, "s3cret")) + .dispatch() + .await; + assert_eq!(resp.status(), Status::Ok); + let resp = client + .post("/protected") + .header(ContentType::JSON) + .header(Header::new(HEADER_NAME, "s3cret")) + .dispatch() + .await; + assert_eq!(resp.status(), Status::Ok); + } + + #[rocket::async_test] + async fn bearer_token_accepted() { + let client = make_client("s3cret").await; + let resp = client + .get("/protected") + .header(Header::new("Authorization", "Bearer s3cret")) + .dispatch() + .await; + assert_eq!(resp.status(), Status::Ok); + } + + #[rocket::async_test] + async fn wrong_token_rejected() { + let client = make_client("s3cret").await; + let resp = client + .get("/protected") + .header(Header::new(HEADER_NAME, "wrong")) + .dispatch() + .await; + 
assert_eq!(resp.status(), Status::Unauthorized); + } + + #[rocket::async_test] + async fn header_takes_precedence_over_query() { + let client = make_client("s3cret").await; + // Wrong query token but correct header → authorized. + let resp = client + .get("/protected?token=wrong") + .header(Header::new(HEADER_NAME, "s3cret")) + .dispatch() + .await; + assert_eq!(resp.status(), Status::Ok); + } + + #[rocket::async_test] + async fn query_token_only_accepted_on_get() { + let client = make_client("s3cret").await; + // GET with ?token= → allowed + let resp = client.get("/protected?token=s3cret").dispatch().await; + assert_eq!(resp.status(), Status::Ok); + // POST with ?token= → rejected (query auth not honored on mutating methods) + let resp = client.post("/protected?token=s3cret").dispatch().await; + assert_eq!(resp.status(), Status::Unauthorized); + } + + #[rocket::async_test] + async fn query_token_stripped_after_auth() { + let client = make_client("s3cret").await; + // Token is stripped → handler sees no `token` param, only `other`. 
+ let resp = client.get("/echo?token=s3cret&other=keep").dispatch().await; + assert_eq!(resp.status(), Status::Ok); + let body = resp.into_string().await.unwrap(); + assert_eq!(body, "token= other=keep"); + } + + #[rocket::async_test] + async fn query_token_stripped_when_authed_via_header() { + let client = make_client("s3cret").await; + let resp = client + .get("/echo?token=anything&other=keep") + .header(Header::new(HEADER_NAME, "s3cret")) + .dispatch() + .await; + assert_eq!(resp.status(), Status::Ok); + let body = resp.into_string().await.unwrap(); + assert_eq!(body, "token= other=keep"); + } + + fn hash_of(fairing: &AdminAuthFairing) -> Option<[u8; 32]> { + fairing.token_hash + } + + #[test] + fn from_config_disabled_when_insecure_flag_set() { + let cfg = AdminConfig { + enabled: true, + admin_token: String::new(), + insecure_no_auth: true, + }; + let fairing = match AdminAuthFairing::from_config(&cfg) { + Ok(f) => f, + Err(e) => panic!("expected Ok, got err: {e}"), + }; + assert!(hash_of(&fairing).is_none()); + } + + #[test] + fn from_config_uses_config_token() { + let cfg = AdminConfig { + enabled: true, + admin_token: "from-config".into(), + insecure_no_auth: false, + }; + let fairing = match AdminAuthFairing::from_config(&cfg) { + Ok(f) => f, + Err(e) => panic!("expected Ok, got err: {e}"), + }; + assert_eq!(hash_of(&fairing), Some(sha256(b"from-config"))); + } + + // Env-touching cases are combined into a single test so cargo's parallel + // runner doesn't race on `DSTACK_GATEWAY_ADMIN_TOKEN` / `ADMIN_API_TOKEN`. + #[test] + fn from_config_env_paths() { + let empty_cfg = AdminConfig { + enabled: true, + admin_token: String::new(), + insecure_no_auth: false, + }; + + // Baseline: no env, no config token → error. 
+ unsafe { + std::env::remove_var(ENV_ADMIN_TOKEN); + std::env::remove_var(ENV_ADMIN_TOKEN_COMPAT); + } + let err = match AdminAuthFairing::from_config(&empty_cfg) { + Err(e) => e, + Ok(_) => panic!("expected error, got Ok"), + }; + assert!(err.to_string().contains("no admin_token is configured")); + + // Primary env var picked up. + unsafe { + std::env::set_var(ENV_ADMIN_TOKEN, "from-env"); + } + let fairing = match AdminAuthFairing::from_config(&empty_cfg) { + Ok(f) => f, + Err(e) => panic!("expected Ok, got err: {e}"), + }; + assert_eq!(hash_of(&fairing), Some(sha256(b"from-env"))); + unsafe { + std::env::remove_var(ENV_ADMIN_TOKEN); + } + + // Compat env var picked up when primary is absent. + unsafe { + std::env::set_var(ENV_ADMIN_TOKEN_COMPAT, "from-compat"); + } + let fairing = match AdminAuthFairing::from_config(&empty_cfg) { + Ok(f) => f, + Err(e) => panic!("expected Ok, got err: {e}"), + }; + assert_eq!(hash_of(&fairing), Some(sha256(b"from-compat"))); + unsafe { + std::env::remove_var(ENV_ADMIN_TOKEN_COMPAT); + } + } + + #[rocket::async_test] + async fn unauth_returns_401_on_all_methods() { + let client = make_client("s3cret").await; + // PUT / DELETE / PATCH / OPTIONS to a protected URI with no token + // should be rewritten to the sentinel and return 401, not 404. 
+ for m in [Method::Put, Method::Delete, Method::Patch, Method::Options] { + let resp = client.req(m, "/protected").dispatch().await; + assert_eq!( + resp.status(), + Status::Unauthorized, + "method {m:?} expected 401, got {}", + resp.status() + ); + } + } + + fn basic_header(user: &str, pass: &str) -> Header<'static> { + let creds = format!("{user}:{pass}"); + Header::new("Authorization", format!("Basic {}", BASE64.encode(creds))) + } + + #[rocket::async_test] + async fn basic_auth_password_field_accepted() { + let client = make_client("s3cret").await; + let resp = client + .get("/protected") + .header(basic_header("admin", "s3cret")) + .dispatch() + .await; + assert_eq!(resp.status(), Status::Ok); + } + + #[rocket::async_test] + async fn basic_auth_user_field_accepted_when_password_empty() { + let client = make_client("s3cret").await; + // Some browser users paste the token into the username field by mistake. + let resp = client + .get("/protected") + .header(basic_header("s3cret", "")) + .dispatch() + .await; + assert_eq!(resp.status(), Status::Ok); + } + + #[rocket::async_test] + async fn basic_auth_wrong_password_rejected() { + let client = make_client("s3cret").await; + let resp = client + .get("/protected") + .header(basic_header("admin", "wrong")) + .dispatch() + .await; + assert_eq!(resp.status(), Status::Unauthorized); + } + + #[rocket::async_test] + async fn basic_auth_malformed_rejected() { + let client = make_client("s3cret").await; + // Not valid base64 at all. 
+ let resp = client + .get("/protected") + .header(Header::new("Authorization", "Basic !!not-base64!!")) + .dispatch() + .await; + assert_eq!(resp.status(), Status::Unauthorized); + } + + #[rocket::async_test] + async fn unauthorized_response_includes_www_authenticate() { + let client = make_client("s3cret").await; + let resp = client.get("/protected").dispatch().await; + assert_eq!(resp.status(), Status::Unauthorized); + let www = resp + .headers() + .get_one("WWW-Authenticate") + .expect("missing WWW-Authenticate header"); + assert!( + www.starts_with("Basic realm="), + "expected Basic challenge, got {www:?}" + ); + assert!(www.contains("dstack-gateway admin")); + } +} diff --git a/gateway/src/config.rs b/gateway/src/config.rs index 9f57f984..68db41c8 100644 --- a/gateway/src/config.rs +++ b/gateway/src/config.rs @@ -286,6 +286,15 @@ impl Config { #[derive(Debug, Clone, Deserialize)] pub struct AdminConfig { pub enabled: bool, + /// Shared secret required to call any admin endpoint (RPC + dashboard). + /// Can also be supplied via `DSTACK_GATEWAY_ADMIN_TOKEN` / `ADMIN_API_TOKEN` + /// env vars. Required unless `insecure_no_auth = true`. + #[serde(default)] + pub admin_token: String, + /// Disable authentication entirely. Development/testing only; never enable + /// on an admin interface that is reachable from the network. 
+ #[serde(default)] + pub insecure_no_auth: bool, } #[derive(Debug, Clone, Deserialize)] diff --git a/gateway/src/main.rs b/gateway/src/main.rs index 1349d966..032303be 100644 --- a/gateway/src/main.rs +++ b/gateway/src/main.rs @@ -24,6 +24,7 @@ use main_service::{Proxy, ProxyOptions, RpcHandler}; use crate::debug_service::DebugRpcHandler; +mod admin_auth; mod admin_service; mod cert_store; mod config; @@ -275,7 +276,12 @@ async fn main() -> Result<()> { }; let proxy_config = config.proxy.clone(); let pccs_url = config.pccs_url.clone(); - let admin_enabled = config.admin.enabled; + let admin_auth = if config.admin.enabled { + Some(admin_auth::AdminAuthFairing::from_config(&config.admin)?) + } else { + None + }; + let admin_insecure = config.admin.insecure_no_auth; let debug_config = config.debug.clone(); let state = Proxy::new(ProxyOptions { config, @@ -320,8 +326,17 @@ async fn main() -> Result<()> { let admin_state = state.clone(); let debug_state = state; let admin_srv = async move { - if admin_enabled { + if let Some(auth_fairing) = admin_auth { + if admin_insecure { + tracing::warn!( + "admin server running with insecure_no_auth = true; admin API is exposed without authentication" + ); + } else { + tracing::info!("admin server authentication enabled"); + } rocket::custom(admin_figment) + .attach(auth_fairing) + .mount("/", admin_auth::routes()) .mount("/", web_routes::routes()) .mount("/", prpc!(Proxy, AdminRpcHandler, trim: "Admin.")) .mount("/prpc", prpc!(Proxy, AdminRpcHandler, trim: "Admin.")) diff --git a/gateway/test-run/e2e/configs/gateway-1.toml b/gateway/test-run/e2e/configs/gateway-1.toml index dfbe1609..dc8e6f3b 100644 --- a/gateway/test-run/e2e/configs/gateway-1.toml +++ b/gateway/test-run/e2e/configs/gateway-1.toml @@ -19,6 +19,7 @@ rpc_domain = "gateway-1" enabled = true port = 9016 address = "0.0.0.0" +admin_token = "e2e-admin-token" [core.debug] insecure_enable_debug_rpc = true diff --git a/gateway/test-run/e2e/configs/gateway-2.toml 
b/gateway/test-run/e2e/configs/gateway-2.toml index b825fda5..c733710b 100644 --- a/gateway/test-run/e2e/configs/gateway-2.toml +++ b/gateway/test-run/e2e/configs/gateway-2.toml @@ -19,6 +19,7 @@ rpc_domain = "gateway-2" enabled = true port = 9016 address = "0.0.0.0" +admin_token = "e2e-admin-token" [core.debug] insecure_enable_debug_rpc = true diff --git a/gateway/test-run/e2e/configs/gateway-3.toml b/gateway/test-run/e2e/configs/gateway-3.toml index f30cb6a1..b02a57fc 100644 --- a/gateway/test-run/e2e/configs/gateway-3.toml +++ b/gateway/test-run/e2e/configs/gateway-3.toml @@ -19,6 +19,7 @@ rpc_domain = "gateway-3" enabled = true port = 9016 address = "0.0.0.0" +admin_token = "e2e-admin-token" [core.debug] insecure_enable_debug_rpc = true diff --git a/gateway/test-run/e2e/test.sh b/gateway/test-run/e2e/test.sh index 9c1db1a2..1ed9df3f 100755 --- a/gateway/test-run/e2e/test.sh +++ b/gateway/test-run/e2e/test.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-FileCopyrightText: 2024-2025 Phala Network # # SPDX-License-Identifier: Apache-2.0 @@ -22,6 +22,10 @@ GATEWAY_PROXIES="gateway-1:9014 gateway-2:9014 gateway-3:9014" GATEWAY_DEBUG_URLS="http://gateway-1:9015 http://gateway-2:9015 http://gateway-3:9015" GATEWAY_ADMIN="http://gateway-1:9016" +# Must match `admin_token` in configs/gateway-*.toml +ADMIN_TOKEN="e2e-admin-token" +ADMIN_AUTH_HEADER="Authorization: Bearer ${ADMIN_TOKEN}" + # External services MOCK_CF_API="http://mock-cf-dns-api:8080" PEBBLE_DIR="http://pebble:14000/dir" @@ -75,27 +79,6 @@ run_test() { fi } -# Wait for HTTP service to respond -wait_for_service() { - local url="$1" - local name="$2" - local max_wait="${3:-60}" - local waited=0 - - log_info "Waiting for $name..." 
- while [ $waited -lt $max_wait ]; do - if curl -sf "$url" > /dev/null 2>&1; then - log_info "$name is ready" - return 0 - fi - sleep 2 - waited=$((waited + 2)) - done - - log_error "$name failed to become ready within ${max_wait}s" - return 1 -} - # ==================== Domain Helpers ==================== # Convert base domain to test SNI: test0.local -> gateway.test0.local @@ -158,7 +141,8 @@ test_certificates_match() { test_certificate_from_pebble() { local sni="$1" - local proxy=$(echo "$GATEWAY_PROXIES" | cut -d' ' -f1) + local proxy + proxy=$(echo "$GATEWAY_PROXIES" | cut -d' ' -f1) get_cert_issuer "$proxy" "$sni" | grep -qi "pebble" } @@ -183,6 +167,7 @@ setup_certbot_config() { # Set ACME URL log_info "Setting ACME URL: ${ACME_URL}" if ! curl -sf -X POST "${GATEWAY_ADMIN}/prpc/Admin.SetCertbotConfig" \ + -H "${ADMIN_AUTH_HEADER}" \ -H "Content-Type: application/json" \ -d '{"acme_url": "'"${ACME_URL}"'"}' > /dev/null; then log_error "Failed to set certbot config" @@ -192,6 +177,7 @@ setup_certbot_config() { # Create DNS credential log_info "Creating DNS credential..." if ! curl -sf -X POST "${GATEWAY_ADMIN}/prpc/Admin.CreateDnsCredential" \ + -H "${ADMIN_AUTH_HEADER}" \ -H "Content-Type: application/json" \ -d '{ "name": "test-cloudflare", @@ -210,11 +196,13 @@ setup_certbot_config() { for domain in $CERT_DOMAINS; do log_info "Adding domain: $domain" curl -sf -X POST "${GATEWAY_ADMIN}/prpc/Admin.AddZtDomain" \ + -H "${ADMIN_AUTH_HEADER}" \ -H "Content-Type: application/json" \ -d '{"domain": "'"${domain}"'"}' > /dev/null || true log_info "Triggering renewal for: $domain" curl -sf -X POST "${GATEWAY_ADMIN}/prpc/Admin.RenewZtDomainCert" \ + -H "${ADMIN_AUTH_HEADER}" \ -H "Content-Type: application/json" \ -d '{"domain": "'"${domain}"'", "force": true}' > /dev/null || \ log_warn "Renewal request failed for $domain (may retry)" @@ -223,6 +211,31 @@ setup_certbot_config() { return 0 } +# Returns 0 if curl, invoked with the arguments after $1, reports an HTTP status code equal to $1.
+http_status_eq() { + local expected="$1" + shift + local actual + actual=$(curl -s -o /dev/null -w '%{http_code}' "$@") + [ "$actual" = "$expected" ] +} + +# Returns 0 if all three admin auth checks pass: missing 401, wrong 401, right 200. +test_admin_auth() { + log_info "checking admin auth on ${GATEWAY_ADMIN}" + # Missing token → 401 + http_status_eq 401 "${GATEWAY_ADMIN}/prpc/Admin.Status" \ + || { log_error "no-token request did not return 401"; return 1; } + # Wrong token → 401 + http_status_eq 401 "${GATEWAY_ADMIN}/prpc/Admin.Status" \ + -H "Authorization: Bearer wrong-token" \ + || { log_error "wrong-token request did not return 401"; return 1; } + # Correct token → 200 + http_status_eq 200 "${GATEWAY_ADMIN}/prpc/Admin.Status" \ + -H "${ADMIN_AUTH_HEADER}" \ + || { log_error "valid-token request did not return 200"; return 1; } +} + # ==================== Main ==================== main() { @@ -241,17 +254,23 @@ main() { i=$((i + 1)) done - # Phase 3: Configure certbot - log_phase 3 "Configure certbot" + # Phase 3: Admin auth gating + log_phase 3 "Admin token auth" + run_test "Admin endpoint accepts valid token and rejects missing/wrong" \ + "$(test_admin_auth; echo $?)" + + # Phase 4: Configure certbot + log_phase 4 "Configure certbot" if ! setup_certbot_config; then log_error "Failed to setup certbot configuration" fi - # Phase 4: Certificate issuance - log_phase 4 "Certificate issuance" - local first_domain=$(echo "$CERT_DOMAINS" | cut -d' ' -f1) - local first_sni=$(get_test_sni "$first_domain") - local first_proxy=$(echo "$GATEWAY_PROXIES" | cut -d' ' -f1) + # Phase 5: Certificate issuance + log_phase 5 "Certificate issuance" + local first_domain first_sni first_proxy + first_domain=$(echo "$CERT_DOMAINS" | cut -d' ' -f1) + first_sni=$(get_test_sni "$first_domain") + first_proxy=$(echo "$GATEWAY_PROXIES" | cut -d' ' -f1) log_info "Waiting for certificates (up to 120s)..." local waited=0 @@ -265,8 +284,9 @@ main() { log_info "Waiting... 
(${waited}s)" done + local sni wildcard for domain in $CERT_DOMAINS; do - local sni=$(get_test_sni "$domain") + sni=$(get_test_sni "$domain") run_test "Certificate issued for $domain" \ "$(test_certificate_issued "$first_proxy" "$sni"; echo $?)" done @@ -274,30 +294,31 @@ main() { log_info "Waiting 20s for cluster sync..." sleep 20 - # Phase 5: Certificate consistency - log_phase 5 "Certificate consistency" + # Phase 6: Certificate consistency + log_phase 6 "Certificate consistency" for domain in $CERT_DOMAINS; do - local sni=$(get_test_sni "$domain") + sni=$(get_test_sni "$domain") run_test "All gateways have same cert for $domain" \ "$(test_certificates_match "$sni"; echo $?)" run_test "Cert for $domain issued by Pebble" \ "$(test_certificate_from_pebble "$sni"; echo $?)" done - # Phase 6: SNI-based selection - log_phase 6 "SNI-based certificate selection" + # Phase 7: SNI-based selection + log_phase 7 "SNI-based certificate selection" for domain in $CERT_DOMAINS; do - local sni=$(get_test_sni "$domain") - local wildcard=$(get_wildcard_domain "$domain") + sni=$(get_test_sni "$domain") + wildcard=$(get_wildcard_domain "$domain") run_test "SNI $sni returns $wildcard cert" \ "$(test_sni_cert_selection "$first_proxy" "$sni" "$wildcard"; echo $?)" done - # Phase 7: Proxy TLS health - log_phase 7 "Proxy TLS health endpoint" + # Phase 8: Proxy TLS health + log_phase 8 "Proxy TLS health endpoint" + local i for domain in $CERT_DOMAINS; do - local sni=$(get_test_sni "$domain") - local i=1 + sni=$(get_test_sni "$domain") + i=1 for proxy in $GATEWAY_PROXIES; do run_test "Gateway $i TLS health ($sni)" \ "$(test_proxy_tls_health "$proxy" "$sni"; echo $?)" @@ -305,9 +326,10 @@ main() { done done - # Phase 8: DNS records (informational) - log_phase 8 "DNS-01 challenge records" - local records=$(curl -sf "${MOCK_CF_API}/api/records" 2>/dev/null || echo "") + # Phase 9: DNS records (informational) + log_phase 9 "DNS-01 challenge records" + local records + records=$(curl -sf 
"${MOCK_CF_API}/api/records" 2>/dev/null || echo "") if echo "$records" | grep -q "TXT"; then log_success "DNS TXT records found" else