Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions api/pkg/di/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ type Container struct {
eventDispatcher *services.EventDispatcher
logger telemetry.Logger
attachmentRepository repositories.AttachmentRepository
userRistrettoCache *ristretto.Cache[string, entities.AuthContext]
}

// NewLiteContainer creates a Container without any routes or listeners
Expand Down Expand Up @@ -1730,8 +1731,11 @@ func (container *Container) PhoneRistrettoCache() (cache *ristretto.Cache[string
}

// UserRistrettoCache creates an in-memory *ristretto.Cache[string, entities.AuthContext]
func (container *Container) UserRistrettoCache() (cache *ristretto.Cache[string, entities.AuthContext]) {
container.logger.Debug(fmt.Sprintf("creating %T", cache))
func (container *Container) UserRistrettoCache() *ristretto.Cache[string, entities.AuthContext] {
if container.userRistrettoCache != nil {
return container.userRistrettoCache
}
container.logger.Debug(fmt.Sprintf("creating %T", container.userRistrettoCache))
ristrettoCache, err := ristretto.NewCache[string, entities.AuthContext](&ristretto.Config[string, entities.AuthContext]{
MaxCost: 5000,
NumCounters: 5000 * 10,
Expand All @@ -1740,6 +1744,7 @@ func (container *Container) UserRistrettoCache() (cache *ristretto.Cache[string,
if err != nil {
container.logger.Fatal(stacktrace.Propagate(err, "cannot create user ristretto cache"))
}
container.userRistrettoCache = ristrettoCache
return ristrettoCache
}

Expand Down
12 changes: 12 additions & 0 deletions api/pkg/repositories/gorm_user_repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,13 @@ func (repository *gormUserRepository) RotateAPIKey(ctx context.Context, userID e
}

user := new(entities.User)
var oldAPIKey string
err = crdbgorm.ExecuteTx(ctx, repository.db, nil,
func(tx *gorm.DB) error {
if err := tx.WithContext(ctx).Where("id = ?", userID).First(user).Error; err != nil {
return err
}
oldAPIKey = user.APIKey
return tx.WithContext(ctx).Model(user).
Clauses(clause.Returning{}).
Where("id = ?", userID).
Expand All @@ -78,6 +83,13 @@ func (repository *gormUserRepository) RotateAPIKey(ctx context.Context, userID e
return nil, repository.tracer.WrapErrorSpan(span, stacktrace.PropagateWithCode(err, ErrCodeNotFound, msg))
}

if err == nil && oldAPIKey != "" {
// Flush pending ristretto Set operations before Del to avoid a
// buffered Set re-adding the entry after removal.
repository.cache.Wait()
repository.cache.Del(oldAPIKey)
}
Comment on lines +86 to +91
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Cache invalidation is local-only in multi-instance deployments

repository.cache is a ristretto in-process *ristretto.Cache, so cache.Del(oldAPIKey) only removes the entry from the cache of the single API server instance that handled the rotate request. In a horizontally-scaled deployment with multiple replicas, every other instance will continue to serve the old key as valid for up to the 2-hour TTL. The fix completely resolves the issue on a single-instance setup, but is only a partial mitigation when running more than one replica.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Acknowledged — this is an inherent limitation of in-process ristretto caches in multi-instance deployments and is pre-existing behavior (same applies to the phone API key cache). The issue specifically targets single-instance self-hosted setups where this fix fully resolves the problem. A distributed cache invalidation mechanism would be a separate enhancement.


return user, nil
Comment on lines +86 to 93
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch — this is actually pre-existing behavior from the original code (the error handling was unchanged). However, it is worth noting that crdbgorm.ExecuteTx returns the error and clause.Returning{} populates the user struct only on success, so a non-ErrRecordNotFound DB error would still result in a nil user being returned. The caller in user_service.go:260 does check err != nil and wraps it. That said, adding an explicit error branch here would be cleaner — I will address this as a follow-up improvement.

}

Expand Down
85 changes: 85 additions & 0 deletions tests/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
Expand Down Expand Up @@ -213,6 +214,90 @@ func TestSendSMS_RateLimit(t *testing.T) {
}
}

func TestRotateAPIKey_InvalidatesCache(t *testing.T) {
ctx := context.Background()

// Use a dedicated test user so we don't mutate the shared userAPIKey
rotateUserAPIKey := "rotate-test-api-key"
rotateUserID := "rotate-test-user-id"

// 1) Confirm the dedicated user's API key works and warm the cache
meURL := apiBaseURL + "/v1/users/me"
req, err := http.NewRequestWithContext(ctx, http.MethodGet, meURL, nil)
require.NoError(t, err)
req.Header.Set("x-api-key", rotateUserAPIKey)

Comment on lines +217 to +229
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 50b3311 — the test now uses a dedicated rotate-test-user seeded in seed.sql with its own API key, so the shared userAPIKey is never mutated.

resp, err := http.DefaultClient.Do(req)
require.NoError(t, err)
defer resp.Body.Close()

body, err := io.ReadAll(resp.Body)
require.NoError(t, err)
require.Equal(t, http.StatusOK, resp.StatusCode, "initial auth failed: %s", string(body))

// Parse the current API key from the response
var meResp struct {
Data struct {
ID string `json:"id"`
APIKey string `json:"api_key"`
} `json:"data"`
}
require.NoError(t, json.Unmarshal(body, &meResp))
require.Equal(t, rotateUserID, meResp.Data.ID)
oldAPIKey := meResp.Data.APIKey
require.NotEmpty(t, oldAPIKey)
t.Logf("user ID: %s, old API key prefix: %s...", rotateUserID, oldAPIKey[:10])

// 2) Rotate the API key
rotateURL := fmt.Sprintf("%s/v1/users/%s/api-keys", apiBaseURL, rotateUserID)
req, err = http.NewRequestWithContext(ctx, http.MethodDelete, rotateURL, nil)
require.NoError(t, err)
req.Header.Set("x-api-key", rotateUserAPIKey)

resp, err = http.DefaultClient.Do(req)
require.NoError(t, err)
defer resp.Body.Close()

body, err = io.ReadAll(resp.Body)
require.NoError(t, err)
require.Equal(t, http.StatusOK, resp.StatusCode, "rotate failed: %s", string(body))

// Parse new API key from rotate response
var rotateResp struct {
Data struct {
APIKey string `json:"api_key"`
} `json:"data"`
}
require.NoError(t, json.Unmarshal(body, &rotateResp))
newAPIKey := rotateResp.Data.APIKey
require.NotEmpty(t, newAPIKey)
require.NotEqual(t, oldAPIKey, newAPIKey, "API key should have changed after rotation")
t.Logf("new API key prefix: %s...", newAPIKey[:10])

// 3) Old API key should immediately fail (401) — this is the bug regression check
req, err = http.NewRequestWithContext(ctx, http.MethodGet, meURL, nil)
require.NoError(t, err)
req.Header.Set("x-api-key", oldAPIKey)

resp, err = http.DefaultClient.Do(req)
require.NoError(t, err)
defer resp.Body.Close()
assert.Equal(t, http.StatusUnauthorized, resp.StatusCode, "old API key should return 401 after rotation")

// 4) New API key should work
req, err = http.NewRequestWithContext(ctx, http.MethodGet, meURL, nil)
require.NoError(t, err)
req.Header.Set("x-api-key", newAPIKey)

resp, err = http.DefaultClient.Do(req)
require.NoError(t, err)
defer resp.Body.Close()

body, err = io.ReadAll(resp.Body)
require.NoError(t, err)
assert.Equal(t, http.StatusOK, resp.StatusCode, "new API key should work: %s", string(body))
}

func TestSendSMS_OutstandingFlow(t *testing.T) {
ctx := context.Background()
phone := setupPhone(ctx, t, 60)
Comment on lines 214 to 303
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Test permanently invalidates the shared userAPIKey credential

TestRotateAPIKey_InvalidatesCache rotates the API key associated with userAPIKey ("test-user-api-key"), which is a package-level constant used by every other integration test in the suite via newAPIClient() and direct req.Header.Set("x-api-key", userAPIKey) calls (e.g. in pollMessageStatus, setupPhone, TestSendSMS_OutstandingFlow). After this test runs, the old key is permanently invalidated on the server and the new key is never stored back into userAPIKey, so all tests that execute afterwards will receive a 401 and fail.

The test needs to either (a) restore the original key at the end via t.Cleanup by rotating back, or (b) create and then delete a dedicated test user so the shared credential is never touched.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 50b3311 — added a dedicated rotate-test-user in seed.sql with its own API key (rotate-test-api-key). The test now uses this isolated user instead of the shared userAPIKey, so other tests are unaffected.

Expand Down
12 changes: 12 additions & 0 deletions tests/seed.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@ VALUES (
NOW()
) ON CONFLICT (id) DO NOTHING;

-- Test user for API key rotation tests (isolated to avoid mutating the shared test user)
INSERT INTO users (id, email, api_key, timezone, subscription_name, created_at, updated_at)
VALUES (
'rotate-test-user-id',
'rotate-test@httpsms.com',
'rotate-test-api-key',
'UTC',
'pro-monthly',
NOW(),
NOW()
) ON CONFLICT (id) DO NOTHING;

-- System user (for event queue auth)
INSERT INTO users (id, email, api_key, timezone, subscription_name, created_at, updated_at)
VALUES (
Expand Down
Loading