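# NOTE(review): the lines below appear to be web-page text captured along with
# the workflow (not YAML); they are kept as comments so the file can parse.
# Skip to content

# fix: reorder Vue component #891

# fix: reorder Vue component

# fix: reorder Vue component #891
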
# Workflow header: triggers, token permissions and concurrency policy.
name: Update LLM benchmarks

on:
  # Manual run; the caller supplies the pull request number.
  workflow_dispatch:
    inputs:
      pr_number:
        description: 'Pull Request Number'
        required: true
  # Comment-driven run; fires only when a comment is first created.
  issue_comment:
    types:
      - created

permissions:
  contents: read
  pull-requests: write
  issues: write

# One concurrency group per pull request. Comments that do not start with
# /update-llm-benchmark receive an extra '-unrelated-comment' suffix, so they
# land in a different group than real benchmark requests. A newer run in the
# same group cancels the one in progress.
concurrency:
  group: >-
    llm-benchmark
    -${{ github.event_name == 'issue_comment' && github.event.issue.number || inputs.pr_number }}
    ${{ github.event_name == 'issue_comment' && !startsWith(github.event.comment.body, '/update-llm-benchmark') && '-unrelated-comment' }}
  cancel-in-progress: true
jobs:
  # Runs the LLM benchmark tool built from `master` against the contents of a
  # pull request, comments the results on the PR, and pushes the regenerated
  # files under docs/llms back to the PR branch.
  update-llm-benchmark:
    # Runnable either with a comment that starts with /update-llm-benchmark
    # or by manually dispatching
    if: |
      (github.event_name == 'issue_comment' && github.event.issue.pull_request && startsWith(github.event.comment.body, '/update-llm-benchmark')) ||
      (github.event_name == 'workflow_dispatch')
    runs-on: spacetimedb-new-runner
    container:
      image: localhost:5000/spacetimedb-ci:latest
      options: >-
        --privileged
    steps:
      # Here we install the spacetime CLI for faster execution of the tests
      # SpacetimeDB itself is not under test here, rather it's the docs.
      # If we want to change that it is possible to have the benchmark compile
      # SpacetimeDB from source.
      - name: Install spacetime CLI
        run: |
          curl -sSf https://install.spacetimedb.com | sh -s -- -y
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      # Resolve the PR number (from the comment event or the dispatch input)
      # and expose the head branch/repo details as step outputs.
      - name: Load PR info
        id: pr
        uses: actions/github-script@v7
        with:
          script: |
            let prNumber;
            if (context.eventName === 'issue_comment') {
              prNumber = context.payload.issue.number;
            } else if (context.eventName === 'workflow_dispatch') {
              const raw = context.payload.inputs?.pr_number;
              // Accept digits only so nothing unexpected reaches the API call.
              if (!raw || !/^\d+$/.test(raw)) {
                core.setFailed(`Invalid pr_number input: '${raw}'.`);
                return;
              }
              prNumber = Number(raw);
            } else {
              core.setFailed(`Unsupported event: ${context.eventName}`);
              return;
            }
            const { data: pr } = await github.rest.pulls.get({
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: prNumber,
            });
            // The head repository can be missing (e.g. the fork no longer
            // exists); fail with a clear message instead of a TypeError below.
            if (!pr.head.repo) {
              core.setFailed(`PR #${prNumber} has no head repository; cannot check out or push to its branch.`);
              return;
            }
            core.setOutput('number', String(prNumber));
            core.setOutput('head_ref', pr.head.ref);
            core.setOutput('head_sha', pr.head.sha);
            core.setOutput('head_repo_full_name', pr.head.repo.full_name);
            core.setOutput('head_owner_type', pr.head.repo.owner.type); // "User"|"Organization"
            core.setOutput('maintainer_can_modify', String(pr.maintainer_can_modify));

      # If this was kicked off by a comment, ensure that the commenter is
      # a collaborator on the repo. We don't want unprivileged users to run benchmarks.
      # Note that the workflow that will be run will be the one that is on the `master`
      # branch, NOT the one from the PR. This is important so that the PR author can't
      # sneak in an exfiltration exploit.
      - name: Check commenter permission
        if: github.event_name == 'issue_comment'
        uses: actions/github-script@v7
        with:
          script: |
            const user = context.payload.comment.user.login;
            const { data } = await github.rest.repos.getCollaboratorPermissionLevel({
              owner: context.repo.owner,
              repo: context.repo.repo,
              username: user,
            });
            // NOTE(review): the REST API's `permission` field may only report
            // admin/write/read/none (with maintain and triage folded into
            // write and read); confirm whether `role_name` should be checked.
            const allowed = new Set(['admin', 'maintain', 'write', 'triage']);
            if (!allowed.has(data.permission)) {
              core.setFailed(`User ${user} has permission '${data.permission}', not allowed to run benchmarks.`);
            }

      # If the PR is from a fork, we need to be able to have GitHub actions commit back
      # to the forked repo, so that we can update the benchmark results.
      # In order to do this we need to ensure that the PR is configured to allow the maintainers
      # of the SpacetimeDB repo to commit back to the fork.
      - name: Check fork pushability (and comment if not)
        if: steps.pr.outputs.head_repo_full_name != github.repository
        uses: actions/github-script@v7
        env:
          PR_NUMBER: ${{ steps.pr.outputs.number }}
          HEAD_OWNER_TYPE: ${{ steps.pr.outputs.head_owner_type }}
          MAINTAINER_CAN_MODIFY: ${{ steps.pr.outputs.maintainer_can_modify }}
        with:
          script: |
            const issue_number = Number(process.env.PR_NUMBER);
            const headOwnerType = process.env.HEAD_OWNER_TYPE;
            const canModify = process.env.MAINTAINER_CAN_MODIFY === 'true';
            if (headOwnerType === 'Organization') {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number,
                body: [
                  "I can’t push benchmark updates to this PR because it comes from an **organization-owned fork**.",
                  "GitHub doesn’t allow granting upstream maintainers push permissions to org-owned forks.",
                  "",
                  "Options:",
                  "- Reopen the PR from a **personal fork** with **Allow edits from maintainers** enabled, or",
                  "- A maintainer can apply the benchmark update on an internal branch."
                ].join("\n"),
              });
              core.setFailed("Org-owned fork PR is not pushable by maintainers.");
              return;
            }
            if (!canModify) {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number,
                body: [
                  "I can’t push benchmark updates to this PR branch until you enable **Allow edits from maintainers**.",
                  "Please check the box on the PR page, then re-comment `/update-llm-benchmark`.",
                  "See https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/allowing-changes-to-a-pull-request-branch-created-from-a-fork"
                ].join("\n"),
              });
              core.setFailed("maintainer_can_modify is false; author must enable 'Allow edits from maintainers'.");
            }

      # Run the benchmark that is already checked into master to prevent
      # an exfiltration attack whereby the PR author tries to sneak in an exploit
      # and get a maintainer to run the modified benchmark without looking at the
      # PR first. This ensures that we only ever execute code that is checked into
      # master.
      - name: Checkout master (build/install tool from trusted code)
        uses: actions/checkout@v4
        with:
          ref: master
          fetch-depth: 0
          persist-credentials: false

      - uses: dtolnay/rust-toolchain@stable

      - uses: Swatinem/rust-cache@v2

      # Ensure we use a user-writable .NET install (not /usr/share/dotnet),
      # so workload installs don't require sudo.
      - name: Setup .NET SDK
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: "8.0.x"

      - name: Install WASI workload (wasi-experimental)
        env:
          DOTNET_MULTILEVEL_LOOKUP: "0"
          DOTNET_CLI_HOME: ${{ runner.temp }}/dotnet-home
          DOTNET_SKIP_FIRST_TIME_EXPERIENCE: "1"
        run: |
          dotnet --info
          dotnet workload install wasi-experimental --skip-manifest-update --disable-parallel

      - name: Install llm-benchmark tool from master
        run: |
          cargo install --path tools/xtask-llm-benchmark --locked
          command -v llm_benchmark

      # Check out the repo on the branch, but ONLY use this code as data!
      # Never execute code that is on the PR branch.
      - name: Checkout PR head (branch)
        uses: actions/checkout@v4
        with:
          repository: ${{ steps.pr.outputs.head_repo_full_name }}
          ref: ${{ steps.pr.outputs.head_sha }}
          fetch-depth: 0
          persist-credentials: false

      # Run the benchmark against the PR using the installed tool from the
      # master branch.
      - name: Run benchmark (with provider keys)
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          # Prevent MSBuild node reuse issues that cause "Pipe is broken" errors
          # when running multiple dotnet publish commands in parallel.
          # See: https://github.com/dotnet/msbuild/issues/6657
          MSBUILDDISABLENODEREUSE: "1"
          DOTNET_CLI_USE_MSBUILD_SERVER: "0"
        run: |
          llm_benchmark ci-quickfix
          llm_benchmark ci-check

      # Generate failure analysis if there are any failures.
      # Best effort: `|| true` keeps the job going if the analysis fails.
      - name: Generate failure analysis
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          llm_benchmark analyze -o docs/llms/docs-benchmark-analysis.md || true

      # Generate PR comment markdown (compares against master baseline)
      - name: Generate PR comment markdown
        run: |
          llm_benchmark ci-comment

      # Fail the job if the benchmark modified any tracked file outside docs/llms.
      - name: Ensure only docs/llms changed
        run: |
          set -euo pipefail
          CHANGED="$(git diff --name-only)"
          if [ -z "$CHANGED" ]; then
            echo "No changes."
            exit 0
          fi
          if echo "$CHANGED" | grep -qvE '^docs/llms/'; then
            echo "Benchmark produced changes outside docs/llms:"
            echo "$CHANGED" | grep -vE '^docs/llms/'
            exit 1
          fi

      # Comment the benchmark results on the PR
      - name: Comment benchmark results on PR
        uses: actions/github-script@v7
        env:
          PR_NUMBER: ${{ steps.pr.outputs.number }}
        with:
          github-token: ${{ secrets.CLOCKWORK_LABS_BOT_PAT }}
          script: |
            const fs = require('fs');
            // Read the pre-generated comment markdown
            const commentPath = 'docs/llms/docs-benchmark-comment.md';
            if (!fs.existsSync(commentPath)) {
              core.setFailed(`Comment file not found: ${commentPath}`);
              return;
            }
            let body = fs.readFileSync(commentPath, 'utf8');
            // Check if failure analysis exists and append it
            const analysisPath = 'docs/llms/docs-benchmark-analysis.md';
            if (fs.existsSync(analysisPath)) {
              const analysis = fs.readFileSync(analysisPath, 'utf8');
              // Only include if there's meaningful content (not just "no failures")
              if (!analysis.includes('No failures found')) {
                body += `\n<details>\n<summary>Failure Analysis (click to expand)</summary>\n\n${analysis}\n</details>`;
              }
            }
            const issue_number = Number(process.env.PR_NUMBER);
            // Always post a new comment
            console.log(`Posting new comment on PR #${issue_number}...`);
            try {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number,
                body,
              });
              console.log('Comment created successfully');
            } catch (err) {
              console.error('Failed to post comment:', err.message);
              console.error('Full error:', JSON.stringify(err, null, 2));
              throw err;
            }

      # The benchmarks only modify the docs/llms directory.
      # Commit the changes.
      - name: Commit changes
        run: |
          git config user.name "clockwork-labs-bot"
          git config user.email "clockwork-labs-bot@users.noreply.github.com"
          # Prefer staging only the benchmark output area (adjust as needed)
          git add docs/llms
          # Nothing staged means nothing to commit; end the step successfully.
          git diff --cached --quiet && exit 0
          git commit -m "Update LLM benchmark results"

      # Here we use the https://github.com/clockwork-labs-bot user's
      # personal access token to commit back to the PR branch. This is necessary
      # if we want to be able to push back to external contributor forks.
      - name: Push back to PR branch (same repo or fork)
        env:
          GH_TOKEN: ${{ secrets.CLOCKWORK_LABS_BOT_PAT }}
          # The head repo and branch names come from the PR, so they are passed
          # through the environment instead of being interpolated into the
          # script text: a crafted branch name must not be able to inject shell
          # commands into a step that holds the bot token.
          HEAD_REPO: ${{ steps.pr.outputs.head_repo_full_name }}
          HEAD_REF: ${{ steps.pr.outputs.head_ref }}
        run: |
          git remote set-url origin "https://x-access-token:${GH_TOKEN}@github.com/${HEAD_REPO}.git"
          # Fetch and rebase in case branch moved since workflow started (e.g., previous benchmark run)
          git fetch origin "${HEAD_REF}"
          if ! git rebase "origin/${HEAD_REF}"; then
            git rebase --abort
            echo "::error::Rebase failed due to conflicts. The PR branch may have been updated during the benchmark run. Please re-run /update-llm-benchmark."
            exit 1
          fi
          git push origin "HEAD:${HEAD_REF}"