add user scope

This commit is contained in:
2026-01-12 21:07:05 +08:00
parent 6bc93a2476
commit ce5c764402
13 changed files with 291 additions and 222 deletions

View File

@@ -20,4 +20,4 @@ jobs:
- name: Run linter
uses: golangci/golangci-lint-action@v8
with:
version: v2.1.0
version: v2.7.2

View File

@@ -1,32 +0,0 @@
name: E2E Tests
on:
push:
pull_request:
jobs:
test-e2e:
name: Run on Ubuntu
runs-on: ubuntu-latest
steps:
- name: Clone the code
uses: actions/checkout@v4
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version-file: go.mod
- name: Install the latest version of kind
run: |
curl -Lo ./kind https://kind.sigs.k8s.io/dl/latest/kind-linux-amd64
chmod +x ./kind
sudo mv ./kind /usr/local/bin/kind
- name: Verify kind installation
run: kind version
- name: Running Test e2e
run: |
go mod tidy
make test-e2e

View File

@@ -20,4 +20,4 @@ jobs:
- name: Running Tests
run: |
go mod tidy
make test
make test ENVTEST_K8S_VERSION=1.31

View File

@@ -242,7 +242,7 @@ CONTROLLER_TOOLS_VERSION ?= v0.18.0
ENVTEST_VERSION ?= $(shell go list -m -f "{{ .Version }}" sigs.k8s.io/controller-runtime | awk -F'[v.]' '{printf "release-%d.%d", $$2, $$3}')
#ENVTEST_K8S_VERSION is the version of Kubernetes to use for setting up ENVTEST binaries (i.e. 1.31)
ENVTEST_K8S_VERSION ?= $(shell go list -m -f "{{ .Version }}" k8s.io/api | awk -F'[v.]' '{printf "1.%d", $$3}')
GOLANGCI_LINT_VERSION ?= v2.1.0
GOLANGCI_LINT_VERSION ?= v2.7.2
.PHONY: kustomize
kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary.

View File

@@ -13,8 +13,9 @@ metadata:
name: my-repo-runner-1
namespace: gitea-runner-system
spec:
scope: repo
org: myorg # optional; omitted if scope == global
scope: repo # valid options: global, org, user, repo
org: myorg # optional; omitted if scope == global; mutually exclusive with user
user: myusername # optional; omitted if scope == global; mutually exclusive with org
repo: myreponame # optional; omitted if scope == org || scope == global
gitea:
url: https://gitea.bpg.pw

View File

@@ -32,14 +32,16 @@ const (
RunnerGroupScopeGlobal RunnerGroupScope = "global"
// RunnerGroupScopeOrg means the runner group is scoped to an organization
RunnerGroupScopeOrg RunnerGroupScope = "org"
// RunnerGroupScopeUser means the runner group is scoped to a user
RunnerGroupScopeUser RunnerGroupScope = "user"
// RunnerGroupScopeRepo means the runner group is scoped to a repository
RunnerGroupScopeRepo RunnerGroupScope = "repo"
)
// RunnerGroupSpec defines the desired state of RunnerGroup.
type RunnerGroupSpec struct {
// Scope defines the scope of the runner (global, org, repo)
// +kubebuilder:validation:Enum=global;org;repo
// Scope defines the scope of the runner (global, org, user, repo)
// +kubebuilder:validation:Enum=global;org;user;repo
// +kubebuilder:validation:Required
Scope RunnerGroupScope `json:"scope"`
@@ -47,6 +49,10 @@ type RunnerGroupSpec struct {
// +optional
Org string `json:"org,omitempty"`
// User is required if scope is 'user'
// +optional
User string `json:"user,omitempty"`
// Repo is required if scope is 'repo'
// +optional
Repo string `json:"repo,omitempty"`

View File

@@ -5,7 +5,7 @@ kind: Kustomization
images:
- name: controller
newName: ghcr.io/bapung/gitea-runner-operator
newTag: sha-13f04e1
newTag: sha-6bc93a2
patchesStrategicMerge:
- image_pull_secret_patch.yaml

View File

@@ -7,7 +7,7 @@ metadata:
app.kubernetes.io/managed-by: kustomize
stringData:
# The Gitea API Token (for the Operator to poll for jobs)
auth-token: "3430680995113a33a17715bb552882d504f5cf98"
auth-token: "MMUCFRXCbofYn2L0aT2OP2aug7JhChNJlULKNLgg"
# The Runner Registration Token (for the Runner to register itself)
registration-token: "5r4lpLA9rKCZZEHyUyKHeA187DoaElcTBySITRRi"
---
@@ -23,9 +23,10 @@ spec:
giteaURL: "https://gitea.bpg.pw"
# Scope of the runners (global, org, user, or repo)
scope: "repo"
scope: "org"
org: "bapung" # Required if scope is 'org' or 'repo'
repo: "dummy-service-workflow" # Required if scope is 'repo'
user: "" # Required if scope is 'user'
#repo: "dummy-service-workflow" # Required if scope is 'repo'
# Labels to identify this runner group
labels:

View File

@@ -30,18 +30,23 @@ type RunnerGroupScope string
const (
RunnerGroupScopeGlobal RunnerGroupScope = "global"
RunnerGroupScopeOrg RunnerGroupScope = "org"
RunnerGroupScopeUser RunnerGroupScope = "user"
RunnerGroupScopeRepo RunnerGroupScope = "repo"
)
type RunnerGroupSpec struct {
// Scope defines the scope of the runner (global, org, repo)
// +kubebuilder:validation:Enum=global;org;repo
// Scope defines the scope of the runner (global, org, user, repo)
// +kubebuilder:validation:Enum=global;org;user;repo
Scope RunnerScope `json:"scope"`
// Org is required if scope is 'org'
// +optional
Org string `json:"org,omitempty"`
// User is required if scope is 'user'
// +optional
User string `json:"user,omitempty"`
// Repo is required if scope is 'repo'
// +optional
Repo string `json:"repo,omitempty"`
@@ -49,7 +54,8 @@ type RunnerGroupSpec struct {
// GiteaURL is the base URL of the Gitea instance
GiteaURL string `json:"giteaURL"`
// Labels to assign to the runner
// Labels to assign to the runner.
// Defaults (e.g. ubuntu-latest) are merged automatically by the controller.
// +optional
Labels []string `json:"labels,omitempty"`
@@ -79,154 +85,103 @@ type RunnerGroupStatus struct {
## 4. Controller Implementation (`internal/controller/runnergroup_controller.go`)
The controller handles the reconciliation loop.
The controller handles the reconciliation loop and manages the lifecycle of ephemeral runners.
### 4.1 RBAC Permissions
### 4.1 Struct Definition
Add markers to generate RBAC roles:
The reconciler includes a thread-safe map to cache spawned jobs and prevent duplicate scheduling.
```go
// +kubebuilder:rbac:groups=gitea.bpg.pw,resources=runnergroups,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=gitea.bpg.pw,resources=runnergroups/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch
type RunnerGroupReconciler struct {
client.Client
Scheme *runtime.Scheme
GiteaClient gitea.Client
SpawnedJobsCache sync.Map // Stores [int64]time.Time (JobID -> SpawnTime)
}
```
### 4.2 Reconcile Logic
The `Reconcile` function should follow this flow:
The `Reconcile` function follows this flow:
1. **Fetch RunnerGroup**: Get the `RunnerGroup` CR instance. If not found, ignore (deleted).
2. **List Jobs**: List all `batchv1.Job` resources in the same namespace that are owned by this RunnerGroup.
- Filter by label `gitea.bpg.pw/runnergroup-name=<runnergroup-name>`.
3. **Update Status**: Update `status.activeRunners` with the count of non-completed jobs.
4. **Capacity Check**:
- If `activeRunners >= spec.maxActiveRunners`, stop and requeue.
5. **Poll Gitea**:
- Retrieve the Auth Token from the Secret referenced in `spec.authToken`.
- Instantiate a Gitea API Client.
- Query for queued workflow runs matching the scope and labels.
6. **Scale Up**:
- Calculate `needed = count(queued_jobs)`.
- Calculate `available_slots = spec.maxActiveRunners - activeRunners`.
- `to_spawn = min(needed, available_slots)`.
- Loop `to_spawn` times:
- Create a new `batchv1.Job`.
7. **Requeue**: Return `ctrl.Result{RequeueAfter: 10 * time.Second}` to ensure continuous polling.
1. **Fetch RunnerGroup**: Get the `RunnerGroup` CR instance.
2. **List Jobs**: List all `batchv1.Job` resources owned by this CR to calculate `activeRunners`.
3. **Update Status**: Update `status.activeRunners`.
4. **Capacity Check**: Stop scaling if `activeRunners >= spec.maxActiveRunners`.
5. **Label Calculation**: Call `getEffectiveLabels` to merge `spec.labels` with hardcoded Gitea defaults (e.g., `ubuntu-latest:docker://node:16-bullseye`).
6. **Poll Gitea**:
- Retrieve Auth Token.
- Call `GiteaClient.GetRunnerStats` with the effective labels.
- This returns a list of `QueuedJobs`.
7. **Scale Up & Deduplication**:
- Iterate through `stats.QueuedJobs`.
- **Check Cache**: If Job ID exists in `SpawnedJobsCache`:
- If TTL (< 5 min) is valid: **Skip** (already handled).
- If TTL expired: **Retry** (assume previous runner failed).
- If Job ID not in cache or expired:
- Check `availableSlots`.
- Retrieve Registration Token (if not yet fetched).
- **Spawn Job**: Create `batchv1.Job`.
- **Update Cache**: Store Job ID in `SpawnedJobsCache`.
- Decrement `availableSlots`.
8. **Cache Cleanup**: Remove IDs from `SpawnedJobsCache` if they are not present in the latest `QueuedJobs` list from Gitea.
9. **Requeue**: Return `ctrl.Result{RequeueAfter: 10 * time.Second}`.
### 4.3 Job Construction
### 4.3 Helper Functions
Helper function to create the Job object:
#### getEffectiveLabels
```go
func (r *RunnerGroupReconciler) constructJobForRunnerGroup(runnerGroup *giteav1alpha1.RunnerGroup, registrationToken string) (*batchv1.Job, error) {
// Generate random suffix for name
name := fmt.Sprintf("%s-%s", runnerGroup.Name, randString(5))
Merges user-defined labels with Gitea defaults. If a user defines `ubuntu-latest`, it overrides the default `ubuntu-latest:docker://...`.
// Construct Env Vars
envVars := []corev1.EnvVar{
{Name: "GITEA_INSTANCE_URL", Value: runnerGroup.Spec.GiteaURL},
{Name: "GITEA_RUNNER_REGISTRATION_TOKEN", Value: registrationToken},
{Name: "GITEA_RUNNER_EPHEMERAL", Value: "true"},
{Name: "DOCKER_HOST", Value: "tcp://localhost:2376"},
// ... other envs from README
}
#### constructJobForRunnerGroup
if len(runnerGroup.Spec.Labels) > 0 {
labelsStr := strings.Join(runnerGroup.Spec.Labels, ",")
envVars = append(envVars, corev1.EnvVar{Name: "GITEA_RUNNER_LABELS", Value: labelsStr})
}
Creates the Job object with:
// Construct Job
job := &batchv1.Job{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: runnerGroup.Namespace,
Labels: map[string]string{
"app": runnerGroup.Name,
"gitea.bpg.pw/runnergroup-name": runnerGroup.Name,
"gitea.bpg.pw/managed-by": "gitea-runner-operator",
},
},
Spec: batchv1.JobSpec{
TTLSecondsAfterFinished: pointer.Int32(600),
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
RestartPolicy: corev1.RestartPolicyOnFailure,
Containers: []corev1.Container{
{
Name: "runner",
Image: "gitea/act_runner:nightly-dind-rootless",
ImagePullPolicy: corev1.PullAlways,
SecurityContext: &corev1.SecurityContext{Privileged: pointer.Bool(true)},
Env: envVars,
VolumeMounts: []corev1.VolumeMount{
{Name: "runner-data", MountPath: "/data"},
},
},
},
Volumes: []corev1.Volume{
{
Name: "runner-data",
VolumeSource: corev1.VolumeSource{
PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
ClaimName: "act-runner-vol", // Note: Consider making this configurable or EmptyDir
},
},
},
},
},
},
},
}
// Set Controller Reference
if err := ctrl.SetControllerReference(runnerGroup, job, r.Scheme); err != nil {
return nil, err
}
return job, nil
}
```
- **Name**: `{runnergroup-name}-{random-suffix}`
- **Env**:
- `GITEA_RUNNER_NAME`: Set to the Job name.
- `GITEA_RUNNER_LABELS`: Comma-separated effective labels.
- Standard runner envs (`GITEA_INSTANCE_URL`, etc).
## 5. Gitea Client (`internal/gitea/client.go`)
A simple HTTP client wrapper to interact with Gitea.
A specialized client to interact with Gitea's Actions API.
### 5.1 Interface
```go
type RunnerStats struct {
QueuedJobs []ActionWorkflowJob
Running int
}
type Client interface {
GetQueuedRuns(ctx context.Context, scope RunnerGroupScope, owner, repo string, labels []string) (int, error)
GetRunnerStats(ctx context.Context, giteaURL, authToken string, scope RunnerGroupScope, org, user, repo string, labels []string) (*RunnerStats, error)
}
```
### 5.2 Implementation Details
### 5.2 Logic
- **Endpoint**: `/api/v1/repos/{owner}/{repo}/actions/runs`
- **Query Params**: `status=queued`
- **Filtering**:
- The API might return all queued runs.
- The client must filter these runs locally to ensure they match the `labels` defined in the RunnerGroup CR.
- _Note_: Gitea API might not support filtering by labels directly in the list endpoint, so client-side filtering is necessary.
1. **Endpoints**:
- Repo/Org/Global: Uses `/actions/jobs` endpoints.
- User: Fetches repos via `/users/{user}/repos`, then queries `/actions/jobs` for each repo.
2. **Fetching**:
- Fetches jobs with `status=queued`, `waiting`, `pending`.
- Handles pagination (fetches all pages).
3. **Filtering**:
- Iterates through fetched jobs.
- **Matches Labels**: Checks if the job's required labels are a subset of the runner's supported labels (effective labels).
- Supports exact match (`linux` == `linux`)
- Supports schema match (`ubuntu-latest` matches `ubuntu-latest:docker://...`)
- Returns only matching jobs in `QueuedJobs`.
## 6. Configuration & Deployment
## 6. Testing Strategy
### 6.1 Dockerfile
Standard Operator SDK Dockerfile. Ensure the base image is minimal (e.g., `gcr.io/distroless/static:nonroot`).
### 6.2 Kustomize
Update `config/default/kustomization.yaml` to include the CRD and RBAC configurations.
## 7. Testing Strategy
1. **Unit Tests**:
- Test `constructJobForRunnerGroup` to ensure Env vars and Labels are set correctly.
- Test Gitea Client response parsing.
2. **Integration Tests (EnvTest)**:
- Spin up a local k8s control plane.
- Create a `RunnerGroup` CR.
- Verify the controller creates a `Job` when the mocked Gitea client returns queued jobs.
- Verify the controller respects `MaxActiveRunners`.
1. **Unit Tests (`internal/gitea/client_test.go`)**:
- Mock Gitea API server.
- Verify `GetRunnerStats` correctly parses JSON and handles pagination.
- Verify label matching logic (subset, schema matching).
2. **Controller Tests**:
- Verify `SpawnedJobsCache` prevents double scheduling.
- Verify TTL logic allows retries for stuck jobs.
- Verify `getEffectiveLabels` merging logic.

View File

@@ -129,6 +129,7 @@ func (r *RunnerGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request)
authToken,
runnerGroup.Spec.Scope,
runnerGroup.Spec.Org,
runnerGroup.Spec.User,
runnerGroup.Spec.Repo,
effectiveLabels,
)

View File

@@ -38,6 +38,7 @@ type Client interface {
authToken string,
scope v1alpha1.RunnerGroupScope,
org string,
user string,
repo string,
labels []string,
) (*RunnerStats, error)
@@ -118,6 +119,7 @@ func (c *HTTPClient) GetRunnerStats(
authToken string,
scope v1alpha1.RunnerGroupScope,
org string,
user string,
repo string,
labels []string,
) (*RunnerStats, error) {
@@ -126,6 +128,8 @@ func (c *HTTPClient) GetRunnerStats(
return c.getRunnerStatsForRepo(ctx, giteaURL, authToken, org, repo, labels)
case v1alpha1.RunnerGroupScopeOrg:
return c.getRunnerStatsForOrg(ctx, giteaURL, authToken, org, labels)
case v1alpha1.RunnerGroupScopeUser:
return c.getRunnerStatsForUser(ctx, giteaURL, authToken, user, labels)
case v1alpha1.RunnerGroupScopeGlobal:
return c.getRunnerStatsGlobal(ctx, giteaURL, authToken, labels)
default:
@@ -145,6 +149,28 @@ func (c *HTTPClient) getRunnerStatsForOrg(ctx context.Context, giteaURL, authTok
return c.fetchRunnerStats(ctx, endpoint, authToken, labels)
}
// getRunnerStatsForUser collects the queued jobs of every repository owned by user.
//
// It lists the user's repositories with fetchReposForUser, queries each
// repository's /actions/jobs endpoint through fetchRunnerStats using the given
// labels, and concatenates the QueuedJobs of every result. Only QueuedJobs is
// populated in the returned RunnerStats.
//
// An error is returned if user is empty, if the repository listing fails, or if
// the query for any single repository fails; no partial result is returned.
func (c *HTTPClient) getRunnerStatsForUser(ctx context.Context, giteaURL, authToken, user string, labels []string) (*RunnerStats, error) {
	// An empty user would produce a malformed ".../users//repos" request.
	if user == "" {
		return nil, fmt.Errorf("user is required for user scope")
	}

	repos, err := c.fetchReposForUser(ctx, giteaURL, authToken, user)
	if err != nil {
		return nil, fmt.Errorf("listing repos for user %q: %w", user, err)
	}

	baseURL := strings.TrimSuffix(giteaURL, "/")

	var allQueuedJobs []ActionWorkflowJob
	for _, repo := range repos {
		// Escape both path segments so unusual owner/repo names cannot alter the URL structure.
		endpoint := fmt.Sprintf(
			"%s/api/v1/repos/%s/%s/actions/jobs",
			baseURL,
			url.PathEscape(repo.Owner.Login),
			url.PathEscape(repo.Name),
		)
		stats, err := c.fetchRunnerStats(ctx, endpoint, authToken, labels)
		if err != nil {
			return nil, fmt.Errorf("fetching jobs for repo %s/%s: %w", repo.Owner.Login, repo.Name, err)
		}
		allQueuedJobs = append(allQueuedJobs, stats.QueuedJobs...)
	}

	return &RunnerStats{
		QueuedJobs: allQueuedJobs,
	}, nil
}
// getRunnerStatsGlobal fetches queued runs using admin-level API for global scope
func (c *HTTPClient) getRunnerStatsGlobal(ctx context.Context, giteaURL, authToken string, labels []string) (*RunnerStats, error) {
endpoint := fmt.Sprintf("%s/api/v1/admin/actions/jobs", strings.TrimSuffix(giteaURL, "/"))
@@ -475,6 +501,70 @@ func (c *HTTPClient) fetchUserRepos(ctx context.Context, giteaURL, authToken str
return allRepos, nil
}
// fetchReposForUser fetches all repositories owned by a specific user with pagination.
//
// It requests /api/v1/users/{username}/repos page by page (50 items per page)
// and stops at the first page that returns fewer items than the page size.
// NOTE(review): if the server caps the page size below 50, the first page would
// look "short" and pagination would stop early — confirm against the instance's
// configured maximum.
//
// An error is returned if username is empty, if any request fails, if the server
// answers with a non-200 status, or if a response body cannot be read or decoded.
func (c *HTTPClient) fetchReposForUser(ctx context.Context, giteaURL, authToken, username string) ([]Repository, error) {
	const limit = 50

	// An empty username would produce a malformed ".../users//repos" request.
	if username == "" {
		return nil, fmt.Errorf("username is required to fetch user repos")
	}

	// The path is identical for every page, so build and parse it once;
	// only the query string changes inside the loop.
	endpoint := fmt.Sprintf("%s/api/v1/users/%s/repos", strings.TrimSuffix(giteaURL, "/"), url.PathEscape(username))
	u, err := url.Parse(endpoint)
	if err != nil {
		return nil, err
	}

	var allRepos []Repository
	for page := 1; ; page++ {
		q := u.Query()
		q.Set("page", fmt.Sprintf("%d", page))
		q.Set("limit", fmt.Sprintf("%d", limit))
		u.RawQuery = q.Encode()

		repos, err := c.fetchReposForUserPage(ctx, u.String(), authToken)
		if err != nil {
			return nil, err
		}
		allRepos = append(allRepos, repos...)

		// A short page means there is nothing left to fetch.
		if len(repos) < limit {
			break
		}
	}

	return allRepos, nil
}

// fetchReposForUserPage performs one GET against pageURL and decodes the JSON
// array of repositories in the response. It lives in its own method so the
// response body is closed by a defer on every return path.
func (c *HTTPClient) fetchReposForUserPage(ctx context.Context, pageURL, authToken string) ([]Repository, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, pageURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Authorization", "token "+authToken)
	req.Header.Set("Accept", "application/json")

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body only enriches the error message, so a failed
		// read must not mask the HTTP status error itself.
		body, _ := io.ReadAll(resp.Body)
		return nil, c.handleHTTPError(resp.StatusCode, body, "fetch user repos")
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading user repos response: %w", err)
	}

	var repos []Repository
	if err := json.Unmarshal(body, &repos); err != nil {
		return nil, fmt.Errorf("decoding user repos response: %w", err)
	}
	return repos, nil
}
// filterQueuedJobs filters workflow jobs by labels
func (c *HTTPClient) filterQueuedJobs(jobs []ActionWorkflowJob, runnerLabels []string) []ActionWorkflowJob {
var matched []ActionWorkflowJob

View File

@@ -32,6 +32,7 @@ func TestHTTPClient_GetRunnerStats(t *testing.T) {
name string
scope v1alpha1.RunnerGroupScope
org string
user string
repo string
labels []string
mockResponse ActionWorkflowJobsResponse
@@ -87,12 +88,43 @@ func TestHTTPClient_GetRunnerStats(t *testing.T) {
expectedQueued: 2,
expectedError: false,
},
{
name: "user scope",
scope: v1alpha1.RunnerGroupScopeUser,
user: "testuser",
labels: []string{"linux"},
mockResponse: ActionWorkflowJobsResponse{
TotalCount: 1,
Jobs: []ActionWorkflowJob{
{ID: 1, Status: "queued", Labels: []string{"linux"}},
},
},
expectedQueued: 1,
expectedError: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create mock server
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
// Handle User Repos call for User Scope
if tt.scope == v1alpha1.RunnerGroupScopeUser && strings.Contains(r.URL.Path, "/repos") && !strings.Contains(r.URL.Path, "/actions/jobs") {
repos := []Repository{
{
Name: "testrepo",
Owner: struct {
Login string `json:"login"`
}{Login: tt.user},
FullName: tt.user + "/testrepo",
},
}
json.NewEncoder(w).Encode(repos)
return
}
// Verify correct endpoint is called
expectedPath := ""
switch tt.scope {
@@ -102,6 +134,8 @@ func TestHTTPClient_GetRunnerStats(t *testing.T) {
expectedPath = "/api/v1/orgs/testorg/actions/jobs"
case v1alpha1.RunnerGroupScopeGlobal:
expectedPath = "/api/v1/admin/actions/jobs"
case v1alpha1.RunnerGroupScopeUser:
expectedPath = "/api/v1/repos/" + tt.user + "/testrepo/actions/jobs"
}
if !strings.HasPrefix(r.URL.Path, expectedPath) {
@@ -114,8 +148,6 @@ func TestHTTPClient_GetRunnerStats(t *testing.T) {
t.Errorf("Expected Authorization header to start with 'token ', got %s", authHeader)
}
w.Header().Set("Content-Type", "application/json")
// Only return jobs for 'queued' status to simplify counting
if r.URL.Query().Get("status") == "queued" {
json.NewEncoder(w).Encode(tt.mockResponse)
@@ -132,6 +164,7 @@ func TestHTTPClient_GetRunnerStats(t *testing.T) {
"test-token",
tt.scope,
tt.org,
tt.user,
tt.repo,
tt.labels,
)

View File

@@ -10,6 +10,8 @@ The Gitea Runner Operator is a Kubernetes controller designed to manage ephemera
- **RunnerGroup CR**: The custom resource instance defining a runner pool.
- **Ephemeral Runner**: A runner that executes exactly one job and then terminates.
- **Gitea Instance**: The target Gitea server where CI/CD workflows are triggered.
- **Runner Capabilities**: The set of labels a runner provides (e.g., `ubuntu-latest`).
- **Job Requirements**: The set of labels a job requests (e.g., `ubuntu-latest`).
## 3. Custom Resource Definition (CRD)
@@ -25,12 +27,13 @@ The Gitea Runner Operator is a Kubernetes controller designed to manage ephemera
The `spec` defines the configuration for the runner pool.
| Field | Type | Required | Description |
| :------------------ | :----------------------------- | :---------- | :---------------------------------------------------------------------------------------------------------- |
| `scope` | Enum (`global`, `org`, `repo`) | Yes | The scope of the runner. |
| :------------------ | :------------------------------------- | :---------- | :---------------------------------------------------------------------------------------------------------- |
| `scope` | Enum (`global`, `org`, `user`, `repo`) | Yes | The scope of the runner. |
| `org` | String | Conditional | The organization name. Required if `scope` is `org`. |
| `user` | String | Conditional | The username. Required if `scope` is `user`. |
| `repo` | String | Conditional | The repository name. Required if `scope` is `repo`. |
| `gitea.url` | String | Yes | The base URL of the Gitea instance (e.g., `https://gitea.example.com`). |
| `labels` | []String | No | List of labels for the runner (e.g., `ubuntu-latest`, `app:infra`). Used by Gitea to match jobs to runners. |
| `labels` | []String | No | List of labels for the runner (e.g., `app:infra`). Defaults (e.g. `ubuntu-latest`) are added automatically. |
| `maxActiveRunners` | Integer | Yes | The maximum number of concurrent runner Jobs allowed for this specific RunnerGroup CR. |
| `registrationToken` | SecretKeySelector | Yes | Reference to a Secret containing the runner registration token. |
| `authToken` | SecretKeySelector | Yes | Reference to a Secret containing an API token to query Gitea for job statuses. |
@@ -42,7 +45,7 @@ Standard Kubernetes Secret reference:
- `secretRef.name`: Name of the secret.
- `secretRef.key`: Key within the secret containing the value.
### 3.3 Status Schema (Optional but Recommended)
### 3.3 Status Schema
- `activeRunners`: Integer. Current count of running Jobs managed by this CR.
- `lastCheckTime`: Timestamp. Last time the controller polled Gitea.
@@ -54,37 +57,44 @@ Standard Kubernetes Secret reference:
The controller watches for changes to `RunnerGroup` resources.
1. **Validation**: Ensure `org`, `user`, or `repo` is present as required by `scope`.
2. **Job Cleanup**: (Optional) Check for and remove "stuck" jobs if TTL doesn't cover edge cases, though `ttlSecondsAfterFinished` is primary.
3. **Metric Collection**: Update status with current running job count.
4. **Polling**: The controller must implement a polling mechanism (loop) independent of the standard Reconcile trigger, or requeue the Reconcile event periodically (e.g., every 10-30 seconds).
2. **Job List**: List child Jobs to determine `activeRunners` count.
3. **Status Update**: Update CR status with current metrics.
4. **Capacity Check**: If `activeRunners >= maxActiveRunners`, stop scaling up.
5. **Polling**: Fetch job statistics from Gitea.
### 4.2 Polling & Scaling Logic
### 4.2 Polling & Scaling Strategy
On every poll interval for a specific `RunnerGroup` CR:
The operator uses a robust polling strategy to handle the disconnect between Kubernetes Pod startup time and Gitea's job queue state.
1. **Check Capacity**:
- Query Kubernetes for active `Jobs` owned by this `RunnerGroup` CR.
- If `count(active_jobs) >= maxActiveRunners`, stop. Do not spawn new runners.
#### 4.2.1 Fetching Stats (`GetRunnerStats`)
2. **Fetch Queued Jobs**:
- Call Gitea API using `authToken`.
- Endpoint depends on scope:
- **Global**: Recursively fetch all workflow runs:
1. Fetch all organizations in the Gitea instance
2. For each organization, fetch all repositories under that org
3. For each repository, query `/repos/{owner}/{repo}/actions/runs?status=queued`
4. Additionally, fetch all user-owned repositories and query their workflow runs
- **Org**: Fetch all workflow runs in repos under the organization:
1. Fetch all repositories under the specified organization
2. For each repository, query `/repos/{owner}/{repo}/actions/runs?status=queued`
- **Repo**: Directly query `/repos/{owner}/{repo}/actions/runs?status=queued`
- Filter the returned runs:
- Must match the `labels` defined in the `RunnerGroup` CR.
The controller queries Gitea for:
3. **Spawn Runner**:
- If a queued job is found and capacity allows, create a Kubernetes `Job`.
- **One Job per Queued Workflow**: Ideally, the logic should map 1 queued run -> 1 Runner Job.
- **Concurrency Control**: Ensure we don't spawn more jobs than `maxActiveRunners - currentActiveRunners`.
1. **Queued Jobs**: Jobs with status `queued`, `waiting`, or `pending`.
- **Label Filtering**: Jobs are filtered client-side. A job is considered a match if the RunnerGroup's capabilities (Spec labels + Default labels) are a superset of the Job's required labels.
2. **Running Jobs**: Jobs with status `running` that belong to this specific runner group (filtered by runner name prefix).
#### 4.2.2 Deduplication Cache (`SpawnedJobsCache`)
To prevent "double scheduling" (where multiple reconciliation loops spawn multiple runners for the same queued job before the first runner can pick it up), the controller maintains an in-memory cache:
- **Key**: Gitea Job ID.
- **Value**: Timestamp when the runner was spawned.
- **TTL**: 5 minutes.
#### 4.2.3 Scaling Algorithm
1. **Identify Candidates**: Iterate through the list of Queued Jobs from Gitea.
2. **Check Cache**:
- If Job ID is in cache and TTL has not expired: **Skip** (Runner already spawned).
- If Job ID is in cache and TTL expired: **Retry** (Runner likely failed to start).
- If Job ID is not in cache: **Candidate for spawning**.
3. **Calculate Slots**: `availableSlots = maxActiveRunners - activeRunners`.
4. **Spawn**: For each candidate, if `availableSlots > 0`:
- Create Kubernetes Job.
- Add Job ID to `SpawnedJobsCache`.
- Decrement `availableSlots`.
5. **Cleanup**: Remove Job IDs from the cache if they are no longer present in the Queued Jobs list returned by Gitea (implies they are now Running, Completed, or Cancelled).
## 5. Kubernetes Resource Generation
@@ -94,40 +104,44 @@ The controller creates a `batch/v1 Job`.
**Metadata:**
- `name`: `{runnergroup-cr-name}-{random-suffix}`
- `name`: `{runnergroup-name}-{random-suffix}`
- `namespace`: Same as `RunnerGroup` CR.
- `labels`:
- `app`: `{runnergroup-cr-name}`
- `gitea.bpg.pw/runnergroup-name`: `{runnergroup-name}`
- `gitea.bpg.pw/managed-by`: `gitea-runner-operator`
- `gitea.bpg.pw/runnergroup-name`: `{runnergroup-cr-name}`
- `ownerReferences`: Pointing to the `RunnerGroup` CR.
**Spec:**
- `ttlSecondsAfterFinished`: 600 (Clean up finished jobs).
- `ttlSecondsAfterFinished`: 600 (Auto-cleanup).
- `template`:
- `spec`:
- `restartPolicy`: `OnFailure`
- `containers`:
- **Name**: `runner`
- **Image**: `gitea/act_runner:nightly-dind-rootless` (Default, potentially configurable in CR later).
- **SecurityContext**: `privileged: true` (Required for DIND).
- **Image**: `gitea/act_runner:nightly-dind-rootless`
- **Env**:
- `GITEA_INSTANCE_URL`: From `spec.gitea.url`.
- `GITEA_RUNNER_REGISTRATION_TOKEN`: From `spec.registrationToken`.
- `GITEA_RUNNER_REGISTRATION_TOKEN`: From Secret.
- `GITEA_RUNNER_EPHEMERAL`: `"true"`.
- `GITEA_RUNNER_LABELS`: Comma-separated list from `spec.labels`.
- `DOCKER_HOST`: `tcp://localhost:2376`
- **VolumeMounts**:
- Mount docker socket or storage if necessary. The README example uses a PVC `act-runner-vol` mounted to `/data`. _Note: Using a shared PVC for ephemeral runners might cause race conditions. EmptyDir is preferred for truly ephemeral runners unless caching is strictly required and managed._
- `GITEA_RUNNER_NAME`: `{job-name}` (Matches Pod name for easier debugging).
- `GITEA_RUNNER_LABELS`: Comma-separated list of **Effective Labels**.
- **Effective Labels** = `spec.labels` + Default Gitea Labels (e.g., `ubuntu-latest:docker://node:16-bullseye`, `ubuntu-22.04:...`, etc.) unless explicitly overridden.
## 6. Gitea API Interaction
- **Authentication**: API token provided in `authToken`, sent as an `Authorization: token <value>` header.
- **Client**: HTTP Client with timeout.
- **Endpoints Used**:
- `/api/v1/repos/{owner}/{repo}/actions/jobs` (Repo scope)
- `/api/v1/orgs/{org}/actions/jobs` (Org scope)
- `/api/v1/users/{user}/repos` + `/api/v1/repos/{owner}/{repo}/actions/jobs` (User scope)
- `/api/v1/admin/actions/jobs` (Global scope)
- **Label Matching**:
- The controller implements logic to check: `Job.Labels ⊆ Runner.EffectiveLabels`.
- Supports both exact matches (`linux`) and schema matches (`ubuntu-latest` matches `ubuntu-latest:docker://...`).
## 7. Security Considerations
- **Token Handling**: Registration and Auth tokens are read from Kubernetes Secrets and injected as Environment Variables. They are not stored in plain text in the CR.
- **Privileged Mode**: The default `act_runner` image (dind) requires privileged mode. The Operator creates Jobs with this permission.
- **Namespace Isolation**: The Operator should respect RBAC and only operate within allowed namespaces.
- **Token Handling**: Tokens are injected via `valueFrom: secretKeyRef` env vars.
- **Privileged Mode**: `act_runner` dind mode requires privileged security context.
- **Namespace Isolation**: Controller operates within the namespace of the RunnerGroup.