feat(09-tooling-portable-setup-02): implement file/cron setup — scripts, skills, cron jobs, gateway restart

- write_session_init_script: mount verification via shell_init_files (D-10)
- write_archive_script: DRY_RUN=true archive script for stale sessions (D-10)
- write_jira_skill, write_aws_skill, write_confluence_skill, write_bitbucket_skill, write_session_skill: all 5 skills with 2 reference files embedded as heredocs
- register_cron_jobs: 3 cron jobs via hermes cron create (ngn-daily-report, ngn-weekly-stale-summary, ngn-weekly-archive)
- offer_gateway_restart: prompt to restart Hermes gateway at end
- Main execution block [1/14] through [14/14] with progress indicators
- Best-effort error handling for non-critical steps
- D-10 referenced throughout for traceability
This commit is contained in:
2026-06-15 23:30:27 +08:00
parent 9da972842d
commit 5a8c18380e

View File

@@ -394,3 +394,947 @@ generate_cron_env_config() {
echo " ✓ Cron env vars configured" echo " ✓ Cron env vars configured"
} }
# =============================================================================
# Task 3: File/Cron Setup (D-10)
# =============================================================================
# ---- Write session-init.sh (D-10) ----
#######################################
# Write ~/.hermes/scripts/session-init.sh and mark it executable (D-10).
#
# The generated script reads the comma-separated DEFAULT_REPOS environment
# variable and reports, for each entry, whether /workspace/<repo>/.git
# exists. It always exits 0 so a missing mount never blocks the session.
#
# Globals:   HOME (read)
# Arguments: none
# Outputs:   progress lines on stdout; writes the script file
# Returns:   non-zero only if the directory or file cannot be written
#######################################
write_session_init_script() {
  local scripts_dir="$HOME/.hermes/scripts"
  local target="$scripts_dir/session-init.sh"

  echo " → Writing session-init.sh (D-10)..."
  # Create the target directory here (as the skill writers do for theirs) so
  # this step does not depend on an earlier step having created it.
  mkdir -p "$scripts_dir"
  cat > "$target" << 'SCRIPT'
#!/bin/bash
# session-init.sh — Verify DEFAULT_REPOS mounts at session start
# Runs via shell_init_files before agent prompt. Non-blocking.
# Reads DEFAULT_REPOS from environment (forwarded via docker_forward_env).
set -uo pipefail
DEFAULT_REPOS="${DEFAULT_REPOS:-}"
if [ -z "$DEFAULT_REPOS" ]; then
  echo "[session-init] DEFAULT_REPOS not set — skipping verification"
  exit 0
fi
# Split comma-separated list
IFS=',' read -ra REPOS <<< "$DEFAULT_REPOS"
ALL_OK=true
for repo in "${REPOS[@]}"; do
  # Trim whitespace
  repo="${repo#"${repo%%[![:space:]]*}"}"
  repo="${repo%"${repo##*[![:space:]]}"}"
  # Skip blank entries (e.g. "a,,b" or stray spaces) instead of probing
  # /workspace//.git and reporting a nameless repo.
  [ -n "$repo" ] || continue
  if [ -d "/workspace/$repo/.git" ]; then
    echo "[session-init] ✓ $repo — mounted at /workspace/$repo"
  else
    echo "[session-init] ⚠ $repo — NOT FOUND at /workspace/$repo"
    ALL_OK=false
  fi
done
if [ "$ALL_OK" = true ]; then
  echo "[session-init] All DEFAULT_REPOS verified"
else
  echo "[session-init] Some repos missing — check docker_volumes in config.yaml"
fi
exit 0 # always exit cleanly — non-blocking
SCRIPT
  chmod +x "$target"
  echo " ✓ session-init.sh written and executable"
}
# ---- Write archive-stale-sessions.sh (D-10) ----
#######################################
# Write ~/.hermes/scripts/archive-stale-sessions.sh and mark it executable
# (D-10).
#
# The generated script exports the session store to a timestamped JSONL file
# under ~/.hermes/archive/sessions, then (only when DRY_RUN is exactly
# "false") prunes sessions older than 30 days, and finally prints session
# stats. DRY_RUN defaults to true; any value other than "false" keeps the
# script in dry-run mode.
#
# Globals:   HOME (read)
# Arguments: none
# Outputs:   progress lines on stdout; writes the script file
# Returns:   non-zero only if the directory or file cannot be written
#######################################
write_archive_script() {
  local scripts_dir="$HOME/.hermes/scripts"
  local target="$scripts_dir/archive-stale-sessions.sh"

  echo " → Writing archive-stale-sessions.sh (D-10)..."
  # Create the target directory here (as the skill writers do for theirs) so
  # this step does not depend on an earlier step having created it.
  mkdir -p "$scripts_dir"
  cat > "$target" << 'SCRIPT'
#!/bin/bash
# Archive stale sessions (inactive >30 days) and prune from live DB
# This script runs via hermes cron with --no-agent
# Stdout is delivered to Telegram via --deliver telegram
# Dry-run mode: export only, no prune — safe default for first run
set -euo pipefail
# Safe default is dry run. The value can be overridden from the environment
# (DRY_RUN=false), matching the hint printed in step 2 below; a hard-coded
# assignment would silently ignore that override.
DRY_RUN="${DRY_RUN:-true}"
ARCHIVE_DIR="$HOME/.hermes/archive/sessions"
mkdir -p "$ARCHIVE_DIR"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
OUTPUT_FILE="$ARCHIVE_DIR/sessions-${TIMESTAMP}.jsonl"
echo "=== Stale Session Archive ==="
echo "Started: $(date)"
echo "Dry run: $DRY_RUN"
echo ""
echo "[1/3] Exporting session store..."
echo " Output: $OUTPUT_FILE"
hermes sessions export "$OUTPUT_FILE"
echo " -> $(wc -l < "$OUTPUT_FILE") sessions exported"
echo " -> Size: $(du -h "$OUTPUT_FILE" | cut -f1)"
echo ""
if [ "$DRY_RUN" = false ]; then
  echo "[2/3] Pruning sessions older than 30 days..."
  hermes sessions prune --older-than 30 --yes
  echo " Done."
else
  echo "[2/3] SKIPPED (dry run) — set DRY_RUN=false to enable prune"
  echo " Review $OUTPUT_FILE before enabling."
fi
echo ""
echo "[3/3] Post-archive stats:"
hermes sessions stats
echo ""
echo "✓ Archive complete."
SCRIPT
  chmod +x "$target"
  echo " ✓ archive-stale-sessions.sh written and executable"
}
# ---- Write skill files (D-10) ----
# Install the Jira query skill: creates the skill directory under
# ~/.hermes/skills/ngn-agent/jira and writes SKILL.md from the embedded
# snapshot below (quoted heredoc, so nothing in it is expanded).
write_jira_skill() {
  local skill_dir="$HOME/.hermes/skills/ngn-agent/jira"

  mkdir -p "$skill_dir"
  cat << 'JIRA_SKILL_MD' > "$skill_dir/SKILL.md"
---
name: jira-query
description: Query Jira Cloud issues, search, and manage tickets
metadata:
hermes:
tags: [jira, project-management]
category: devops
requires_toolsets: [terminal]
version: 1.0.0
---
# Jira Cloud Query
## When to Use
When the user asks to search Jira issues, check ticket status, or list project work.
## Procedure
### 1. Search issues by JQL
```bash
ngn-jira GET '/rest/api/3/search?jql=ORDER BY created DESC&maxResults=10'
```
For specific project:
```bash
ngn-jira GET '/rest/api/3/search?jql=project=PROJ ORDER BY created DESC&maxResults=10'
```
### 2. Get issue details
```bash
ngn-jira GET '/rest/api/3/issue/PROJ-123'
```
### 3. List sprints (if Jira Software)
```bash
ngn-jira GET '/rest/agile/1.0/board'
ngn-jira GET '/rest/agile/1.0/board/{boardId}/sprint?state=active'
```
### 4. Get issue comments
```bash
ngn-jira GET '/rest/api/3/issue/PROJ-123/comment'
```
## Pitfalls
- JQL is case-sensitive for field names
- maxResults defaults to 50; set explicitly for large queries
- Agile REST API may not be available on all plans
## Required Environment
- `JIRA_EMAIL` — your Atlassian account email
- `JIRA_API_TOKEN` — from https://id.atlassian.com/manage/api-tokens
JIRA_SKILL_MD
  printf '%s\n' " ✓ jira/SKILL.md written"
}
#######################################
# Install the AWS diagnostics skill (D-10): writes SKILL.md and its
# references/multiregional-patterns.md under
# ~/.hermes/skills/ngn-agent/aws-diagnostics. Both payloads are quoted
# heredocs, so backticks and $ inside them are written literally.
#
# Globals:   HOME (read)
# Arguments: none
# Outputs:   one confirmation line on stdout; writes two files
#######################################
write_aws_skill() {
  mkdir -p "$HOME/.hermes/skills/ngn-agent/aws-diagnostics/references"
  # Fix vs. previous snapshot: the profile list location is `~/.aws/config`
  # (the AWS CLI's per-user config file), not `/.aws/config`.
  cat > "$HOME/.hermes/skills/ngn-agent/aws-diagnostics/SKILL.md" << 'SKILL'
---
name: aws-diagnostics
description: Read-only AWS diagnostics for platform engineering
metadata:
hermes:
tags: [aws, diagnostics, platform-engineering]
category: devops
requires_toolsets: [terminal]
version: 1.0.0
---
# AWS Diagnostics
## When to Use
When the user asks to check AWS resources, investigate issues, or audit infrastructure in any account.
## Important
- ALWAYS determine the correct AWS_PROFILE before running commands
- NEVER run mutating AWS commands (delete, terminate, stop, modify)
- Prefer read-only AWS CLI commands (describe, list, get)
## Procedure
### 1. Identify the target account
Ask the user which account/environment they want to target. Available profiles in `~/.aws/config`:
- `rzaws-sw-rai-ava-dev/prod/rc` — AVA service
- `rzaws-sw-rai-cs-dev/prod/rc` — CS service
- `rzaws-sw-rai-qac-dev/prod` — QAC
- `rzaws-sw-rai-ops` — Ops account
- `rzaws-sw-rai-voicekit-dev/prod/rc` — VoiceKit
- `rzaws-sw-rai-preprod` — Pre-production
- `rzaws-sw-rai-nonprod` — Non-production
### 2. Set the profile
```bash
export AWS_PROFILE=rzaws-sw-rai-<service>-<env>
```
### 3. Diagnostic commands
**EC2 instances:**
```bash
aws ec2 describe-instances --query 'Reservations[*].Instances[*].[InstanceId,State.Name,InstanceType,Tags[?Key==`Name`].Value|[0]]' --output table
```
**ECS services:**
```bash
aws ecs list-clusters && aws ecs list-services --cluster <name>
```
**S3 buckets:**
```bash
aws s3 ls
```
**CloudWatch alarms:**
```bash
aws cloudwatch describe-alarms --state-value ALARM --output table
```
**ECS task health:**
```bash
aws ecs describe-tasks --cluster <name> --tasks <task-ids>
```
**RDS instances:**
```bash
aws rds describe-db-instances --query 'DBInstances[*].[DBInstanceIdentifier,DBInstanceStatus,Engine,DBInstanceClass]' --output table
```
**Lambda functions:**
```bash
aws lambda list-functions --query 'Functions[*].[FunctionName,Runtime,LastModified]' --output table
```
**ELB target group health:**
```bash
aws elbv2 describe-target-groups --query 'TargetGroups[*].[TargetGroupName,TargetType]' --output table
```
### 4. Report findings
Format as a concise table. Include account ID and profile used.
## Alternative: Infrastructure Code Analysis
When AWS CLI access is unavailable (Docker containers, credential issues), examine existing infrastructure code instead:
```bash
# Search for region patterns
search_files --pattern="us-west-2" --path="/workspace"
# Check terraform configurations
read_file /workspace/rai-ops/aws/<account>/us-east-1/app/main.tf
read_file /workspace/rai-ops/aws/<account>/us-east-1/app/<app>.tfvars
# Look for provider configurations
search_files --pattern="provider.*replica" --path="/workspace/rai-ops"
# Check S3 migration data
search_files --pattern="s3-mapping" --target="files"
```
**When to use code analysis:**
- Docker container with read-only filesystem
- Missing AWS CLI or credentials
- Need to understand intended architecture vs live state
- Investigating multiregional setup patterns
## Pitfalls
- SSO tokens expire (~6-8h). If you get auth errors, ask the user to run `aws sso login`
- Some accounts may not have all services — check `aws sts get-caller-identity` first
- Don't pipe large results directly — use `--query` and `--output table` for readability
- **Don't persist with CLI installation in constrained environments** — switch to code analysis quickly when installation fails
## Verification
Run `aws sts get-caller-identity` to confirm the correct profile is active before running diagnostics.
## References
- `references/multiregional-patterns.md` - Terraform patterns for cross-region infrastructure setup
SKILL
  # AWS reference file
  cat > "$HOME/.hermes/skills/ngn-agent/aws-diagnostics/references/multiregional-patterns.md" << 'REF'
# Multiregional Infrastructure Patterns
## AVA Multiregional Setup
### Provider Configuration Pattern
```hcl
# Primary provider (us-east-1)
provider "aws" {
region = var.region
# ... assume_role block
}
# Replica provider (us-west-2)
provider "aws" {
alias = "replica"
region = "us-west-2"
# ... same assume_role block
}
```
### Module Consumption
```hcl
module "app" {
providers = {
aws = aws
aws.replica = aws.replica # Required by tf-modules/app/versions.tf
}
# ... other config
}
```
### Database Replication Options
**RDS Aurora PostgreSQL (Current Pattern)**
- Engine: `aurora-postgresql`
- Version: `16.11`
- Cross-region read replicas supported
- Can promote replica for DR scenarios
**DynamoDB Global Tables (Available)**
- Global Table v2 with us-east-1 + us-west-2 replicas
- Per-region CMKs for encryption
- Feature-flagged via `var.tenant_registry`
- Documented in RAID-352
### S3 Cross-Region Replication
Extensive existing pattern from migration data:
- `ava-{env}-west-*` buckets in us-west-2
- Matching `rai-s3-usw2-*` naming convention
- Covers: bug reports, screenshots, game logs, shiny moments
### Key Files for Multiregional Analysis
- `aws/<account>/us-east-1/app/provider.tf` - Replica provider config
- `aws/<account>/us-east-1/app/<app>.tfvars` - App-specific resources
- `raid-migration/raid-s3-migration/s3-mapping.csv` - Cross-region S3 inventory
- `RAID-352-PR-DESCRIPTION.md` - DynamoDB Global Tables documentation
REF
  echo " ✓ aws-diagnostics/SKILL.md + references written"
}
# Install the Confluence search skill: creates
# ~/.hermes/skills/ngn-agent/confluence and writes SKILL.md from the embedded
# snapshot below (quoted heredoc, so nothing in it is expanded).
write_confluence_skill() {
  local skill_dir="$HOME/.hermes/skills/ngn-agent/confluence"

  mkdir -p "$skill_dir"
  cat << 'CONFLUENCE_SKILL_MD' > "$skill_dir/SKILL.md"
---
name: confluence-search
description: Search and retrieve Confluence pages
metadata:
hermes:
tags: [confluence, documentation]
category: devops
requires_toolsets: [terminal]
version: 1.0.0
---
# Confluence Search
## When to Use
When the user asks to find documentation, search Confluence pages, or retrieve page content.
## Procedure
### 1. Search pages by text
```bash
ngn-confluence GET '/rest/api/search?cql=text~"search terms"&limit=10'
```
### 2. Search by space
```bash
ngn-confluence GET '/rest/api/search?cql=space=ADM&limit=10'
```
### 3. Get page content
```bash
ngn-confluence GET '/rest/api/content/{pageId}?expand=body.storage'
```
### 4. List pages in space
```bash
ngn-confluence GET '/rest/api/content?spaceKey=ADM&limit=50'
```
### 5. Get page children
```bash
ngn-confluence GET '/rest/api/content/{pageId}/child/page?limit=50'
```
## Pitfalls
- CQL is different from JQL — `text~"query"` for full-text search
- Page body needs `expand=body.storage` to retrieve content
- Use `limit` parameter — defaults to 25
## Required Environment
- `JIRA_EMAIL` — your Atlassian account email
- `JIRA_API_TOKEN` — from https://id.atlassian.com/manage/api-tokens
CONFLUENCE_SKILL_MD
  printf '%s\n' " ✓ confluence/SKILL.md written"
}
# Install the Bitbucket pull-request skill: creates
# ~/.hermes/skills/ngn-agent/bitbucket and writes SKILL.md from the embedded
# snapshot below (quoted heredoc, so nothing in it is expanded).
write_bitbucket_skill() {
  local skill_dir="$HOME/.hermes/skills/ngn-agent/bitbucket"

  mkdir -p "$skill_dir"
  cat << 'BITBUCKET_SKILL_MD' > "$skill_dir/SKILL.md"
---
name: bitbucket-pr
description: Review Bitbucket pull requests and repositories
metadata:
hermes:
tags: [bitbucket, git, code-review]
category: devops
requires_toolsets: [terminal]
version: 1.0.0
---
# Bitbucket Pull Requests
## When to Use
When the user asks to check PRs, review code, or list repositories.
## Procedure
### 1. List repositories
```bash
ngn-bitbucket GET '/repositories/razersw?pagelen=20'
```
### 2. List open PRs for a repo
```bash
ngn-bitbucket GET '/repositories/razersw/{repo}/pullrequests?state=OPEN&pagelen=20'
```
### 3. Get PR details
```bash
ngn-bitbucket GET '/repositories/razersw/{repo}/pullrequests/{prId}'
```
### 4. Get PR diff
```bash
ngn-bitbucket GET '/repositories/razersw/{repo}/pullrequests/{prId}/diff'
```
### 5. Get PR comments
```bash
ngn-bitbucket GET '/repositories/razersw/{repo}/pullrequests/{prId}/comments'
```
### 6. List branch list
```bash
ngn-bitbucket GET '/repositories/razersw/{repo}/refs/branches?pagelen=20'
```
## Pitfalls
- Bitbucket pagination uses `pagelen` and `page` params (not `maxResults`)
- Diff endpoint returns raw diff text — may be large
- PR comments include inline code comments, not just summary
## Required Environment
- `JIRA_EMAIL` — your Atlassian account email
- `JIRA_API_TOKEN` — from https://id.atlassian.com/manage/api-tokens
BITBUCKET_SKILL_MD
  printf '%s\n' " ✓ bitbucket/SKILL.md written"
}
#######################################
# Install the session-lifecycle skill (D-10): writes SKILL.md and its
# references/operational-monitoring.md under
# ~/.hermes/skills/ngn-agent/session. Both payloads are quoted heredocs, so
# backslashes, backticks and $ inside them are written literally.
#
# Globals:   HOME (read)
# Arguments: none
# Outputs:   one confirmation line on stdout; writes two files
#######################################
write_session_skill() {
  mkdir -p "$HOME/.hermes/skills/ngn-agent/session/references"
  # Fixes vs. previous snapshot: the step ranges read "Steps 57" / "Steps 13"
  # (range separator lost); they are now "Steps 5-7" / "Steps 1-3". The Step 7
  # tool-call block is fenced as plain text like the other tool-call blocks.
  cat > "$HOME/.hermes/skills/ngn-agent/session/SKILL.md" << 'SKILL'
---
name: session
description: Main ngn-agent session lifecycle — init, work, close
metadata:
hermes:
tags: [ngn-agent, platform-engineering, session]
category: devops
requires_toolsets: [terminal]
version: 1.0.0
---
# ngn-agent Session Lifecycle
## When to Use
Load this skill at the START of EVERY platform engineering session, before any other work. This skill defines the standard session workflow.
Specific triggers:
- When the user starts any infrastructure or platform engineering task
- When the user asks to create a Jira ticket or find a ticket
- When the user wants to search or load Confluence documentation
- When a session is ending and you need to document progress
- When you need to save context for future sessions
## Important
- **Keep this skill loaded for the entire session** — if context grows large, reload via `skill_view("session")` before the session-end steps (Steps 5-7)
- **Never create Jira tickets without asking the user first** (D-02)
- **Never update Confluence without asking the user first** (D-11)
- **Always save session summary to hindsight at end** — this step has no user prompt, it is automatic (D-12)
- User must confirm before any Jira mutation (create, comment, transition) — D-08
- Repos are already mounted at `/workspace/` from Phase 6 (rai-ops, rai-deployment, rai-devtools)
- This skill replaces the ad-hoc session workflow with a repeatable init→work→close pattern
## Procedure
### 1. Check for Similar Previous Sessions
At the very start of a session, use `hindsight_recall` with a query describing the user's current task to find similar sessions from the last 2 weeks.
Call `hindsight_recall` with a budget of low:
```
Tool: hindsight_recall
Query: "<user's task description>"
Budget: low
```
Present any matches to the user in this format:
```
Found [N] similar sessions from the last 2 weeks:
1. [Session Title] — [Date] — [one-line summary]
2. [Session Title] — [Date] — [one-line summary]
```
Ask the user: "Would you like to resume any of these sessions, or start fresh?"
- If they choose to resume: load that session's context and continue
- If they choose fresh: proceed to step 2
If no similar sessions are found (normal for first sessions), proceed to step 2.
### 2. Prompt: Create Jira Ticket
Ask the user: "Would you like to create a Jira Task ticket for this session?"
If YES:
1. Ask which Jira project to use (e.g., "PLATFORM", "DEVOPS") — do not hardcode (D-06)
2. Check hindsight for cached epics:
```
Tool: hindsight_recall
Query: "jira epics cached"
Budget: low
```
3. If epics are cached, check the cache timestamp:
- If the cache is more than 24 hours old OR the user says the list looks wrong, refresh from Jira:
```bash
ngn-jira GET '/rest/api/3/search?jql=issuetype=Epic AND project=<PROJECT>&fields=summary,id&maxResults=50'
```
Save fresh epics to hindsight:
```
Tool: hindsight_retain
tier: "epic-cache"
content: "Epic Cache [<date>]: PROJECT=<PROJECT>: [EPIC-KEY-1: Summary, EPIC-KEY-2: Summary, ...]"
```
- If the cache is fresh (less than 24 hours old), use the cached list
4. If no cached epics found, query Jira for current epics:
```bash
ngn-jira GET '/rest/api/3/search?jql=issuetype=Epic AND project=<PROJECT>&fields=summary,id&maxResults=50'
```
Save to hindsight for future sessions:
```
Tool: hindsight_retain
tier: "epic-cache"
content: "Epic Cache [<date>]: PROJECT=<PROJECT>: [EPIC-KEY-1: Summary, ...]"
```
5. Present cached/refreshed epics to the user: "Available epics: [list]. Would you like to set a parent epic?"
6. If user selects an epic, include it as parent when creating the ticket
7. Create the Task via Jira REST API:
```bash
ngn-jira POST '/rest/api/3/issue' --body '{
"fields": {
"project": {"key": "<PROJECT>"},
"summary": "<session task description>",
"issuetype": {"name": "Task"},
"parent": {"key": "<EPIC_KEY>"}
}
}'
```
8. Note the ticket key (e.g., `PLATFORM-123`) — save it for session-end steps (Step 5)
If NO: proceed to step 3 (no Jira ticket this session)
### 3. Prompt: Load Confluence Documentation
Ask the user: "Would you like to load relevant Confluence documentation?"
If YES:
1. Search by the `ngn-agent` tag:
```bash
ngn-confluence GET '/rest/api/search?cql=tag="ngn-agent"&limit=20'
```
2. Present matching pages to the user:
```
Found [N] pages tagged 'ngn-agent':
- [Title] — [Space] — [Last Modified]
```
3. Ask: "Which pages would you like me to load?"
4. For each selected page, load its full content:
```bash
ngn-confluence GET '/rest/api/content/{pageId}?expand=body.storage'
```
5. Review the loaded content with the user
If NO: proceed to step 4
### 4. Work Phase
Repos are already mounted at `/workspace/` (rai-ops, rai-deployment, rai-devtools). Proceed with the task using standard Hermes tools.
If you need to clone additional repos:
```bash
git clone git@bitbucket.org:razersw/<repo>.git /workspace/<repo>
```
The session skill remains loaded for the session-end steps below. If the skill is evicted from context during a long session, reload it with `skill_view("session")` before proceeding to Steps 5-7.
### 5. Session-End: Update Jira
When the user indicates work is complete or the session wraps up:
Ask the user: "Would you like me to update the Jira ticket with a summary comment?"
If YES (and a ticket was created in Step 2):
```bash
ngn-jira POST '/rest/api/3/issue/<TICKET-KEY>/comment' --body '{
"body": "<summary of work done, key decisions, next steps>"
}'
```
If NO: proceed without updating Jira.
**Important (D-08):** Do NOT transition tickets (e.g., close, resolve, move to Done) without explicit user confirmation. Only add comments unless the user specifically asks for a status change.
### 6. Session-End: Update Confluence
Ask the user: "Would you like me to create or update a Confluence page documenting this session?"
If YES:
- For a new page:
```bash
ngn-confluence POST '/rest/api/content' --body '{
"type": "page",
"title": "<Session Date>: <Task Description>",
"space": {"key": "<SPACE_KEY>"},
"body": {
"storage": {
"value": "<h1>Session Summary</h1><p><task summary, key decisions, outcomes></p>",
"representation": "storage"
}
},
"metadata": {
"properties": {
"content-appearance": {"value": "page"}
}
},
"labels": [{"name": "ngn-agent"}]
}'
```
- For updating an existing page: ask the user which page to update, then PUT to update its content
- **Important (D-11):** Do NOT create or update any Confluence page without the user confirming first
If NO: proceed without updating Confluence.
### 7. Session-End: Save to Hindsight (Automatic — No Prompt)
ALWAYS save a session summary to hindsight memory. Do NOT ask the user — this step is automatic and unconditional (D-12).
```
Tool: hindsight_retain
tier: "session-summary"
content: "
Session Summary
===============
Date: <today>
Task: <task description>
Repos: <repos worked on>
Jira: <ticket key or \"none\">
Key Decisions:
- <decision 1>
- <decision 2>
Outcomes:
- <outcome 1>
- <outcome 2>
Next Steps:
- <next step 1>
"
```
This summary allows future `hindsight_recall` queries to find this session for similarity matching (D-13). The structured content includes: date, task description, repos worked on, Jira ticket reference (or "none"), key decisions, outcomes, and next steps.
## Pitfalls
- **Skill not loaded at session start:** If you find yourself midway through a session without having run Steps 1-3, you missed the session start workflow. Run Step 1 (hindsight_recall) retroactively and ask the user if they want to create a Jira ticket or load Confluence docs. For future sessions, make sure to load this skill at the very start.
- **Epic cache too old:** Epics may change between sessions. Check the cache timestamp and refresh if more than 24 hours old. If the user says "that's wrong," always refresh regardless of age.
- **Confluence tag mismatch:** If the `ngn-agent` tag returns no results, try `platform-engineering` as a fallback, or ask the user what tag they use for session documentation.
- **Jira project doesn't exist:** If the create ticket call fails with a 404, the project key may be wrong. Ask the user to confirm the correct project key.
- **Empty hindsight recall (first sessions):** The first few sessions will have no similar sessions to find. That is normal — proceed with a fresh session. Over time, hindsight will accumulate session summaries.
- **Long sessions may evict this skill:** If the conversation grows long, the session skill content may be evicted from the agent's context. Reload it with `skill_view("session")` before the session-end steps (Steps 5-7) to ensure the Jira/Confluence prompts and hindsight save are not missed.
- **Missing Jira credentials in cron jobs:** The ngn-jira tool requires both `JIRA_EMAIL` and `JIRA_API_TOKEN` environment variables. If either is missing, Jira operations will fail with "unbound variable" errors. Check environment setup before attempting Jira updates in automated workflows.
## Operational Automation
### Daily Session Monitoring (Cron Job)
When running as a scheduled cron job for operational monitoring:
1. **Discover Active Sessions**:
```bash
hermes sessions export - # NOT 'hermes sessions list' - no --json flag available
```
Parse JSONL output with Python to find sessions with `last_active` within last 7 days
2. **Find Associated Jira Tickets**:
- Use `hindsight_recall` with query 'session summary jira' for each active session
- Search session messages for Jira patterns: `PLATFORM-\d+`, `AIOPS-\d+`, `RAID-\d+`, etc.
- Note: One session may have multiple Jira tickets (1-to-many mapping)
3. **Update Jira with Progress**:
```bash
ngn-jira POST '/rest/api/3/issue/<KEY>/comment' --body '{
"body": "Session activity update — Date: <today>, Last active: <last_active>. Session: <session_id>. Progress: See session transcript for details."
}'
```
4. **Generate Telegram Report**:
- Structure: Active Sessions + Jira Updated + Issues/Summary
- Keep under 4096 character limit
- Format with emoji sections for clarity
**Environment Requirements for Operational Jobs**:
- `JIRA_EMAIL` — Required for ngn-jira authentication
- `JIRA_API_TOKEN` — API token from Atlassian account
- Both must be set or Jira updates will fail
See `references/operational-monitoring.md` for detailed patterns, templates, and troubleshooting.
**Important Constraints (Cron Mode)**:
- DO NOT transition ticket statuses (D-05) - only add comments
- DO NOT update stale sessions (D-15) - only active within 7 days
- Use silent mode `[SILENT]` if no active sessions found
## Verification
1. On session start, agent checks for similar sessions via `hindsight_recall` ✓
2. Jira Task ticket created (or user declined) ✓
3. Confluence docs loaded by `ngn-agent` tag search (or user declined) ✓
4. At session end, user prompted for Jira update ✓
5. At session end, user prompted for Confluence update ✓
6. Session summary automatically saved to hindsight via `hindsight_retain` (no prompt) ✓
7. **Operational cron jobs can discover active sessions and update Jira tickets** ✓
SKILL
  # Session reference file
  cat > "$HOME/.hermes/skills/ngn-agent/session/references/operational-monitoring.md" << 'REF'
# Operational Session Monitoring
## Jira Ticket Pattern Detection
When scanning session content for associated Jira tickets, search for these patterns:
```python
jira_patterns = [
r'(PLATFORM-\d+)', # Platform engineering tickets
r'(AIOPS-\d+)', # AI Operations tickets
r'(RAID-\d+)', # RAID project tickets
r'(DEVOPS-\d+)', # DevOps tickets
r'(QAC-\d+)' # QAC tickets
]
```
## Session Export vs List Commands
**CORRECT**: `hermes sessions export -`
- Returns machine-readable JSONL format
- Each line is a complete session object
- Includes `last_active` timestamps for filtering
**INCORRECT**: `hermes sessions list --json`
- The `--json` flag does not exist (Pitfall from RESEARCH.md)
- Use export for automation, list for human viewing only
## Environment Variable Requirements
The `ngn-jira` tool wrapper expects:
- `JIRA_EMAIL` - Atlassian account email
- `JIRA_API_TOKEN` - From https://id.atlassian.com/manage/api-tokens
Missing either variable causes: `bash: line 10: JIRA_EMAIL: unbound variable`
## Telegram Report Template
```
📋 **ACTIVE SESSIONS** — {date}
🔹 **{session_id}**
Title: {title}
Last Active: {timestamp}
Jira: {ticket_keys or "None"}
🔄 **JIRA UPDATED**: {list of updated ticket keys}
❌ **ISSUES**: {any operational problems}
📊 **SUMMARY**: {count} active sessions found, {count} with Jira tickets
```
Character limit: 4096 for Telegram delivery
REF
  echo " ✓ session/SKILL.md + references written"
}
# ---- Register cron jobs (D-10) ----
#######################################
# Helper: run `hermes cron create` for one job and report the outcome.
# hermes' stdout is passed through; its stderr is captured and shown only on
# failure, so the warning explains the real cause rather than assuming that
# the job already exists.
# Arguments: $1 - label used in progress messages
#            $2.. - arguments forwarded verbatim to `hermes cron create`
# Returns:   always 0 (registration is best-effort)
#######################################
_register_cron_job() {
  local job_label=$1
  shift
  local err_output=""

  echo " → Creating ${job_label}..."
  # fd 3 carries hermes' stdout around the command substitution so only
  # stderr lands in err_output.
  if { err_output=$(hermes cron create "$@" 2>&1 1>&3 3>&-); } 3>&1; then
    echo " ✓ ${job_label} registered"
  else
    echo " ⚠ ${job_label} may already exist"
    if [ -n "$err_output" ]; then
      echo "   hermes: ${err_output}"
    fi
  fi
  return 0
}

#######################################
# Register the three ngn-agent cron jobs via `hermes cron create` (D-10).
# Each registration is best-effort: a failure is reported with a warning and
# the remaining jobs are still attempted.
# NOTE(review): the "SGT" times below assume the scheduler evaluates cron
# expressions in Singapore time — confirm against the host/hermes timezone.
# Globals:   none
# Arguments: none
# Outputs:   progress and warning lines on stdout
# Returns:   always 0
#######################################
register_cron_jobs() {
  echo " → Registering cron jobs (D-10)..."

  # 1. ngn-daily-report (daily at 09:00 SGT)
  _register_cron_job ngn-daily-report \
    --deliver telegram --skill session --skill jira-query \
    '0 9 * * *' \
    'Daily session report. Export sessions, find active ones, check Jira, compose Telegram summary.'

  # 2. ngn-weekly-stale-summary (Sunday 20:00 SGT)
  _register_cron_job ngn-weekly-stale-summary \
    --deliver telegram --skill session \
    '0 20 * * 0' \
    'Weekly stale session summary. Review sessions inactive >30 days, compose Telegram summary.'

  # 3. ngn-weekly-archive (Sunday 20:05 SGT — 5 min after summary, per D-10)
  _register_cron_job ngn-weekly-archive \
    --no-agent --script archive-stale-sessions.sh \
    '5 20 * * 0'
}
# ---- Offer gateway restart (per CONTEXT.md "Specific Ideas") ----
#######################################
# Announce that setup is complete and offer to restart the Hermes gateway.
# An empty answer (just Enter) or y/Y restarts; anything else skips.
# No restart happens without an explicit answer: the prompt is skipped in
# non-interactive mode, and end-of-input on stdin counts as "skip".
# Globals:   NONINTERACTIVE (read; treated as false when unset)
# Arguments: none
# Outputs:   status lines on stdout
# Returns:   0 on restart or skip; non-zero if `hermes gateway restart` fails
#######################################
offer_gateway_restart() {
  local restart=""

  echo ""
  echo "==> Setup complete!"
  echo ""

  if [ "${NONINTERACTIVE:-false}" != false ]; then
    echo " → Skipped. Run 'hermes gateway restart' when ready."
    return 0
  fi

  # -r keeps backslashes literal. read fails at EOF; an EOF with no text must
  # not fall through to the "empty answer means yes" default below.
  if ! read -r -p "Restart Hermes gateway now? [Y/n]: " restart \
    && [ -z "$restart" ]; then
    echo " → Skipped. Run 'hermes gateway restart' when ready."
    return 0
  fi

  if [[ "$restart" =~ ^[Yy]?$ ]]; then
    if hermes gateway restart; then
      echo " → Gateway restarted."
    else
      echo " ⚠ Gateway restart failed. Run 'hermes gateway restart' manually."
      return 1
    fi
  else
    echo " → Skipped. Run 'hermes gateway restart' when ready."
  fi
}
# =============================================================================
# Main Execution Block
# =============================================================================
#######################################
# Run the whole ngn-agent setup in order, printing a [n/14] progress line
# before each step. Argument parsing happens before main is called.
# Globals:
#   NONINTERACTIVE (read) - "false" prompts for secrets; any other value
#                           takes them from the environment
#   JIRA_API_TOKEN, JIRA_EMAIL, TELEGRAM_BOT_TOKEN (set from prompts, or
#                           required to be set in non-interactive mode)
#   OPENROUTER_API_KEY (set from prompt, or defaulted to empty)
# Arguments: none are read here
# Outputs:   progress lines on stdout
# Returns:   status of the final step; aborts with a message if a required
#            secret is missing in non-interactive mode
#######################################
main() {
  echo ""
  echo "=== ngn-agent Setup Script ==="
  echo "Embedded file snapshots frozen at: 2026-06-15"
  echo ""

  # Arguments were already parsed earlier in the file.

  # [1/14] Prerequisites, then the path summary
  echo "[1/14] Checking prerequisites..."
  check_prerequisites
  print_summary

  # [2/14] Secrets: prompt interactively, otherwise require them from the env
  echo "[2/14] Collecting secrets..."
  if [ "$NONINTERACTIVE" = false ]; then
    JIRA_API_TOKEN=$(prompt_secret "JIRA_API_TOKEN" "JIRA API Token (https://id.atlassian.com/manage/api-tokens): ")
    JIRA_EMAIL=$(prompt_secret "JIRA_EMAIL" "JIRA Email: ")
    TELEGRAM_BOT_TOKEN=$(prompt_secret "TELEGRAM_BOT_TOKEN" "Telegram Bot Token (from @BotFather): ")
    # NOTE(review): the third argument presumably marks the value as
    # optional — confirm against prompt_secret.
    OPENROUTER_API_KEY=$(prompt_secret "OPENROUTER_API_KEY" "OpenRouter API Key (leave blank to keep existing): " "true")
  else
    echo " → Non-interactive mode — using environment variables"
    : "${JIRA_API_TOKEN:?JIRA_API_TOKEN not set}"
    : "${JIRA_EMAIL:?JIRA_EMAIL not set}"
    : "${TELEGRAM_BOT_TOKEN:?TELEGRAM_BOT_TOKEN not set}"
    # Optional key: the interactive branch always assigns it (possibly
    # blank), so give it the same blank default here. Otherwise later steps
    # that read it would hit an unbound variable under `set -u`.
    OPENROUTER_API_KEY="${OPENROUTER_API_KEY:-}"
  fi
  echo " ✓ Secrets collected"

  # [3/14] Directories
  echo "[3/14] Creating directories..."
  create_directories

  # [4/14] Back up any existing config before it is regenerated
  echo "[4/14] Backing up existing config..."
  backup_config

  # [5/14] config.yaml
  echo "[5/14] Generating config.yaml..."
  generate_config_yaml

  # [6/14] .env
  echo "[6/14] Generating .env..."
  generate_env_file

  # [7/14] Hindsight config
  echo "[7/14] Generating hindsight config..."
  generate_hindsight_config

  # [8/14] Cron environment
  echo "[8/14] Configuring cron environment..."
  generate_cron_env_config

  # [9/14] session-init script
  echo "[9/14] Writing session-init script..."
  write_session_init_script

  # [10/14] Archive script
  echo "[10/14] Writing archive script..."
  write_archive_script

  # [11/14] Skill files (five skills)
  echo "[11/14] Writing skill files..."
  write_jira_skill
  write_aws_skill
  write_confluence_skill
  write_bitbucket_skill
  write_session_skill

  # [12/14] Cron jobs — best-effort, a failure here must not abort setup
  echo "[12/14] Registering cron jobs..."
  register_cron_jobs || echo " ⚠ Cron registration had issues (may already exist)"

  echo "[13/14] Setup complete."

  # [14/14] Optional gateway restart
  echo "[14/14] Offering gateway restart..."
  offer_gateway_restart
}
# Script entry point: run main, forwarding the script's command-line
# arguments (main's own body does not read them; per its first step comment,
# argument parsing happens earlier in the file).
main "$@"