# =============================================================================
# Task 3: File/Cron Setup (D-10)
# =============================================================================

# ---- Write session-init.sh (D-10) ----
#######################################
# Writes the embedded session-init.sh snapshot and marks it executable.
# The generated script checks that every repo named in DEFAULT_REPOS has a
# .git directory under /workspace and always exits 0.
# Globals:   HOME (read)
# Outputs:   progress messages on stdout;
#            $HOME/.hermes/scripts/session-init.sh on disk
#######################################
write_session_init_script() {
  echo "  → Writing session-init.sh (D-10)..."
  # Make sure the target directory exists even if this step runs on its own.
  mkdir -p "$HOME/.hermes/scripts"
  cat > "$HOME/.hermes/scripts/session-init.sh" << 'SCRIPT'
#!/bin/bash
# session-init.sh — Verify DEFAULT_REPOS mounts at session start
# Runs via shell_init_files before agent prompt. Non-blocking.
# Reads DEFAULT_REPOS from environment (forwarded via docker_forward_env).
set -uo pipefail

DEFAULT_REPOS="${DEFAULT_REPOS:-}"

if [ -z "$DEFAULT_REPOS" ]; then
  echo "[session-init] DEFAULT_REPOS not set — skipping verification"
  exit 0
fi

# Split comma-separated list
IFS=',' read -ra REPOS <<< "$DEFAULT_REPOS"
ALL_OK=true

for repo in "${REPOS[@]}"; do
  # Trim whitespace
  repo="${repo#"${repo%%[![:space:]]*}"}"
  repo="${repo%"${repo##*[![:space:]]}"}"

  # Skip empty entries (e.g. a trailing comma) instead of reporting them missing
  [ -n "$repo" ] || continue

  if [ -d "/workspace/$repo/.git" ]; then
    echo "[session-init] ✓ $repo — mounted at /workspace/$repo"
  else
    echo "[session-init] ⚠ $repo — NOT FOUND at /workspace/$repo"
    ALL_OK=false
  fi
done

if [ "$ALL_OK" = true ]; then
  echo "[session-init] All DEFAULT_REPOS verified"
else
  echo "[session-init] Some repos missing — check docker_volumes in config.yaml"
fi

exit 0  # always exit cleanly — non-blocking
SCRIPT
  chmod +x "$HOME/.hermes/scripts/session-init.sh"
  echo "  ✓ session-init.sh written and executable"
}

# ---- Write archive-stale-sessions.sh (D-10) ----
#######################################
# Writes the embedded archive-stale-sessions.sh snapshot and marks it
# executable. The generated script exports the session store and prunes only
# when DRY_RUN=false is set in its environment (dry run is the default).
# Globals:   HOME (read)
# Outputs:   progress messages on stdout;
#            $HOME/.hermes/scripts/archive-stale-sessions.sh on disk
#######################################
write_archive_script() {
  echo "  → Writing archive-stale-sessions.sh (D-10)..."
  mkdir -p "$HOME/.hermes/scripts"
  cat > "$HOME/.hermes/scripts/archive-stale-sessions.sh" << 'SCRIPT'
#!/bin/bash
# Archive stale sessions (inactive >30 days) and prune from live DB
# This script runs via hermes cron with --no-agent
# Stdout is delivered to Telegram via --deliver telegram
# Dry-run mode: export only, no prune — safe default for first run
set -euo pipefail

# Default to a dry run; export DRY_RUN=false in the environment to enable prune.
DRY_RUN="${DRY_RUN:-true}"

ARCHIVE_DIR="$HOME/.hermes/archive/sessions"
mkdir -p "$ARCHIVE_DIR"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
OUTPUT_FILE="$ARCHIVE_DIR/sessions-${TIMESTAMP}.jsonl"

echo "=== Stale Session Archive ==="
echo "Started: $(date)"
echo "Dry run: $DRY_RUN"
echo ""

echo "[1/3] Exporting session store..."
echo "  Output: $OUTPUT_FILE"
hermes sessions export "$OUTPUT_FILE"
echo "  -> $(wc -l < "$OUTPUT_FILE") sessions exported"
echo "  -> Size: $(du -h "$OUTPUT_FILE" | cut -f1)"
echo ""

if [ "$DRY_RUN" = false ]; then
  echo "[2/3] Pruning sessions older than 30 days..."
  hermes sessions prune --older-than 30 --yes
  echo "  Done."
else
  echo "[2/3] SKIPPED (dry run) — set DRY_RUN=false to enable prune"
  echo "  Review $OUTPUT_FILE before enabling."
fi
echo ""

echo "[3/3] Post-archive stats:"
hermes sessions stats
echo ""

echo "✓ Archive complete."
SCRIPT
  chmod +x "$HOME/.hermes/scripts/archive-stale-sessions.sh"
  echo "  ✓ archive-stale-sessions.sh written and executable"
}

# ---- Write skill files (D-10) ----
#######################################
# Writes the Jira query skill (SKILL.md) under ~/.hermes/skills/ngn-agent/jira.
# Globals:   HOME (read)
# Outputs:   progress message on stdout; SKILL.md on disk
#######################################
write_jira_skill() {
  mkdir -p "$HOME/.hermes/skills/ngn-agent/jira"
  cat > "$HOME/.hermes/skills/ngn-agent/jira/SKILL.md" << 'SKILL'
---
name: jira-query
description: Query Jira Cloud issues, search, and manage tickets
metadata:
  hermes:
    tags: [jira, project-management]
    category: devops
    requires_toolsets: [terminal]
version: 1.0.0
---
# Jira Cloud Query

## When to Use
When the user asks to search Jira issues, check ticket status, or list project work.

## Procedure

### 1. Search issues by JQL
```bash
ngn-jira GET '/rest/api/3/search?jql=ORDER BY created DESC&maxResults=10'
```

For specific project:
```bash
ngn-jira GET '/rest/api/3/search?jql=project=PROJ ORDER BY created DESC&maxResults=10'
```

### 2. Get issue details
```bash
ngn-jira GET '/rest/api/3/issue/PROJ-123'
```

### 3. List sprints (if Jira Software)
```bash
ngn-jira GET '/rest/agile/1.0/board'
ngn-jira GET '/rest/agile/1.0/board/{boardId}/sprint?state=active'
```

### 4. Get issue comments
```bash
ngn-jira GET '/rest/api/3/issue/PROJ-123/comment'
```

## Pitfalls
- JQL is case-sensitive for field names
- maxResults defaults to 50; set explicitly for large queries
- Agile REST API may not be available on all plans

## Required Environment
- `JIRA_EMAIL` — your Atlassian account email
- `JIRA_API_TOKEN` — from https://id.atlassian.com/manage/api-tokens
SKILL
  echo "  ✓ jira/SKILL.md written"
}

#######################################
# Writes the AWS diagnostics skill (SKILL.md) and its multiregional-patterns
# reference file under ~/.hermes/skills/ngn-agent/aws-diagnostics.
# Globals:   HOME (read)
# Outputs:   progress message on stdout; two markdown files on disk
#######################################
write_aws_skill() {
  mkdir -p "$HOME/.hermes/skills/ngn-agent/aws-diagnostics/references"
  cat > "$HOME/.hermes/skills/ngn-agent/aws-diagnostics/SKILL.md" << 'SKILL'
---
name: aws-diagnostics
description: Read-only AWS diagnostics for platform engineering
metadata:
  hermes:
    tags: [aws, diagnostics, platform-engineering]
    category: devops
    requires_toolsets: [terminal]
version: 1.0.0
---
# AWS Diagnostics

## When to Use
When the user asks to check AWS resources, investigate issues, or audit infrastructure in any account.

## Important
- ALWAYS determine the correct AWS_PROFILE before running commands
- NEVER run mutating AWS commands (delete, terminate, stop, modify)
- Prefer read-only AWS CLI commands (describe, list, get)

## Procedure

### 1. Identify the target account
Ask the user which account/environment they want to target. Available profiles in `~/.aws/config`:
- `rzaws-sw-rai-ava-dev/prod/rc` — AVA service
- `rzaws-sw-rai-cs-dev/prod/rc` — CS service
- `rzaws-sw-rai-qac-dev/prod` — QAC
- `rzaws-sw-rai-ops` — Ops account
- `rzaws-sw-rai-voicekit-dev/prod/rc` — VoiceKit
- `rzaws-sw-rai-preprod` — Pre-production
- `rzaws-sw-rai-nonprod` — Non-production

### 2. Set the profile
```bash
export AWS_PROFILE=rzaws-sw-rai-{service}-{env}
```

### 3. Diagnostic commands

**EC2 instances:**
```bash
aws ec2 describe-instances --query 'Reservations[*].Instances[*].[InstanceId,State.Name,InstanceType,Tags[?Key==`Name`].Value|[0]]' --output table
```

**ECS services:**
```bash
aws ecs list-clusters && aws ecs list-services --cluster {clusterName}
```

**S3 buckets:**
```bash
aws s3 ls
```

**CloudWatch alarms:**
```bash
aws cloudwatch describe-alarms --state-value ALARM --output table
```

**ECS task health:**
```bash
aws ecs describe-tasks --cluster {clusterName} --tasks {taskArn}
```

**RDS instances:**
```bash
aws rds describe-db-instances --query 'DBInstances[*].[DBInstanceIdentifier,DBInstanceStatus,Engine,DBInstanceClass]' --output table
```

**Lambda functions:**
```bash
aws lambda list-functions --query 'Functions[*].[FunctionName,Runtime,LastModified]' --output table
```

**ELB target group health:**
```bash
aws elbv2 describe-target-groups --query 'TargetGroups[*].[TargetGroupName,TargetType]' --output table
```

### 4. Report findings
Format as a concise table. Include account ID and profile used.

## Alternative: Infrastructure Code Analysis

When AWS CLI access is unavailable (Docker containers, credential issues), examine existing infrastructure code instead:

```bash
# Search for region patterns
search_files --pattern="us-west-2" --path="/workspace"

# Check terraform configurations
read_file /workspace/rai-ops/aws/{account}/us-east-1/app/main.tf
read_file /workspace/rai-ops/aws/{account}/us-east-1/app/{app}.tfvars

# Look for provider configurations
search_files --pattern="provider.*replica" --path="/workspace/rai-ops"

# Check S3 migration data
search_files --pattern="s3-mapping" --target="files"
```

**When to use code analysis:**
- Docker container with read-only filesystem
- Missing AWS CLI or credentials
- Need to understand intended architecture vs live state
- Investigating multiregional setup patterns

## Pitfalls
- SSO tokens expire (~6-8h). If you get auth errors, ask the user to run `aws sso login`
- Some accounts may not have all services — check `aws sts get-caller-identity` first
- Don't pipe large results directly — use `--query` and `--output table` for readability
- **Don't persist with CLI installation in constrained environments** — switch to code analysis quickly when installation fails

## Verification
Run `aws sts get-caller-identity` to confirm the correct profile is active before running diagnostics.

## References
- `references/multiregional-patterns.md` - Terraform patterns for cross-region infrastructure setup
SKILL

  # AWS reference file
  cat > "$HOME/.hermes/skills/ngn-agent/aws-diagnostics/references/multiregional-patterns.md" << 'REF'
# Multiregional Infrastructure Patterns

## AVA Multiregional Setup

### Provider Configuration Pattern
```hcl
# Primary provider (us-east-1)
provider "aws" {
  region = var.region
  # ... assume_role block
}

# Replica provider (us-west-2)
provider "aws" {
  alias  = "replica"
  region = "us-west-2"
  # ... same assume_role block
}
```

### Module Consumption
```hcl
module "app" {
  providers = {
    aws         = aws
    aws.replica = aws.replica  # Required by tf-modules/app/versions.tf
  }
  # ... other config
}
```

### Database Replication Options

**RDS Aurora PostgreSQL (Current Pattern)**
- Engine: `aurora-postgresql`
- Version: `16.11`
- Cross-region read replicas supported
- Can promote replica for DR scenarios

**DynamoDB Global Tables (Available)**
- Global Table v2 with us-east-1 + us-west-2 replicas
- Per-region CMKs for encryption
- Feature-flagged via `var.tenant_registry`
- Documented in RAID-352

### S3 Cross-Region Replication
Extensive existing pattern from migration data:
- `ava-{env}-west-*` buckets in us-west-2
- Matching `rai-s3-usw2-*` naming convention
- Covers: bug reports, screenshots, game logs, shiny moments

### Key Files for Multiregional Analysis
- `aws/{account}/us-east-1/app/provider.tf` - Replica provider config
- `aws/{account}/us-east-1/app/{app}.tfvars` - App-specific resources
- `raid-migration/raid-s3-migration/s3-mapping.csv` - Cross-region S3 inventory
- `RAID-352-PR-DESCRIPTION.md` - DynamoDB Global Tables documentation
REF
  echo "  ✓ aws-diagnostics/SKILL.md + references written"
}

#######################################
# Writes the Confluence search skill (SKILL.md) under
# ~/.hermes/skills/ngn-agent/confluence.
# Globals:   HOME (read)
# Outputs:   progress message on stdout; SKILL.md on disk
#######################################
write_confluence_skill() {
  mkdir -p "$HOME/.hermes/skills/ngn-agent/confluence"
  cat > "$HOME/.hermes/skills/ngn-agent/confluence/SKILL.md" << 'SKILL'
---
name: confluence-search
description: Search and retrieve Confluence pages
metadata:
  hermes:
    tags: [confluence, documentation]
    category: devops
    requires_toolsets: [terminal]
version: 1.0.0
---
# Confluence Search

## When to Use
When the user asks to find documentation, search Confluence pages, or retrieve page content.

## Procedure

### 1. Search pages by text
```bash
ngn-confluence GET '/rest/api/search?cql=text~"search terms"&limit=10'
```

### 2. Search by space
```bash
ngn-confluence GET '/rest/api/search?cql=space=ADM&limit=10'
```

### 3. Get page content
```bash
ngn-confluence GET '/rest/api/content/{pageId}?expand=body.storage'
```

### 4. List pages in space
```bash
ngn-confluence GET '/rest/api/content?spaceKey=ADM&limit=50'
```

### 5. Get page children
```bash
ngn-confluence GET '/rest/api/content/{pageId}/child/page?limit=50'
```

## Pitfalls
- CQL is different from JQL — `text~"query"` for full-text search
- Page body needs `expand=body.storage` to retrieve content
- Use `limit` parameter — defaults to 25

## Required Environment
- `JIRA_EMAIL` — your Atlassian account email
- `JIRA_API_TOKEN` — from https://id.atlassian.com/manage/api-tokens
SKILL
  echo "  ✓ confluence/SKILL.md written"
}

#######################################
# Writes the Bitbucket pull-request skill (SKILL.md) under
# ~/.hermes/skills/ngn-agent/bitbucket.
# Globals:   HOME (read)
# Outputs:   progress message on stdout; SKILL.md on disk
#######################################
write_bitbucket_skill() {
  mkdir -p "$HOME/.hermes/skills/ngn-agent/bitbucket"
  cat > "$HOME/.hermes/skills/ngn-agent/bitbucket/SKILL.md" << 'SKILL'
---
name: bitbucket-pr
description: Review Bitbucket pull requests and repositories
metadata:
  hermes:
    tags: [bitbucket, git, code-review]
    category: devops
    requires_toolsets: [terminal]
version: 1.0.0
---
# Bitbucket Pull Requests

## When to Use
When the user asks to check PRs, review code, or list repositories.

## Procedure

### 1. List repositories
```bash
ngn-bitbucket GET '/repositories/razersw?pagelen=20'
```

### 2. List open PRs for a repo
```bash
ngn-bitbucket GET '/repositories/razersw/{repo}/pullrequests?state=OPEN&pagelen=20'
```

### 3. Get PR details
```bash
ngn-bitbucket GET '/repositories/razersw/{repo}/pullrequests/{prId}'
```

### 4. Get PR diff
```bash
ngn-bitbucket GET '/repositories/razersw/{repo}/pullrequests/{prId}/diff'
```

### 5. Get PR comments
```bash
ngn-bitbucket GET '/repositories/razersw/{repo}/pullrequests/{prId}/comments'
```

### 6. List branches
```bash
ngn-bitbucket GET '/repositories/razersw/{repo}/refs/branches?pagelen=20'
```

## Pitfalls
- Bitbucket pagination uses `pagelen` and `page` params (not `maxResults`)
- Diff endpoint returns raw diff text — may be large
- PR comments include inline code comments, not just summary

## Required Environment
- `JIRA_EMAIL` — your Atlassian account email
- `JIRA_API_TOKEN` — from https://id.atlassian.com/manage/api-tokens
SKILL
  echo "  ✓ bitbucket/SKILL.md written"
}

#######################################
# Writes the session lifecycle skill (SKILL.md) and its
# operational-monitoring reference under ~/.hermes/skills/ngn-agent/session.
# Globals:   HOME (read)
# Outputs:   progress message on stdout; two markdown files on disk
#######################################
write_session_skill() {
  mkdir -p "$HOME/.hermes/skills/ngn-agent/session/references"
  cat > "$HOME/.hermes/skills/ngn-agent/session/SKILL.md" << 'SKILL'
---
name: session
description: Main ngn-agent session lifecycle — init, work, close
metadata:
  hermes:
    tags: [ngn-agent, platform-engineering, session]
    category: devops
    requires_toolsets: [terminal]
version: 1.0.0
---

# ngn-agent Session Lifecycle

## When to Use

Load this skill at the START of EVERY platform engineering session, before any other work. This skill defines the standard session workflow.

Specific triggers:
- When the user starts any infrastructure or platform engineering task
- When the user asks to create a Jira ticket or find a ticket
- When the user wants to search or load Confluence documentation
- When a session is ending and you need to document progress
- When you need to save context for future sessions

## Important

- **Keep this skill loaded for the entire session** — if context grows large, reload via `skill_view("session")` before the session-end steps (Steps 5–7)
- **Never create Jira tickets without asking the user first** (D-02)
- **Never update Confluence without asking the user first** (D-11)
- **Always save session summary to hindsight at end** — this step has no user prompt, it is automatic (D-12)
- User must confirm before any Jira mutation (create, comment, transition) — D-08
- Repos are already mounted at `/workspace/` from Phase 6 (rai-ops, rai-deployment, rai-devtools)
- This skill replaces the ad-hoc session workflow with a repeatable init→work→close pattern

## Procedure

### 1. Check for Similar Previous Sessions

At the very start of a session, use `hindsight_recall` with a query describing the user's current task to find similar sessions from the last 2 weeks.

Call `hindsight_recall` with a budget of low:

```
Tool: hindsight_recall
Query: "{description of the user's current task}"
Budget: low
```

Present any matches to the user in this format:

```
Found [N] similar sessions from the last 2 weeks:
1. [Session Title] — [Date] — [one-line summary]
2. [Session Title] — [Date] — [one-line summary]
```

Ask the user: "Would you like to resume any of these sessions, or start fresh?"
- If they choose to resume: load that session's context and continue
- If they choose fresh: proceed to step 2

If no similar sessions are found (normal for first sessions), proceed to step 2.

### 2. Prompt: Create Jira Ticket

Ask the user: "Would you like to create a Jira Task ticket for this session?"

If YES:
1. Ask which Jira project to use (e.g., "PLATFORM", "DEVOPS") — do not hardcode (D-06)
2. Check hindsight for cached epics:

   ```
   Tool: hindsight_recall
   Query: "jira epics cached"
   Budget: low
   ```

3. If epics are cached, check the cache timestamp:
   - If the cache is more than 24 hours old OR the user says the list looks wrong, refresh from Jira:
     ```bash
     ngn-jira GET '/rest/api/3/search?jql=issuetype=Epic AND project={PROJECT_KEY}&fields=summary,id&maxResults=50'
     ```
     Save fresh epics to hindsight:
     ```
     Tool: hindsight_retain
     tier: "epic-cache"
     content: "Epic Cache [{timestamp}]: PROJECT={PROJECT_KEY}: [EPIC-KEY-1: Summary, EPIC-KEY-2: Summary, ...]"
     ```
   - If the cache is fresh (less than 24 hours old), use the cached list
4. If no cached epics found, query Jira for current epics:
   ```bash
   ngn-jira GET '/rest/api/3/search?jql=issuetype=Epic AND project={PROJECT_KEY}&fields=summary,id&maxResults=50'
   ```
   Save to hindsight for future sessions:
   ```
   Tool: hindsight_retain
   tier: "epic-cache"
   content: "Epic Cache [{timestamp}]: PROJECT={PROJECT_KEY}: [EPIC-KEY-1: Summary, ...]"
   ```
5. Present cached/refreshed epics to the user: "Available epics: [list]. Would you like to set a parent epic?"
6. If user selects an epic, include it as parent when creating the ticket
7. Create the Task via Jira REST API:

   ```bash
   ngn-jira POST '/rest/api/3/issue' --body '{
     "fields": {
       "project": {"key": "{PROJECT_KEY}"},
       "summary": "{session task summary}",
       "issuetype": {"name": "Task"},
       "parent": {"key": "{EPIC-KEY}"}
     }
   }'
   ```

8. Note the ticket key (e.g., `PLATFORM-123`) — save it for session-end steps (Step 5)

If NO: proceed to step 3 (no Jira ticket this session)

### 3. Prompt: Load Confluence Documentation

Ask the user: "Would you like to load relevant Confluence documentation?"

If YES:
1. Search by the `ngn-agent` label (Confluence's name for a tag; the CQL field is `label`):

   ```bash
   ngn-confluence GET '/rest/api/search?cql=label="ngn-agent"&limit=20'
   ```

2. Present matching pages to the user:
   ```
   Found [N] pages tagged 'ngn-agent':
   - [Title] — [Space] — [Last Modified]
   ```

3. Ask: "Which pages would you like me to load?"
4. For each selected page, load its full content:

   ```bash
   ngn-confluence GET '/rest/api/content/{pageId}?expand=body.storage'
   ```

5. Review the loaded content with the user

If NO: proceed to step 4

### 4. Work Phase

Repos are already mounted at `/workspace/` (rai-ops, rai-deployment, rai-devtools). Proceed with the task using standard Hermes tools.

If you need to clone additional repos:
```bash
git clone git@bitbucket.org:razersw/{repo}.git /workspace/{repo}
```

The session skill remains loaded for the session-end steps below. If the skill is evicted from context during a long session, reload it with `skill_view("session")` before proceeding to Steps 5–7.

### 5. Session-End: Update Jira

When the user indicates work is complete or the session wraps up:

Ask the user: "Would you like me to update the Jira ticket with a summary comment?"

If YES (and a ticket was created in Step 2), post the comment. Jira Cloud REST v3 expects the comment body in Atlassian Document Format (ADF), not a plain string:
```bash
ngn-jira POST '/rest/api/3/issue/{TICKET-KEY}/comment' --body '{
  "body": {
    "type": "doc",
    "version": 1,
    "content": [
      {"type": "paragraph", "content": [{"type": "text", "text": "{session summary}"}]}
    ]
  }
}'
```

If NO: proceed without updating Jira.

**Important (D-08):** Do NOT transition tickets (e.g., close, resolve, move to Done) without explicit user confirmation. Only add comments unless the user specifically asks for a status change.

### 6. Session-End: Update Confluence

Ask the user: "Would you like me to create or update a Confluence page documenting this session?"

If YES:
- For a new page:
  ```bash
  ngn-confluence POST '/rest/api/content' --body '{
    "type": "page",
    "title": "{TICKET-KEY}: {session title}",
    "space": {"key": "{SPACE_KEY}"},
    "body": {
      "storage": {
        "value": "<h1>Session Summary</h1><p>{session summary}</p>",
        "representation": "storage"
      }
    },
    "metadata": {
      "properties": {
        "content-appearance": {"value": "page"}
      }
    },
    "labels": [{"name": "ngn-agent"}]
  }'
  ```

- For updating an existing page: ask the user which page to update, then PUT to update its content

- **Important (D-11):** Do NOT create or update any Confluence page without the user confirming first

If NO: proceed without updating Confluence.

### 7. Session-End: Save to Hindsight (Automatic — No Prompt)

ALWAYS save a session summary to hindsight memory. Do NOT ask the user — this step is automatic and unconditional (D-12).

```bash
Tool: hindsight_retain
tier: "session-summary"
content: "
Session Summary
===============
Date: {YYYY-MM-DD}
Task: {task description}
Repos: {repos worked on}
Jira: {TICKET-KEY or none}
Key Decisions:
- {decision 1}
- {decision 2}
Outcomes:
- {outcome 1}
- {outcome 2}
Next Steps:
- {next step}
"
```

This summary allows future `hindsight_recall` queries to find this session for similarity matching (D-13). The structured content includes: date, task description, repos worked on, Jira ticket reference (or "none"), key decisions, outcomes, and next steps.

## Pitfalls

- **Skill not loaded at session start:** If you find yourself midway through a session without having run Steps 1–3, you missed the session start workflow. Run Step 1 (hindsight_recall) retroactively and ask the user if they want to create a Jira ticket or load Confluence docs. For future sessions, make sure to load this skill at the very start.
- **Epic cache too old:** Epics may change between sessions. Check the cache timestamp and refresh if more than 24 hours old. If the user says "that's wrong," always refresh regardless of age.
- **Confluence tag mismatch:** If the `ngn-agent` label returns no results, try `platform-engineering` as a fallback, or ask the user what label they use for session documentation.
- **Jira project doesn't exist:** If the create ticket call fails with a 404, the project key may be wrong. Ask the user to confirm the correct project key.
- **Empty hindsight recall (first sessions):** The first few sessions will have no similar sessions to find. That is normal — proceed with a fresh session. Over time, hindsight will accumulate session summaries.
- **Long sessions may evict this skill:** If the conversation grows long, the session skill content may be evicted from the agent's context. Reload it with `skill_view("session")` before the session-end steps (Steps 5–7) to ensure the Jira/Confluence prompts and hindsight save are not missed.
- **Missing Jira credentials in cron jobs:** The ngn-jira tool requires both `JIRA_EMAIL` and `JIRA_API_TOKEN` environment variables. If either is missing, Jira operations will fail with "unbound variable" errors. Check environment setup before attempting Jira updates in automated workflows.

## Operational Automation

### Daily Session Monitoring (Cron Job)

When running as a scheduled cron job for operational monitoring:

1. **Discover Active Sessions**:
   ```bash
   hermes sessions export -  # NOT 'hermes sessions list' - no --json flag available
   ```
   Parse JSONL output with Python to find sessions with `last_active` within last 7 days

2. **Find Associated Jira Tickets**:
   - Use `hindsight_recall` with query 'session summary jira' for each active session
   - Search session messages for Jira patterns: `PLATFORM-\d+`, `AIOPS-\d+`, `RAID-\d+`, etc.
   - Note: One session may have multiple Jira tickets (1-to-many mapping)

3. **Update Jira with Progress** (comment body in Atlassian Document Format):
   ```bash
   ngn-jira POST '/rest/api/3/issue/{TICKET-KEY}/comment' --body '{
     "body": {
       "type": "doc",
       "version": 1,
       "content": [
         {"type": "paragraph", "content": [{"type": "text", "text": "Session activity update — Date: {date}, Last active: {last_active}. Session: {session_id}. Progress: See session transcript for details."}]}
       ]
     }
   }'
   ```

4. **Generate Telegram Report**:
   - Structure: Active Sessions + Jira Updated + Issues/Summary
   - Keep under 4096 character limit
   - Format with emoji sections for clarity

**Environment Requirements for Operational Jobs**:
- `JIRA_EMAIL` — Required for ngn-jira authentication
- `JIRA_API_TOKEN` — API token from Atlassian account
- Both must be set or Jira updates will fail

See `references/operational-monitoring.md` for detailed patterns, templates, and troubleshooting.

**Important Constraints (Cron Mode)**:
- DO NOT transition ticket statuses (D-05) - only add comments
- DO NOT update stale sessions (D-15) - only active within 7 days
- Use silent mode `[SILENT]` if no active sessions found

## Verification

1. On session start, agent checks for similar sessions via `hindsight_recall` ✓
2. Jira Task ticket created (or user declined) ✓
3. Confluence docs loaded by `ngn-agent` label search (or user declined) ✓
4. At session end, user prompted for Jira update ✓
5. At session end, user prompted for Confluence update ✓
6. Session summary automatically saved to hindsight via `hindsight_retain` (no prompt) ✓
7. **Operational cron jobs can discover active sessions and update Jira tickets** ✓
SKILL

  # Session reference file
  cat > "$HOME/.hermes/skills/ngn-agent/session/references/operational-monitoring.md" << 'REF'
# Operational Session Monitoring

## Jira Ticket Pattern Detection

When scanning session content for associated Jira tickets, search for these patterns:

```python
jira_patterns = [
    r'(PLATFORM-\d+)',  # Platform engineering tickets
    r'(AIOPS-\d+)',     # AI Operations tickets
    r'(RAID-\d+)',      # RAID project tickets
    r'(DEVOPS-\d+)',    # DevOps tickets
    r'(QAC-\d+)'        # QAC tickets
]
```

## Session Export vs List Commands

**CORRECT**: `hermes sessions export -`
- Returns machine-readable JSONL format
- Each line is a complete session object
- Includes `last_active` timestamps for filtering

**INCORRECT**: `hermes sessions list --json`
- The `--json` flag does not exist (Pitfall from RESEARCH.md)
- Use export for automation, list for human viewing only

## Environment Variable Requirements

The `ngn-jira` tool wrapper expects:
- `JIRA_EMAIL` - Atlassian account email
- `JIRA_API_TOKEN` - From https://id.atlassian.com/manage/api-tokens

Missing either variable causes: `bash: line 10: JIRA_EMAIL: unbound variable`

## Telegram Report Template

```
📋 **ACTIVE SESSIONS** — {date}

🔹 **{session_id}**
   Title: {title}
   Last Active: {timestamp}
   Jira: {ticket_keys or "None"}

🔄 **JIRA UPDATED**: {list of updated ticket keys}

❌ **ISSUES**: {any operational problems}

📊 **SUMMARY**: {count} active sessions found, {count} with Jira tickets
```

Character limit: 4096 for Telegram delivery
REF
  echo "  ✓ session/SKILL.md + references written"
}

# ---- Register cron jobs (D-10) ----
#######################################
# Runs `hermes cron create` for one job and reports the outcome.
# stdout of hermes passes through; stderr is captured and shown only when the
# command fails, so the real failure reason is not lost.
# Arguments: $1 - human-readable job label used in messages
#            $@ - remaining arguments, passed verbatim to `hermes cron create`
# Returns:   0 if the job was registered, 1 otherwise
#######################################
_register_cron_job() {
  local label=$1
  shift
  local err=""

  echo "    → Creating ${label}..."
  # fd 3 carries hermes' stdout past the command substitution, which captures
  # stderr only.
  if { err=$(hermes cron create "$@" 2>&1 1>&3); } 3>&1; then
    echo "    ✓ ${label} registered"
    return 0
  fi

  echo "    ⚠ ${label} may already exist"
  if [[ -n "$err" ]]; then
    printf '      %s\n' "$err" >&2
  fi
  return 1
}

#######################################
# Registers the three ngn-agent cron jobs (daily report, weekly stale-session
# summary, weekly archive). Each job is attempted even if an earlier one fails.
# Outputs:   progress messages on stdout, hermes errors on stderr
# Returns:   0 if every job registered, 1 if at least one did not
#######################################
register_cron_jobs() {
  local failures=0

  echo "  → Registering cron jobs (D-10)..."

  # 1. ngn-daily-report (daily at 09:00 SGT)
  _register_cron_job "ngn-daily-report" \
    --deliver telegram --skill session --skill jira-query \
    '0 9 * * *' \
    'Daily session report. Export sessions, find active ones, check Jira, compose Telegram summary.' \
    || failures=$((failures + 1))

  # 2. ngn-weekly-stale-summary (Sunday 20:00 SGT)
  _register_cron_job "ngn-weekly-stale-summary" \
    --deliver telegram --skill session \
    '0 20 * * 0' \
    'Weekly stale session summary. Review sessions inactive >30 days, compose Telegram summary.' \
    || failures=$((failures + 1))

  # 3. ngn-weekly-archive (Sunday 20:05 SGT — 5 min after summary, per D-10)
  # NOTE(review): the archive script's header says its stdout is delivered via
  # --deliver telegram, but this job does not pass that flag — confirm intent.
  _register_cron_job "ngn-weekly-archive" \
    --no-agent --script archive-stale-sessions.sh \
    '5 20 * * 0' \
    || failures=$((failures + 1))

  [[ "$failures" -eq 0 ]]
}

# ---- Offer gateway restart (per CONTEXT.md "Specific Ideas") ----
#######################################
# Announces completion and offers to restart the Hermes gateway.
# In non-interactive mode no prompt is shown and the restart is skipped.
# Globals:   NONINTERACTIVE (read; treated as false when unset)
# Outputs:   prompt and status messages on stdout/stderr
# Returns:   0 on restart or skip, 1 if `hermes gateway restart` failed
#######################################
offer_gateway_restart() {
  local restart=""

  echo ""
  echo "==> Setup complete!"
  echo ""

  if [[ "${NONINTERACTIVE:-false}" != false ]]; then
    echo "  → Non-interactive mode — skipped restart. Run 'hermes gateway restart' when ready."
    return 0
  fi

  # On EOF (no terminal attached) treat the answer as "no" rather than
  # aborting or silently restarting.
  read -r -p "Restart Hermes gateway now? [Y/n]: " restart || restart="n"

  if [[ "$restart" =~ ^[Yy]?$ ]]; then
    if hermes gateway restart; then
      echo "  → Gateway restarted."
    else
      echo "  ⚠ Gateway restart failed. Run 'hermes gateway restart' manually." >&2
      return 1
    fi
  else
    echo "  → Skipped. Run 'hermes gateway restart' when ready."
  fi
}

# =============================================================================
# Main Execution Block
# =============================================================================
#######################################
# Runs the full setup sequence: prerequisites, secrets, config generation,
# script and skill files, cron registration, and the gateway-restart offer.
# Globals:   NONINTERACTIVE (read); JIRA_API_TOKEN, JIRA_EMAIL,
#            TELEGRAM_BOT_TOKEN, OPENROUTER_API_KEY (written or validated)
# Arguments: script arguments (parsing happens before main is called)
#######################################
main() {
  echo ""
  echo "=== ngn-agent Setup Script ==="
  echo "Embedded file snapshots frozen at: 2026-06-15"
  echo ""

  # Step 1: Parse arguments (already done above)
  # Step 2: Check prerequisites
  echo "[1/14] Checking prerequisites..."
  check_prerequisites

  # Step 3: Print path summary
  print_summary

  # Step 4: Prompt for secrets
  echo "[2/14] Collecting secrets..."
  if [ "$NONINTERACTIVE" = false ]; then
    JIRA_API_TOKEN=$(prompt_secret "JIRA_API_TOKEN" "JIRA API Token (https://id.atlassian.com/manage/api-tokens): ")
    JIRA_EMAIL=$(prompt_secret "JIRA_EMAIL" "JIRA Email: ")
    TELEGRAM_BOT_TOKEN=$(prompt_secret "TELEGRAM_BOT_TOKEN" "Telegram Bot Token (from @BotFather): ")
    OPENROUTER_API_KEY=$(prompt_secret "OPENROUTER_API_KEY" "OpenRouter API Key (leave blank to keep existing): " "true")
  else
    echo "  → Non-interactive mode — using environment variables"
    : "${JIRA_API_TOKEN:?JIRA_API_TOKEN not set}"
    : "${JIRA_EMAIL:?JIRA_EMAIL not set}"
    : "${TELEGRAM_BOT_TOKEN:?TELEGRAM_BOT_TOKEN not set}"
    # Optional secret: give it a defined (possibly empty) value so later
    # steps can reference it safely.
    OPENROUTER_API_KEY="${OPENROUTER_API_KEY:-}"
  fi
  echo "  ✓ Secrets collected"

  # Step 5: Create directories
  echo "[3/14] Creating directories..."
  create_directories

  # Step 6: Backup existing config
  echo "[4/14] Backing up existing config..."
  backup_config

  # Step 7: Generate config.yaml
  echo "[5/14] Generating config.yaml..."
  generate_config_yaml

  # Step 8: Generate .env
  echo "[6/14] Generating .env..."
  generate_env_file

  # Step 9: Generate hindsight config
  echo "[7/14] Generating hindsight config..."
  generate_hindsight_config

  # Step 10: Generate cron env config
  echo "[8/14] Configuring cron environment..."
  generate_cron_env_config

  # Step 11: Write session-init script
  echo "[9/14] Writing session-init script..."
  write_session_init_script

  # Step 12: Write archive script
  echo "[10/14] Writing archive script..."
  write_archive_script

  # Step 13: Write skill files
  echo "[11/14] Writing skill files..."
  write_jira_skill
  write_aws_skill
  write_confluence_skill
  write_bitbucket_skill
  write_session_skill

  # Step 14: Register cron jobs
  echo "[12/14] Registering cron jobs..."
  register_cron_jobs || echo "  ⚠ Cron registration had issues (may already exist)"

  echo "[13/14] Setup complete."
  echo "[14/14] Offering gateway restart..."
  offer_gateway_restart
}

main "$@"