- write_session_init_script: mount verification via shell_init_files (D-10) - write_archive_script: DRY_RUN=true archive script for stale sessions (D-10) - write_jira_skill, write_aws_skill, write_confluence_skill, write_bitbucket_skill, write_session_skill: all 5 skills with 2 reference files embedded as heredocs - register_cron_jobs: 3 cron jobs via hermes cron create (ngn-daily-report, ngn-weekly-stale-summary, ngn-weekly-archive) - offer_gateway_restart: prompt to restart Hermes gateway at end - Main execution block [1/14] through [14/14] with progress indicators - Best-effort error handling for non-critical steps - D-10 referenced throughout for traceability
1341 lines
44 KiB
Bash
Executable File
1341 lines
44 KiB
Bash
Executable File
#!/bin/bash
|
||
# setup-ngn-agent.sh — Portable ngn-agent configuration setup
|
||
#
|
||
# Phase 9, Plan 2 — recreates all configuration on a fresh macOS machine
|
||
# Assumes Hermes v0.16+ is installed (per D-07)
|
||
#
|
||
# Embedded file snapshots frozen at: 2026-06-15
|
||
# Regenerate by re-running this phase.
|
||
#
|
||
# D-06: Single script recreating all ngn-agent configuration
|
||
# D-07: Requires Hermes v0.16+ on PATH
|
||
# D-08: Interactive secrets: JIRA_API_TOKEN, JIRA_EMAIL, TELEGRAM_BOT_TOKEN, OPENROUTER_API_KEY
|
||
# D-09: Configurable paths via arguments (SSH keys, repos, timezone)
|
||
# D-10: Creates/updates: config.yaml, .env, hindsight/config.json, scripts, skills, cron jobs
|
||
#
|
||
set -euo pipefail
|
||
|
||
# ---- Usage ----
|
||
# Print the command-line help text to stdout.
# Takes no arguments; callers decide the exit status.
usage() {
  printf '%s\n' \
    'usage: setup-ngn-agent.sh [OPTIONS]' \
    '' \
    'Portable ngn-agent configuration setup for macOS + Hermes v0.16+' \
    '' \
    'Options:' \
    '-s1, --ssh-key-1 PATH SSH private key path 1 (default: ~/.ssh/id_ed25519razer)' \
    '-s2, --ssh-key-2 PATH SSH private key path 2 (default: ~/.ssh/id_rsa)' \
    '-sc, --ssh-config PATH SSH config path (default: ~/.ssh/config)' \
    '-sh, --ssh-known-hosts PATH SSH known_hosts path (default: ~/.ssh/known_hosts)' \
    '-r1, --repo-ops PATH rai-ops repo path (default: ~/Razer/rai-ops)' \
    '-r2, --repo-deploy PATH rai-deployment repo path (default: ~/Razer/rai-deployment)' \
    '-r3, --repo-devtools PATH rai-devtools repo path (default: ~/Razer/rai-devtools)' \
    '-t, --timezone ZONE Timezone (default: Asia/Singapore)' \
    '-d, --docker-image TAG Docker image tag (default: ngn-agent:latest)' \
    '-y, --yes Non-interactive mode (skip prompts, use env vars)' \
    '-h, --help Show this help message' \
    '' \
    'Secrets are prompted interactively with masked input unless -y is set,' \
    'in which case they are read from environment variables.'
}
|
||
|
||
# ---- Argument defaults ----
|
||
# Path/timezone/image defaults. A value already present in the environment
# wins; ":=" only assigns when the variable is unset or empty.
: "${SSH_KEY_1:=$HOME/.ssh/id_ed25519razer}"
: "${SSH_KEY_2:=$HOME/.ssh/id_rsa}"
: "${SSH_CONFIG:=$HOME/.ssh/config}"
: "${SSH_KNOWN_HOSTS:=$HOME/.ssh/known_hosts}"
: "${REPO_OPS:=$HOME/Razer/rai-ops}"
: "${REPO_DEPLOY:=$HOME/Razer/rai-deployment}"
: "${REPO_DEVTOOLS:=$HOME/Razer/rai-devtools}"
: "${TIMEZONE:=Asia/Singapore}"
: "${DOCKER_IMAGE:=ngn-agent:latest}"
# Always starts interactive; only the -y/--yes flag flips this.
NONINTERACTIVE=false
|
||
|
||
# ---- Argument parsing (per D-09) ----
|
||
# Parse command-line options into the configuration globals (per D-09).
#
# Arguments: the script's own "$@".
# Globals written: SSH_KEY_1, SSH_KEY_2, SSH_CONFIG, SSH_KNOWN_HOSTS, REPO_OPS,
#                  REPO_DEPLOY, REPO_DEVTOOLS, TIMEZONE, DOCKER_IMAGE,
#                  NONINTERACTIVE.
# Exits: 0 after printing help (-h); 1 on an unknown option or when an option
#        that takes a value is given without one.
parse_cli_args() {
  while [[ $# -gt 0 ]]; do
    # Options that take a value: fail with a readable message instead of the
    # "unbound variable" abort that `set -u` would raise on a missing "$2".
    case "$1" in
      -s1|--ssh-key-1|-s2|--ssh-key-2|-sc|--ssh-config|-sh|--ssh-known-hosts|\
      -r1|--repo-ops|-r2|--repo-deploy|-r3|--repo-devtools|\
      -t|--timezone|-d|--docker-image)
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires a value" >&2
          exit 1
        fi
        ;;
    esac

    case "$1" in
      -s1|--ssh-key-1)
        SSH_KEY_1="$2"; shift 2 ;;
      -s2|--ssh-key-2)
        SSH_KEY_2="$2"; shift 2 ;;
      -sc|--ssh-config)
        SSH_CONFIG="$2"; shift 2 ;;
      -sh|--ssh-known-hosts)
        SSH_KNOWN_HOSTS="$2"; shift 2 ;;
      -r1|--repo-ops)
        REPO_OPS="$2"; shift 2 ;;
      -r2|--repo-deploy)
        REPO_DEPLOY="$2"; shift 2 ;;
      -r3|--repo-devtools)
        REPO_DEVTOOLS="$2"; shift 2 ;;
      -t|--timezone)
        TIMEZONE="$2"; shift 2 ;;
      -d|--docker-image)
        DOCKER_IMAGE="$2"; shift 2 ;;
      -y|--yes)
        NONINTERACTIVE=true; shift ;;
      -h|--help)
        usage; exit 0 ;;
      *)
        # Diagnostics belong on stderr so they are not mistaken for output.
        echo "Unknown option: $1" >&2
        usage >&2; exit 1 ;;
    esac
  done
}

parse_cli_args "$@"
|
||
|
||
# ---- Interactive secret prompt (per D-08) ----
|
||
# T-09-05 mitigation: read -s for masked input, no echo to terminal
|
||
# Obtain a secret and print it (and nothing else) on stdout.
#
# Arguments:
#   $1 - name of the variable to consult first; if it is already non-empty
#        its value is used and no prompt is shown
#   $2 - prompt text
#   $3 - "true" if an empty answer is acceptable (default: "false")
# Outputs:
#   stdout - the secret followed by a newline (just a newline when an optional
#            secret is left empty)
#   stderr - the prompt, notices and warnings, so that a caller capturing
#            stdout with $(...) receives only the secret
# Returns:
#   0 on success; 1 if input ends (EOF) before a required value was entered.
prompt_secret() {
  local var_name="$1"
  local prompt_text="$2"
  local is_optional="${3:-false}"
  local val=""

  # If env var is already set (e.g., user exported it), skip prompt
  if [ -n "${!var_name:-}" ]; then
    echo " → ${var_name} already set (using environment value)" >&2
    printf '%s\n' "${!var_name}"
    return 0
  fi

  while [ -z "$val" ]; do
    # IFS= and -r keep the token byte-exact (no whitespace trimming, no
    # backslash processing); -s masks the input (T-09-05).
    if ! IFS= read -r -s -p "${prompt_text}" val; then
      echo >&2
      # EOF: accept a final unterminated line, otherwise stop instead of
      # re-prompting forever on a closed stdin.
      if [ -n "$val" ]; then
        break
      fi
      if [ "$is_optional" = "true" ]; then
        echo ""
        return 0
      fi
      echo " ERROR: no input available for ${var_name}" >&2
      return 1
    fi
    echo >&2   # move past the masked input line
    if [ -z "$val" ] && [ "$is_optional" = "true" ]; then
      # Optional and empty — return empty string
      echo ""
      return 0
    elif [ -z "$val" ]; then
      echo " ⚠ Value cannot be empty. Press Ctrl+C to cancel." >&2
    fi
  done
  printf '%s\n' "$val"
}
|
||
|
||
# ---- Prerequisite checks ----
|
||
# Verify the tools and paths the setup depends on.
#
# Hard requirements (script exits 1 if missing): the `hermes` CLI on PATH
# (per D-07) and a responsive `docker info`. SSH files and repo directories
# are only reported; a missing one produces a warning, not a failure.
# Globals read: SSH_KEY_1, SSH_KEY_2, SSH_CONFIG, SSH_KNOWN_HOSTS,
#               REPO_OPS, REPO_DEPLOY, REPO_DEVTOOLS.
check_prerequisites() {
  local idx

  echo " → Checking prerequisites..."

  # 1. Hermes CLI installed (per D-07)
  command -v hermes >/dev/null 2>&1 || {
    echo " ERROR: Hermes CLI not found — install v0.16+ first."
    echo " See: https://github.com/nousresearch/hermes"
    exit 1
  }
  echo " ✓ Hermes CLI found: $(hermes --version 2>/dev/null || echo 'unknown version')"

  # 2. Docker running
  docker info >/dev/null 2>&1 || {
    echo " ERROR: Docker is not running."
    echo " Start Docker Desktop or Orbstack first."
    exit 1
  }
  echo " ✓ Docker is running"

  # 3. SSH files: label when present / label when missing / path
  local -a found_labels=("SSH key 1" "SSH key 2" "SSH config" "SSH known_hosts")
  local -a missing_labels=("SSH key" "SSH key" "SSH config" "SSH known_hosts")
  local -a ssh_paths=("$SSH_KEY_1" "$SSH_KEY_2" "$SSH_CONFIG" "$SSH_KNOWN_HOSTS")
  for idx in 0 1 2 3; do
    if [ -f "${ssh_paths[idx]}" ]; then
      echo " ✓ ${found_labels[idx]}: ${ssh_paths[idx]}"
    else
      echo " ⚠ ${missing_labels[idx]} not found: ${ssh_paths[idx]}"
    fi
  done

  # 4. Repo directories
  local -a repo_labels=("ops" "deploy" "devtools")
  local -a repo_paths=("$REPO_OPS" "$REPO_DEPLOY" "$REPO_DEVTOOLS")
  for idx in 0 1 2; do
    if [ -d "${repo_paths[idx]}" ]; then
      echo " ✓ Repo (${repo_labels[idx]}): ${repo_paths[idx]}"
    else
      echo " ⚠ Repo not found: ${repo_paths[idx]}"
    fi
  done
}
|
||
|
||
# ---- Print path summary ----
|
||
# Print the effective paths, timezone and Docker image, framed by blank lines.
# Reads the configuration globals; writes only to stdout.
print_summary() {
  printf '%s\n' \
    "" \
    " Configuration paths:" \
    " SSH key 1: ${SSH_KEY_1}" \
    " SSH key 2: ${SSH_KEY_2}" \
    " SSH config: ${SSH_CONFIG}" \
    " SSH known_hosts: ${SSH_KNOWN_HOSTS}" \
    " Repo (ops): ${REPO_OPS}" \
    " Repo (deploy): ${REPO_DEPLOY}" \
    " Repo (devtools): ${REPO_DEVTOOLS}" \
    " Timezone: ${TIMEZONE}" \
    " Docker image: ${DOCKER_IMAGE}" \
    ""
}
|
||
|
||
# ---- Create config directories ----
|
||
# Create the ~/.hermes sub-directories the later steps write into.
# Safe to re-run: mkdir -p leaves existing directories alone.
create_directories() {
  local subdir

  echo " → Creating config directories..."
  for subdir in scripts hindsight skills/ngn-agent archive/sessions; do
    mkdir -p "$HOME/.hermes/${subdir}"
  done
  echo " ✓ Directories created"
}
|
||
|
||
# ---- Backup existing config (per Anti-Pattern 4, T-09-07 mitigation) ----
|
||
# Copy an existing ~/.hermes/config.yaml to a timestamped
# config.yaml.bak.YYYYMMDD_HHMMSS sibling before it is modified
# (Anti-Pattern 4, T-09-07 mitigation). Does nothing but report when there is
# no config.yaml yet.
backup_config() {
  local config_file="$HOME/.hermes/config.yaml"
  local stamp bak_file

  if [ ! -f "$config_file" ]; then
    echo " → No existing config.yaml to backup"
    return 0
  fi

  # Declared separately from the assignment so a failing `date` is not masked
  # by the always-zero status of `local`.
  stamp=$(date +%Y%m%d_%H%M%S)
  bak_file="${config_file}.bak.${stamp}"

  # -p keeps the original mode so the backup is never more readable than the
  # file it was copied from.
  cp -p -- "$config_file" "$bak_file"

  # ${var##*/} instead of an unquoted `basename $var`, which printed the wrong
  # name whenever $HOME contained whitespace.
  echo " ✓ Backed up config.yaml → ${bak_file##*/}"
}
|
||
|
||
# =============================================================================
|
||
# Task 2: Config Generation (D-10)
|
||
# =============================================================================
|
||
|
||
# ---- Generate config.yaml ----
|
||
# Uses hermes config set for simple keys; Python yaml, or a plain-text fallback when PyYAML is unavailable, for arrays
|
||
# Write the ngn-agent settings into ~/.hermes/config.yaml (D-10).
#
# Scalar keys are set through `hermes config set`. The list-valued keys under
# `terminal` (docker_volumes, docker_forward_env, shell_init_files) are then
# written either with PyYAML or, when PyYAML is not importable, by a
# plain-text fallback.
#
# Globals read: DOCKER_IMAGE, TIMEZONE, SSH_KEY_1, SSH_KEY_2, SSH_CONFIG,
#               SSH_KNOWN_HOSTS, REPO_OPS, REPO_DEPLOY, REPO_DEVTOOLS, HOME.
# Requires: `hermes` on PATH; python3 is only probed, not required.
generate_config_yaml() {
  echo " → Generating config.yaml (D-10)..."

  local config_file="$HOME/.hermes/config.yaml"

  # Scalars via hermes config set
  hermes config set terminal.backend docker
  hermes config set terminal.docker_image "${DOCKER_IMAGE}"
  hermes config set terminal.cwd /workspace
  hermes config set terminal.container_memory 5120
  hermes config set terminal.container_disk 51200
  hermes config set terminal.container_cpu 1
  hermes config set terminal.lifetime_seconds 300
  hermes config set memory.provider hindsight
  hermes config set terminal.timezone "${TIMEZONE}"
  hermes config set telegram.reactions false
  hermes config set terminal.docker_env.AWS_REGION us-east-1
  hermes config set terminal.container_persistent true
  hermes config set terminal.docker_mount_cwd_to_workspace true
  echo " ✓ Scalar config keys set"

  # host:container:mode bind mounts, built once and shared by both code paths
  # so the two can never drift apart.
  local -a volumes=(
    "${SSH_KEY_1}:/root/.ssh/id_ed25519razer:ro"
    "${SSH_KEY_2}:/root/.ssh/id_rsa:ro"
    "${SSH_CONFIG}:/root/.ssh/config:ro"
    "${SSH_KNOWN_HOSTS}:/root/.ssh/known_hosts:ro"
    "${HOME}/.aws/config:/root/.aws/config:ro"
    "${HOME}/.aws/sso/cache:/root/.aws/sso/cache:rw"
    "${REPO_OPS}:/workspace/rai-ops:rw"
    "${REPO_DEPLOY}:/workspace/rai-deployment:rw"
    "${REPO_DEVTOOLS}:/workspace/rai-devtools:rw"
    "${HOME}/.hermes/scripts:/usr/local/bin:ro"
  )

  # Complex structures (arrays): try Python yaml first, fall back to plain text
  if python3 -c "import yaml; import os" 2>/dev/null; then
    echo " → Using Python yaml for array structures..."
    # Values travel as argv and the program text is a quoted heredoc, so a
    # path containing quotes or backslashes can neither break nor inject
    # into the Python source.
    python3 - "$config_file" "${volumes[@]}" <<'PYEOF'
import sys

import yaml

path, volumes = sys.argv[1], sys.argv[2:]

with open(path) as f:
    # An empty file loads as None; treat it as an empty mapping.
    config = yaml.safe_load(f) or {}

# Tolerate a missing or empty `terminal` section instead of raising.
terminal = config.get('terminal') or {}
config['terminal'] = terminal

terminal['docker_volumes'] = volumes
terminal['docker_forward_env'] = ['JIRA_EMAIL', 'JIRA_API_TOKEN', 'DEFAULT_REPOS']
terminal['shell_init_files'] = ['/usr/local/bin/session-init.sh']

with open(path, 'w') as f:
    yaml.dump(config, f, default_flow_style=False)
PYEOF
    echo " ✓ Array structures set via Python yaml"
  else
    echo " → Python yaml not available, using sed fallback..."

    # Build the list block with the real paths expanded. Each volume is a
    # single-quoted YAML scalar (embedded ' doubled) so spaces, '#' or ': '
    # inside a path cannot change the document structure.
    local sq="'" nl=$'\n' vol
    local block="  docker_volumes:${nl}"
    for vol in "${volumes[@]}"; do
      block+="    - '${vol//$sq/$sq$sq}'${nl}"
    done
    block+="  docker_forward_env:${nl}"
    block+="    - JIRA_EMAIL${nl}    - JIRA_API_TOKEN${nl}    - DEFAULT_REPOS${nl}"
    block+="  shell_init_files:${nl}"
    block+="    - /usr/local/bin/session-init.sh${nl}"

    if grep -q '^terminal:[[:space:]]*$' "$config_file" 2>/dev/null; then
      # Insert under the existing top-level `terminal:` key rather than
      # appending a second one (duplicate keys would shadow the scalars set
      # above). NOTE(review): assumes the existing children use two-space
      # indentation and that these three keys are not already present —
      # confirm against the file Hermes writes; re-runs are not deduplicated.
      local tmp_file
      tmp_file=$(mktemp "${config_file}.XXXXXX")
      NGN_YAML_BLOCK="$block" awk '
        { print }
        /^terminal:[[:space:]]*$/ && !inserted {
          printf "%s", ENVIRON["NGN_YAML_BLOCK"]
          inserted = 1
        }
      ' "$config_file" > "$tmp_file"
      mv -- "$tmp_file" "$config_file"
    else
      printf '\nterminal:\n%s' "$block" >> "$config_file"
    fi
    echo " ✓ Array structures set via sed (partial — review config.yaml)"
  fi

  # Validate
  local img
  img=$(hermes config get terminal.docker_image 2>/dev/null || echo "unset")
  echo " ✓ Verified: terminal.docker_image = ${img}"
}
|
||
|
||
# ---- Generate .env file ----
|
||
# T-09-06 mitigation: chmod 600 on .env immediately after writing
|
||
# Write ~/.hermes/.env containing the collected secrets and fixed settings
# (D-08, D-10).
#
# Globals read: OPENROUTER_API_KEY, JIRA_API_TOKEN, JIRA_EMAIL,
#               TELEGRAM_BOT_TOKEN, and optionally HINDSIGHT_LLM_API_KEY
#               (falls back to OPENROUTER_API_KEY).
# T-09-06: the file is created/forced to mode 600 BEFORE any secret is
# written, so there is no window in which the secrets sit in a file readable
# under the default umask.
generate_env_file() {
  echo " → Generating .env (D-08, D-10)..."

  local env_file="$HOME/.hermes/.env"

  # Resolve HINDSIGHT_LLM_API_KEY — defaults to OPENROUTER_API_KEY if not separately provided
  local hind_key="${HINDSIGHT_LLM_API_KEY:-${OPENROUTER_API_KEY}}"

  # Create the (still empty) file with a restrictive umask, then tighten a
  # pre-existing file too; the truncating redirect below keeps this mode.
  ( umask 077 && : >> "$env_file" )
  chmod 600 "$env_file"

  cat > "$env_file" << ENVEOF
# ngn-agent Environment — generated by setup-ngn-agent.sh
# Embedded file snapshots frozen at: 2026-06-15

# =============================================================================
# LLM PROVIDER (OpenRouter)
# =============================================================================
OPENROUTER_API_KEY=${OPENROUTER_API_KEY}

# =============================================================================
# ATLASSIAN INTEGRATION
# =============================================================================
JIRA_API_TOKEN=${JIRA_API_TOKEN}
JIRA_EMAIL=${JIRA_EMAIL}

# =============================================================================
# TELEGRAM GATEWAY
# =============================================================================
TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN}
TELEGRAM_ALLOWED_USERS=474440517

# =============================================================================
# HINDSIGHT MEMORY
# =============================================================================
HINDSIGHT_LLM_API_KEY=${hind_key}

# =============================================================================
# NGN-AGENT CONFIG
# =============================================================================
DEFAULT_REPOS=rai-ops,rai-deployment,rai-devtools
TERMINAL_TIMEOUT=60
TERMINAL_LIFETIME_SECONDS=300
ENVEOF

  echo " ✓ .env written with chmod 600 (T-09-06)"
}
|
||
|
||
# ---- Generate hindsight config.json (D-10) ----
|
||
# Write the fixed Hindsight memory settings to ~/.hermes/hindsight/config.json
# (D-10). The JSON is static: nothing is expanded inside the quoted heredoc.
# The target directory must already exist.
generate_hindsight_config() {
  local json_path="$HOME/.hermes/hindsight/config.json"

  echo " → Generating hindsight/config.json (D-10)..."

  cat > "$json_path" << 'JSONEOF'
{
  "mode": "local_embedded",
  "llm_provider": "openrouter",
  "llm_base_url": "https://openrouter.ai/api/v1",
  "llm_model": "qwen/qwen3.5-9b",
  "bank_id": "hermes",
  "recall_budget": "low",
  "recall_prefetch_method": "recall",
  "auto_recall": true,
  "recall_types": "observation",
  "auto_retain": true,
  "retain_async": true,
  "retain_every_n_turns": 5,
  "memory_mode": "hybrid"
}
JSONEOF

  echo " ✓ hindsight/config.json written"
}
|
||
|
||
# ---- Generate cron env config (D-10) ----
|
||
# Store the Jira credentials under cron.env.* via `hermes config set` (D-10).
#
# Best-effort: a failing `hermes config set` produces a warning and the
# function still returns 0, but the final line now reflects what actually
# happened instead of always claiming success.
# Globals read: JIRA_EMAIL, JIRA_API_TOKEN, HOME.
generate_cron_env_config() {
  local key
  local failures=0

  echo " → Configuring cron environment (D-10)..."

  for key in JIRA_EMAIL JIRA_API_TOKEN; do
    if ! hermes config set "cron.env.${key}" "${!key}" 2>/dev/null; then
      echo " ⚠ Could not set cron.env.${key}"
      failures=$((failures + 1))
    fi
  done

  # NOTE(review): presumably `hermes config set` persists these values in
  # ~/.hermes/config.yaml, which would then hold the API token in plain text;
  # keep that file owner-only, mirroring the .env handling (T-09-06).
  if [ -f "$HOME/.hermes/config.yaml" ]; then
    chmod 600 "$HOME/.hermes/config.yaml" 2>/dev/null || \
      echo " ⚠ Could not restrict permissions on config.yaml"
  fi

  if [ "$failures" -eq 0 ]; then
    echo " ✓ Cron env vars configured"
  else
    echo " ⚠ Cron env vars not fully configured (${failures} of 2 failed)"
  fi
}
|
||
|
||
# =============================================================================
|
||
# Task 3: File/Cron Setup (D-10)
|
||
# =============================================================================
|
||
|
||
# ---- Write session-init.sh (D-10) ----
|
||
# Install ~/.hermes/scripts/session-init.sh (D-10) and mark it executable.
#
# The embedded script checks that every repo named in the comma-separated
# DEFAULT_REPOS has a .git directory under /workspace and reports the result;
# it always exits 0. The heredoc delimiter is quoted, so the script text is
# written verbatim with no expansion at install time.
# NOTE(review): the script ends with `exit 0` and sets shell options; if
# shell_init_files entries are sourced rather than executed this would affect
# the calling shell — confirm how Hermes runs them.
write_session_init_script() {
  local script_path="$HOME/.hermes/scripts/session-init.sh"

  echo " → Writing session-init.sh (D-10)..."

  cat > "$script_path" << 'SCRIPT'
#!/bin/bash
# session-init.sh — Verify DEFAULT_REPOS mounts at session start
# Runs via shell_init_files before agent prompt. Non-blocking.
# Reads DEFAULT_REPOS from environment (forwarded via docker_forward_env).
set -uo pipefail

DEFAULT_REPOS="${DEFAULT_REPOS:-}"

if [ -z "$DEFAULT_REPOS" ]; then
  echo "[session-init] DEFAULT_REPOS not set — skipping verification"
  exit 0
fi

# Split comma-separated list
IFS=',' read -ra REPOS <<< "$DEFAULT_REPOS"
ALL_OK=true

for repo in "${REPOS[@]}"; do
  # Trim whitespace
  repo="${repo#"${repo%%[![:space:]]*}"}"
  repo="${repo%"${repo##*[![:space:]]}"}"

  if [ -d "/workspace/$repo/.git" ]; then
    echo "[session-init] ✓ $repo — mounted at /workspace/$repo"
  else
    echo "[session-init] ⚠ $repo — NOT FOUND at /workspace/$repo"
    ALL_OK=false
  fi
done

if [ "$ALL_OK" = true ]; then
  echo "[session-init] All DEFAULT_REPOS verified"
else
  echo "[session-init] Some repos missing — check docker_volumes in config.yaml"
fi

exit 0 # always exit cleanly — non-blocking
SCRIPT

  chmod +x "$script_path"
  echo " ✓ session-init.sh written and executable"
}
|
||
|
||
# ---- Write archive-stale-sessions.sh (D-10) ----
|
||
# Install ~/.hermes/scripts/archive-stale-sessions.sh (D-10) and mark it
# executable.
#
# The embedded script exports the session store to a timestamped .jsonl file
# under ~/.hermes/archive/sessions, prunes sessions older than 30 days only
# when its DRY_RUN variable is edited to false (it ships as true), and then
# prints `hermes sessions stats`. The heredoc delimiter is quoted, so the
# script text is written verbatim.
write_archive_script() {
  local script_path="$HOME/.hermes/scripts/archive-stale-sessions.sh"

  echo " → Writing archive-stale-sessions.sh (D-10)..."

  cat > "$script_path" << 'SCRIPT'
#!/bin/bash
# Archive stale sessions (inactive >30 days) and prune from live DB
# This script runs via hermes cron with --no-agent
# Stdout is delivered to Telegram via --deliver telegram
# Dry-run mode: export only, no prune — safe default for first run
set -euo pipefail

DRY_RUN=true

ARCHIVE_DIR="$HOME/.hermes/archive/sessions"
mkdir -p "$ARCHIVE_DIR"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
OUTPUT_FILE="$ARCHIVE_DIR/sessions-${TIMESTAMP}.jsonl"

echo "=== Stale Session Archive ==="
echo "Started: $(date)"
echo "Dry run: $DRY_RUN"
echo ""

echo "[1/3] Exporting session store..."
echo " Output: $OUTPUT_FILE"
hermes sessions export "$OUTPUT_FILE"
echo " -> $(wc -l < "$OUTPUT_FILE") sessions exported"
echo " -> Size: $(du -h "$OUTPUT_FILE" | cut -f1)"
echo ""

if [ "$DRY_RUN" = false ]; then
  echo "[2/3] Pruning sessions older than 30 days..."
  hermes sessions prune --older-than 30 --yes
  echo " Done."
else
  echo "[2/3] SKIPPED (dry run) — set DRY_RUN=false to enable prune"
  echo " Review $OUTPUT_FILE before enabling."
fi
echo ""

echo "[3/3] Post-archive stats:"
hermes sessions stats
echo ""

echo "✓ Archive complete."
SCRIPT

  chmod +x "$script_path"
  echo " ✓ archive-stale-sessions.sh written and executable"
}
|
||
|
||
# ---- Write skill files (D-10) ----
|
||
write_jira_skill() {
|
||
mkdir -p "$HOME/.hermes/skills/ngn-agent/jira"
|
||
cat > "$HOME/.hermes/skills/ngn-agent/jira/SKILL.md" << 'SKILL'
|
||
---
|
||
name: jira-query
|
||
description: Query Jira Cloud issues, search, and manage tickets
|
||
metadata:
|
||
hermes:
|
||
tags: [jira, project-management]
|
||
category: devops
|
||
requires_toolsets: [terminal]
|
||
version: 1.0.0
|
||
---
|
||
# Jira Cloud Query
|
||
|
||
## When to Use
|
||
When the user asks to search Jira issues, check ticket status, or list project work.
|
||
|
||
## Procedure
|
||
|
||
### 1. Search issues by JQL
|
||
```bash
|
||
ngn-jira GET '/rest/api/3/search?jql=ORDER BY created DESC&maxResults=10'
|
||
```
|
||
|
||
For specific project:
|
||
```bash
|
||
ngn-jira GET '/rest/api/3/search?jql=project=PROJ ORDER BY created DESC&maxResults=10'
|
||
```
|
||
|
||
### 2. Get issue details
|
||
```bash
|
||
ngn-jira GET '/rest/api/3/issue/PROJ-123'
|
||
```
|
||
|
||
### 3. List sprints (if Jira Software)
|
||
```bash
|
||
ngn-jira GET '/rest/agile/1.0/board'
|
||
ngn-jira GET '/rest/agile/1.0/board/{boardId}/sprint?state=active'
|
||
```
|
||
|
||
### 4. Get issue comments
|
||
```bash
|
||
ngn-jira GET '/rest/api/3/issue/PROJ-123/comment'
|
||
```
|
||
|
||
## Pitfalls
|
||
- JQL is case-sensitive for field names
|
||
- maxResults defaults to 50; set explicitly for large queries
|
||
- Agile REST API may not be available on all plans
|
||
|
||
## Required Environment
|
||
- `JIRA_EMAIL` — your Atlassian account email
|
||
- `JIRA_API_TOKEN` — from https://id.atlassian.com/manage/api-tokens
|
||
SKILL
|
||
echo " ✓ jira/SKILL.md written"
|
||
}
|
||
|
||
# Install the AWS diagnostics skill (D-10): SKILL.md plus its reference file
# references/multiregional-patterns.md, both under
# ~/.hermes/skills/ngn-agent/aws-diagnostics. Both documents are written
# verbatim from quoted heredocs.
# NOTE(review): front-matter nesting (metadata → hermes → tags/category/
# requires_toolsets, `version` at top level) — confirm against the Hermes
# skill format.
write_aws_skill() {
  local skill_dir="$HOME/.hermes/skills/ngn-agent/aws-diagnostics"

  mkdir -p "${skill_dir}/references"
  cat > "${skill_dir}/SKILL.md" << 'SKILL'
---
name: aws-diagnostics
description: Read-only AWS diagnostics for platform engineering
metadata:
  hermes:
    tags: [aws, diagnostics, platform-engineering]
    category: devops
    requires_toolsets: [terminal]
version: 1.0.0
---
# AWS Diagnostics

## When to Use
When the user asks to check AWS resources, investigate issues, or audit infrastructure in any account.

## Important
- ALWAYS determine the correct AWS_PROFILE before running commands
- NEVER run mutating AWS commands (delete, terminate, stop, modify)
- Prefer read-only AWS CLI commands (describe, list, get)

## Procedure

### 1. Identify the target account
Ask the user which account/environment they want to target. Available profiles in `/.aws/config`:
- `rzaws-sw-rai-ava-dev/prod/rc` — AVA service
- `rzaws-sw-rai-cs-dev/prod/rc` — CS service
- `rzaws-sw-rai-qac-dev/prod` — QAC
- `rzaws-sw-rai-ops` — Ops account
- `rzaws-sw-rai-voicekit-dev/prod/rc` — VoiceKit
- `rzaws-sw-rai-preprod` — Pre-production
- `rzaws-sw-rai-nonprod` — Non-production

### 2. Set the profile
```bash
export AWS_PROFILE=rzaws-sw-rai-<service>-<env>
```

### 3. Diagnostic commands

**EC2 instances:**
```bash
aws ec2 describe-instances --query 'Reservations[*].Instances[*].[InstanceId,State.Name,InstanceType,Tags[?Key==`Name`].Value|[0]]' --output table
```

**ECS services:**
```bash
aws ecs list-clusters && aws ecs list-services --cluster <name>
```

**S3 buckets:**
```bash
aws s3 ls
```

**CloudWatch alarms:**
```bash
aws cloudwatch describe-alarms --state-value ALARM --output table
```

**ECS task health:**
```bash
aws ecs describe-tasks --cluster <name> --tasks <task-ids>
```

**RDS instances:**
```bash
aws rds describe-db-instances --query 'DBInstances[*].[DBInstanceIdentifier,DBInstanceStatus,Engine,DBInstanceClass]' --output table
```

**Lambda functions:**
```bash
aws lambda list-functions --query 'Functions[*].[FunctionName,Runtime,LastModified]' --output table
```

**ELB target group health:**
```bash
aws elbv2 describe-target-groups --query 'TargetGroups[*].[TargetGroupName,TargetType]' --output table
```

### 4. Report findings
Format as a concise table. Include account ID and profile used.

## Alternative: Infrastructure Code Analysis

When AWS CLI access is unavailable (Docker containers, credential issues), examine existing infrastructure code instead:

```bash
# Search for region patterns
search_files --pattern="us-west-2" --path="/workspace"

# Check terraform configurations
read_file /workspace/rai-ops/aws/<account>/us-east-1/app/main.tf
read_file /workspace/rai-ops/aws/<account>/us-east-1/app/<app>.tfvars

# Look for provider configurations
search_files --pattern="provider.*replica" --path="/workspace/rai-ops"

# Check S3 migration data
search_files --pattern="s3-mapping" --target="files"
```

**When to use code analysis:**
- Docker container with read-only filesystem
- Missing AWS CLI or credentials
- Need to understand intended architecture vs live state
- Investigating multiregional setup patterns

## Pitfalls
- SSO tokens expire (~6-8h). If you get auth errors, ask the user to run `aws sso login`
- Some accounts may not have all services — check `aws sts get-caller-identity` first
- Don't pipe large results directly — use `--query` and `--output table` for readability
- **Don't persist with CLI installation in constrained environments** — switch to code analysis quickly when installation fails

## Verification
Run `aws sts get-caller-identity` to confirm the correct profile is active before running diagnostics.

## References
- `references/multiregional-patterns.md` - Terraform patterns for cross-region infrastructure setup
SKILL

  # AWS reference file
  cat > "${skill_dir}/references/multiregional-patterns.md" << 'REF'
# Multiregional Infrastructure Patterns

## AVA Multiregional Setup

### Provider Configuration Pattern
```hcl
# Primary provider (us-east-1)
provider "aws" {
  region = var.region
  # ... assume_role block
}

# Replica provider (us-west-2)
provider "aws" {
  alias = "replica"
  region = "us-west-2"
  # ... same assume_role block
}
```

### Module Consumption
```hcl
module "app" {
  providers = {
    aws = aws
    aws.replica = aws.replica # Required by tf-modules/app/versions.tf
  }
  # ... other config
}
```

### Database Replication Options

**RDS Aurora PostgreSQL (Current Pattern)**
- Engine: `aurora-postgresql`
- Version: `16.11`
- Cross-region read replicas supported
- Can promote replica for DR scenarios

**DynamoDB Global Tables (Available)**
- Global Table v2 with us-east-1 + us-west-2 replicas
- Per-region CMKs for encryption
- Feature-flagged via `var.tenant_registry`
- Documented in RAID-352

### S3 Cross-Region Replication
Extensive existing pattern from migration data:
- `ava-{env}-west-*` buckets in us-west-2
- Matching `rai-s3-usw2-*` naming convention
- Covers: bug reports, screenshots, game logs, shiny moments

### Key Files for Multiregional Analysis
- `aws/<account>/us-east-1/app/provider.tf` - Replica provider config
- `aws/<account>/us-east-1/app/<app>.tfvars` - App-specific resources
- `raid-migration/raid-s3-migration/s3-mapping.csv` - Cross-region S3 inventory
- `RAID-352-PR-DESCRIPTION.md` - DynamoDB Global Tables documentation
REF
  echo " ✓ aws-diagnostics/SKILL.md + references written"
}
|
||
|
||
# Install the Confluence search skill at
# ~/.hermes/skills/ngn-agent/confluence/SKILL.md (D-10). The Markdown is
# written verbatim from a quoted heredoc.
# NOTE(review): front-matter nesting (metadata → hermes → tags/category/
# requires_toolsets, `version` at top level) — confirm against the Hermes
# skill format.
write_confluence_skill() {
  local skill_dir="$HOME/.hermes/skills/ngn-agent/confluence"

  mkdir -p "$skill_dir"
  cat > "${skill_dir}/SKILL.md" << 'SKILL'
---
name: confluence-search
description: Search and retrieve Confluence pages
metadata:
  hermes:
    tags: [confluence, documentation]
    category: devops
    requires_toolsets: [terminal]
version: 1.0.0
---
# Confluence Search

## When to Use
When the user asks to find documentation, search Confluence pages, or retrieve page content.

## Procedure

### 1. Search pages by text
```bash
ngn-confluence GET '/rest/api/search?cql=text~"search terms"&limit=10'
```

### 2. Search by space
```bash
ngn-confluence GET '/rest/api/search?cql=space=ADM&limit=10'
```

### 3. Get page content
```bash
ngn-confluence GET '/rest/api/content/{pageId}?expand=body.storage'
```

### 4. List pages in space
```bash
ngn-confluence GET '/rest/api/content?spaceKey=ADM&limit=50'
```

### 5. Get page children
```bash
ngn-confluence GET '/rest/api/content/{pageId}/child/page?limit=50'
```

## Pitfalls
- CQL is different from JQL — `text~"query"` for full-text search
- Page body needs `expand=body.storage` to retrieve content
- Use `limit` parameter — defaults to 25

## Required Environment
- `JIRA_EMAIL` — your Atlassian account email
- `JIRA_API_TOKEN` — from https://id.atlassian.com/manage/api-tokens
SKILL
  echo " ✓ confluence/SKILL.md written"
}
|
||
|
||
# Install the Bitbucket pull-request skill at
# ~/.hermes/skills/ngn-agent/bitbucket/SKILL.md (D-10). The Markdown is
# written verbatim from a quoted heredoc.
# NOTE(review): front-matter nesting (metadata → hermes → tags/category/
# requires_toolsets, `version` at top level) — confirm against the Hermes
# skill format.
write_bitbucket_skill() {
  local skill_dir="$HOME/.hermes/skills/ngn-agent/bitbucket"

  mkdir -p "$skill_dir"
  cat > "${skill_dir}/SKILL.md" << 'SKILL'
---
name: bitbucket-pr
description: Review Bitbucket pull requests and repositories
metadata:
  hermes:
    tags: [bitbucket, git, code-review]
    category: devops
    requires_toolsets: [terminal]
version: 1.0.0
---
# Bitbucket Pull Requests

## When to Use
When the user asks to check PRs, review code, or list repositories.

## Procedure

### 1. List repositories
```bash
ngn-bitbucket GET '/repositories/razersw?pagelen=20'
```

### 2. List open PRs for a repo
```bash
ngn-bitbucket GET '/repositories/razersw/{repo}/pullrequests?state=OPEN&pagelen=20'
```

### 3. Get PR details
```bash
ngn-bitbucket GET '/repositories/razersw/{repo}/pullrequests/{prId}'
```

### 4. Get PR diff
```bash
ngn-bitbucket GET '/repositories/razersw/{repo}/pullrequests/{prId}/diff'
```

### 5. Get PR comments
```bash
ngn-bitbucket GET '/repositories/razersw/{repo}/pullrequests/{prId}/comments'
```

### 6. List branch list
```bash
ngn-bitbucket GET '/repositories/razersw/{repo}/refs/branches?pagelen=20'
```

## Pitfalls
- Bitbucket pagination uses `pagelen` and `page` params (not `maxResults`)
- Diff endpoint returns raw diff text — may be large
- PR comments include inline code comments, not just summary

## Required Environment
- `JIRA_EMAIL` — your Atlassian account email
- `JIRA_API_TOKEN` — from https://id.atlassian.com/manage/api-tokens
SKILL
  echo " ✓ bitbucket/SKILL.md written"
}
|
||
|
||
write_session_skill() {
|
||
mkdir -p "$HOME/.hermes/skills/ngn-agent/session/references"
|
||
cat > "$HOME/.hermes/skills/ngn-agent/session/SKILL.md" << 'SKILL'
|
||
---
|
||
name: session
|
||
description: Main ngn-agent session lifecycle — init, work, close
|
||
metadata:
|
||
hermes:
|
||
tags: [ngn-agent, platform-engineering, session]
|
||
category: devops
|
||
requires_toolsets: [terminal]
|
||
version: 1.0.0
|
||
---
|
||
|
||
# ngn-agent Session Lifecycle
|
||
|
||
## When to Use
|
||
|
||
Load this skill at the START of EVERY platform engineering session, before any other work. This skill defines the standard session workflow.
|
||
|
||
Specific triggers:
|
||
- When the user starts any infrastructure or platform engineering task
|
||
- When the user asks to create a Jira ticket or find a ticket
|
||
- When the user wants to search or load Confluence documentation
|
||
- When a session is ending and you need to document progress
|
||
- When you need to save context for future sessions
|
||
|
||
## Important
|
||
|
||
- **Keep this skill loaded for the entire session** — if context grows large, reload via `skill_view("session")` before the session-end steps (Steps 5–7)
|
||
- **Never create Jira tickets without asking the user first** (D-02)
|
||
- **Never update Confluence without asking the user first** (D-11)
|
||
- **Always save session summary to hindsight at end** — this step has no user prompt, it is automatic (D-12)
|
||
- User must confirm before any Jira mutation (create, comment, transition) — D-08
|
||
- Repos are already mounted at `/workspace/` from Phase 6 (rai-ops, rai-deployment, rai-devtools)
|
||
- This skill replaces the ad-hoc session workflow with a repeatable init→work→close pattern
|
||
|
||
## Procedure
|
||
|
||
### 1. Check for Similar Previous Sessions
|
||
|
||
At the very start of a session, use `hindsight_recall` with a query describing the user's current task to find similar sessions from the last 2 weeks.
|
||
|
||
Call `hindsight_recall` with a budget of low:
|
||
|
||
```
|
||
Tool: hindsight_recall
|
||
Query: "<user's task description>"
|
||
Budget: low
|
||
```
|
||
|
||
Present any matches to the user in this format:
|
||
|
||
```
|
||
Found [N] similar sessions from the last 2 weeks:
|
||
1. [Session Title] — [Date] — [one-line summary]
|
||
2. [Session Title] — [Date] — [one-line summary]
|
||
```
|
||
|
||
Ask the user: "Would you like to resume any of these sessions, or start fresh?"
|
||
- If they choose to resume: load that session's context and continue
|
||
- If they choose fresh: proceed to step 2
|
||
|
||
If no similar sessions are found (normal for first sessions), proceed to step 2.
|
||
|
||
### 2. Prompt: Create Jira Ticket
|
||
|
||
Ask the user: "Would you like to create a Jira Task ticket for this session?"
|
||
|
||
If YES:
|
||
1. Ask which Jira project to use (e.g., "PLATFORM", "DEVOPS") — do not hardcode (D-06)
|
||
2. Check hindsight for cached epics:
|
||
|
||
```
|
||
Tool: hindsight_recall
|
||
Query: "jira epics cached"
|
||
Budget: low
|
||
```
|
||
|
||
3. If epics are cached, check the cache timestamp:
|
||
- If the cache is more than 24 hours old OR the user says the list looks wrong, refresh from Jira:
|
||
```bash
|
||
ngn-jira GET '/rest/api/3/search?jql=issuetype=Epic AND project=<PROJECT>&fields=summary,id&maxResults=50'
|
||
```
|
||
Save fresh epics to hindsight:
|
||
```
|
||
Tool: hindsight_retain
|
||
tier: "epic-cache"
|
||
content: "Epic Cache [<date>]: PROJECT=<PROJECT>: [EPIC-KEY-1: Summary, EPIC-KEY-2: Summary, ...]"
|
||
```
|
||
- If the cache is fresh (less than 24 hours old), use the cached list
|
||
4. If no cached epics found, query Jira for current epics:
|
||
```bash
|
||
ngn-jira GET '/rest/api/3/search?jql=issuetype=Epic AND project=<PROJECT>&fields=summary,id&maxResults=50'
|
||
```
|
||
Save to hindsight for future sessions:
|
||
```
|
||
Tool: hindsight_retain
|
||
tier: "epic-cache"
|
||
content: "Epic Cache [<date>]: PROJECT=<PROJECT>: [EPIC-KEY-1: Summary, ...]"
|
||
```
|
||
5. Present cached/refreshed epics to the user: "Available epics: [list]. Would you like to set a parent epic?"
|
||
6. If user selects an epic, include it as parent when creating the ticket
|
||
7. Create the Task via Jira REST API:
|
||
|
||
```bash
|
||
ngn-jira POST '/rest/api/3/issue' --body '{
|
||
"fields": {
|
||
"project": {"key": "<PROJECT>"},
|
||
"summary": "<session task description>",
|
||
"issuetype": {"name": "Task"},
|
||
"parent": {"key": "<EPIC_KEY>"}
|
||
}
|
||
}'
|
||
```
|
||
|
||
8. Note the ticket key (e.g., `PLATFORM-123`) — save it for session-end steps (Step 5)
|
||
|
||
If NO: proceed to step 3 (no Jira ticket this session)
|
||
|
||
### 3. Prompt: Load Confluence Documentation
|
||
|
||
Ask the user: "Would you like to load relevant Confluence documentation?"
|
||
|
||
If YES:
|
||
1. Search by the `ngn-agent` tag:
|
||
|
||
```bash
|
||
ngn-confluence GET '/rest/api/search?cql=tag="ngn-agent"&limit=20'
|
||
```
|
||
|
||
2. Present matching pages to the user:
|
||
```
|
||
Found [N] pages tagged 'ngn-agent':
|
||
- [Title] — [Space] — [Last Modified]
|
||
```
|
||
|
||
3. Ask: "Which pages would you like me to load?"
|
||
4. For each selected page, load its full content:
|
||
|
||
```bash
|
||
ngn-confluence GET '/rest/api/content/{pageId}?expand=body.storage'
|
||
```
|
||
|
||
5. Review the loaded content with the user
|
||
|
||
If NO: proceed to step 4
|
||
|
||
### 4. Work Phase
|
||
|
||
Repos are already mounted at `/workspace/` (rai-ops, rai-deployment, rai-devtools). Proceed with the task using standard Hermes tools.
|
||
|
||
If you need to clone additional repos:
|
||
```bash
|
||
git clone git@bitbucket.org:razersw/<repo>.git /workspace/<repo>
|
||
```
|
||
|
||
The session skill remains loaded for the session-end steps below. If the skill is evicted from context during a long session, reload it with `skill_view("session")` before proceeding to Steps 5–7.
|
||
|
||
### 5. Session-End: Update Jira
|
||
|
||
When the user indicates work is complete or the session wraps up:
|
||
|
||
Ask the user: "Would you like me to update the Jira ticket with a summary comment?"
|
||
|
||
If YES (and a ticket was created in Step 2):
|
||
```bash
|
||
ngn-jira POST '/rest/api/3/issue/<TICKET-KEY>/comment' --body '{
|
||
"body": "<summary of work done, key decisions, next steps>"
|
||
}'
|
||
```
|
||
|
||
If NO: proceed without updating Jira.
|
||
|
||
**Important (D-08):** Do NOT transition tickets (e.g., close, resolve, move to Done) without explicit user confirmation. Only add comments unless the user specifically asks for a status change.
|
||
|
||
### 6. Session-End: Update Confluence
|
||
|
||
Ask the user: "Would you like me to create or update a Confluence page documenting this session?"
|
||
|
||
If YES:
|
||
- For a new page:
|
||
```bash
|
||
ngn-confluence POST '/rest/api/content' --body '{
|
||
"type": "page",
|
||
"title": "<Session Date>: <Task Description>",
|
||
"space": {"key": "<SPACE_KEY>"},
|
||
"body": {
|
||
"storage": {
|
||
"value": "<h1>Session Summary</h1><p><task summary, key decisions, outcomes></p>",
|
||
"representation": "storage"
|
||
}
|
||
},
|
||
"metadata": {
|
||
"properties": {
|
||
"content-appearance": {"value": "page"}
|
||
}
|
||
},
|
||
"labels": [{"name": "ngn-agent"}]
|
||
}'
|
||
```
|
||
|
||
- For updating an existing page: ask the user which page to update, then PUT to update its content
|
||
|
||
- **Important (D-11):** Do NOT create or update any Confluence page without the user confirming first
|
||
|
||
If NO: proceed without updating Confluence.
|
||
|
||
### 7. Session-End: Save to Hindsight (Automatic — No Prompt)
|
||
|
||
ALWAYS save a session summary to hindsight memory. Do NOT ask the user — this step is automatic and unconditional (D-12).
|
||
|
||
```bash
|
||
Tool: hindsight_retain
|
||
tier: "session-summary"
|
||
content: "
|
||
Session Summary
|
||
===============
|
||
Date: <today>
|
||
Task: <task description>
|
||
Repos: <repos worked on>
|
||
Jira: <ticket key or \"none\">
|
||
Key Decisions:
|
||
- <decision 1>
|
||
- <decision 2>
|
||
Outcomes:
|
||
- <outcome 1>
|
||
- <outcome 2>
|
||
Next Steps:
|
||
- <next step 1>
|
||
"
|
||
```
|
||
|
||
This summary allows future `hindsight_recall` queries to find this session for similarity matching (D-13). The structured content includes: date, task description, repos worked on, Jira ticket reference (or "none"), key decisions, outcomes, and next steps.
|
||
|
||
## Pitfalls
|
||
|
||
- **Skill not loaded at session start:** If you find yourself midway through a session without having run Steps 1–3, you missed the session start workflow. Run Step 1 (hindsight_recall) retroactively and ask the user if they want to create a Jira ticket or load Confluence docs. For future sessions, make sure to load this skill at the very start.
|
||
- **Epic cache too old:** Epics may change between sessions. Check the cache timestamp and refresh if more than 24 hours old. If the user says "that's wrong," always refresh regardless of age.
|
||
- **Confluence tag mismatch:** If the `ngn-agent` tag returns no results, try `platform-engineering` as a fallback, or ask the user what tag they use for session documentation.
|
||
- **Jira project doesn't exist:** If the create ticket call fails with a 404, the project key may be wrong. Ask the user to confirm the correct project key.
|
||
- **Empty hindsight recall (first sessions):** The first few sessions will have no similar sessions to find. That is normal — proceed with a fresh session. Over time, hindsight will accumulate session summaries.
|
||
- **Long sessions may evict this skill:** If the conversation grows long, the session skill content may be evicted from the agent's context. Reload it with `skill_view("session")` before the session-end steps (Steps 5–7) to ensure the Jira/Confluence prompts and hindsight save are not missed.
|
||
- **Missing Jira credentials in cron jobs:** The ngn-jira tool requires both `JIRA_EMAIL` and `JIRA_API_TOKEN` environment variables. If either is missing, Jira operations will fail with "unbound variable" errors. Check environment setup before attempting Jira updates in automated workflows.
|
||
|
||
## Operational Automation
|
||
|
||
### Daily Session Monitoring (Cron Job)
|
||
|
||
When running as a scheduled cron job for operational monitoring:
|
||
|
||
1. **Discover Active Sessions**:
|
||
```bash
|
||
hermes sessions export - # NOT 'hermes sessions list' - no --json flag available
|
||
```
|
||
Parse JSONL output with Python to find sessions with `last_active` within last 7 days
|
||
|
||
2. **Find Associated Jira Tickets**:
|
||
- Use `hindsight_recall` with query 'session summary jira' for each active session
|
||
- Search session messages for Jira patterns: `PLATFORM-\d+`, `AIOPS-\d+`, `RAID-\d+`, etc.
|
||
- Note: One session may have multiple Jira tickets (1-to-many mapping)
|
||
|
||
3. **Update Jira with Progress**:
|
||
```bash
|
||
ngn-jira POST '/rest/api/3/issue/<KEY>/comment' --body '{
|
||
"body": "Session activity update — Date: <today>, Last active: <last_active>. Session: <session_id>. Progress: See session transcript for details."
|
||
}'
|
||
```
|
||
|
||
4. **Generate Telegram Report**:
|
||
- Structure: Active Sessions + Jira Updated + Issues/Summary
|
||
- Keep under 4096 character limit
|
||
- Format with emoji sections for clarity
|
||
|
||
**Environment Requirements for Operational Jobs**:
|
||
- `JIRA_EMAIL` — Required for ngn-jira authentication
|
||
- `JIRA_API_TOKEN` — API token from Atlassian account
|
||
- Both must be set or Jira updates will fail
|
||
|
||
See `references/operational-monitoring.md` for detailed patterns, templates, and troubleshooting.
|
||
|
||
**Important Constraints (Cron Mode)**:
|
||
- DO NOT transition ticket statuses (D-05) - only add comments
|
||
- DO NOT update stale sessions (D-15) - only active within 7 days
|
||
- Use silent mode `[SILENT]` if no active sessions found
|
||
|
||
## Verification
|
||
|
||
1. On session start, agent checks for similar sessions via `hindsight_recall` ✓
|
||
2. Jira Task ticket created (or user declined) ✓
|
||
3. Confluence docs loaded by `ngn-agent` tag search (or user declined) ✓
|
||
4. At session end, user prompted for Jira update ✓
|
||
5. At session end, user prompted for Confluence update ✓
|
||
6. Session summary automatically saved to hindsight via `hindsight_retain` (no prompt) ✓
|
||
7. **Operational cron jobs can discover active sessions and update Jira tickets** ✓
|
||
SKILL
|
||
|
||
# Session reference file
|
||
cat > "$HOME/.hermes/skills/ngn-agent/session/references/operational-monitoring.md" << 'REF'
|
||
# Operational Session Monitoring
|
||
|
||
## Jira Ticket Pattern Detection
|
||
|
||
When scanning session content for associated Jira tickets, search for these patterns:
|
||
|
||
```python
|
||
jira_patterns = [
|
||
r'(PLATFORM-\d+)', # Platform engineering tickets
|
||
r'(AIOPS-\d+)', # AI Operations tickets
|
||
r'(RAID-\d+)', # RAID project tickets
|
||
r'(DEVOPS-\d+)', # DevOps tickets
|
||
r'(QAC-\d+)' # QAC tickets
|
||
]
|
||
```
|
||
|
||
## Session Export vs List Commands
|
||
|
||
**CORRECT**: `hermes sessions export -`
|
||
- Returns machine-readable JSONL format
|
||
- Each line is a complete session object
|
||
- Includes `last_active` timestamps for filtering
|
||
|
||
**INCORRECT**: `hermes sessions list --json`
|
||
- The `--json` flag does not exist (Pitfall from RESEARCH.md)
|
||
- Use export for automation, list for human viewing only
|
||
|
||
## Environment Variable Requirements
|
||
|
||
The `ngn-jira` tool wrapper expects:
|
||
- `JIRA_EMAIL` - Atlassian account email
|
||
- `JIRA_API_TOKEN` - From https://id.atlassian.com/manage/api-tokens
|
||
|
||
Missing either variable causes: `bash: line 10: JIRA_EMAIL: unbound variable`
|
||
|
||
## Telegram Report Template
|
||
|
||
```
|
||
📋 **ACTIVE SESSIONS** — {date}
|
||
|
||
🔹 **{session_id}**
|
||
Title: {title}
|
||
Last Active: {timestamp}
|
||
Jira: {ticket_keys or "None"}
|
||
|
||
🔄 **JIRA UPDATED**: {list of updated ticket keys}
|
||
|
||
❌ **ISSUES**: {any operational problems}
|
||
|
||
📊 **SUMMARY**: {count} active sessions found, {count} with Jira tickets
|
||
```
|
||
|
||
Character limit: 4096 for Telegram delivery
|
||
REF
|
||
echo " ✓ session/SKILL.md + references written"
|
||
}
|
||
|
||
# ---- Register cron jobs (D-10) ----
|
||
#######################################
# Register a single Hermes cron job and report the outcome.
# Arguments:
#   $1   - human-readable job label used in the progress messages
#   $2.. - arguments forwarded verbatim to `hermes cron create`
# Outputs:
#   Progress lines on stdout. Whatever hermes wrote to stderr is replayed
#   on stderr only when the registration fails, so successful runs stay
#   quiet while real errors are no longer hidden.
# Returns:
#   0 if `hermes cron create` succeeded, 1 otherwise.
#######################################
_register_cron_job() {
  local label=$1
  shift
  local err=""
  local rc=0

  echo " → Creating ${label}..."

  # Capture only stderr: inside the substitution stderr is pointed at the
  # capture pipe, while stdout is routed through fd 3 back to the caller's
  # stdout so hermes' normal output is still shown.
  { err=$(hermes cron create "$@" 2>&1 1>&3 3>&-); } 3>&1 || rc=$?

  if (( rc == 0 )); then
    echo " ✓ ${label} registered"
    return 0
  fi

  echo " ⚠ ${label} may already exist"
  if [[ -n "$err" ]]; then
    printf '   hermes (exit %d): %s\n' "$rc" "$err" >&2
  fi
  return 1
}

#######################################
# Register the three ngn-agent cron jobs via `hermes cron create` (D-10):
#   ngn-daily-report          - '0 9 * * *'
#   ngn-weekly-stale-summary  - '0 20 * * 0'
#   ngn-weekly-archive        - '5 20 * * 0' (5 minutes after the summary)
# Every job is attempted even if an earlier one fails (best effort).
# Returns:
#   0 if all registrations succeeded, 1 if at least one failed, so the
#   caller's `register_cron_jobs || ...` fallback is actually reachable.
#######################################
register_cron_jobs() {
  local failures=0

  echo " → Registering cron jobs (D-10)..."

  # 1. ngn-daily-report (daily at 09:00 SGT)
  _register_cron_job ngn-daily-report \
    --deliver telegram --skill session --skill jira-query \
    '0 9 * * *' \
    'Daily session report. Export sessions, find active ones, check Jira, compose Telegram summary.' \
    || failures=$((failures + 1))

  # 2. ngn-weekly-stale-summary (Sunday 20:00 SGT)
  _register_cron_job ngn-weekly-stale-summary \
    --deliver telegram --skill session \
    '0 20 * * 0' \
    'Weekly stale session summary. Review sessions inactive >30 days, compose Telegram summary.' \
    || failures=$((failures + 1))

  # 3. ngn-weekly-archive (Sunday 20:05 SGT — 5 min after summary, per D-10)
  _register_cron_job ngn-weekly-archive \
    --no-agent --script archive-stale-sessions.sh \
    '5 20 * * 0' \
    || failures=$((failures + 1))

  if (( failures > 0 )); then
    return 1
  fi
  return 0
}
|
||
|
||
# ---- Offer gateway restart (per CONTEXT.md "Specific Ideas") ----
|
||
#######################################
# Announce completion and offer to restart the Hermes gateway.
# Globals:
#   NONINTERACTIVE (read) - when set to anything other than "false" the
#                           prompt is skipped and no restart is attempted
# Inputs:
#   One line from stdin: empty, "y" or "Y" restarts; anything else skips.
# Outputs:
#   Status lines on stdout; a warning on stderr if the restart fails.
# Returns:
#   0 when the restart succeeded or was skipped; the exit status of
#   `hermes gateway restart` when that command fails.
#######################################
offer_gateway_restart() {
  local restart=""
  local rc=0

  echo ""
  echo "==> Setup complete!"
  echo ""

  # Never block on a prompt in non-interactive mode.
  if [[ "${NONINTERACTIVE:-false}" != false ]]; then
    echo " → Skipped. Run 'hermes gateway restart' when ready."
    return 0
  fi

  # `read` returns non-zero at EOF (closed or empty stdin). Under `set -e`
  # that would abort the script right after a successful setup, so treat
  # "no answer available" as a skip. A final line without a trailing
  # newline still counts as an answer.
  if ! read -r -p "Restart Hermes gateway now? [Y/n]: " restart \
    && [[ -z "$restart" ]]; then
    echo " → Skipped. Run 'hermes gateway restart' when ready."
    return 0
  fi

  # Empty answer means "yes", matching the [Y/n] prompt default.
  if [[ "$restart" =~ ^[Yy]?$ ]]; then
    hermes gateway restart || rc=$?
    if (( rc != 0 )); then
      echo " ⚠ Gateway restart failed (exit ${rc}). Run 'hermes gateway restart' manually." >&2
      return "$rc"
    fi
    echo " → Gateway restarted."
  else
    echo " → Skipped. Run 'hermes gateway restart' when ready."
  fi
}
|
||
|
||
# =============================================================================
|
||
# Main Execution Block
|
||
# =============================================================================
|
||
#######################################
# Run the complete ngn-agent setup in order, printing a [n/14] progress
# indicator before each phase.
# Globals:
#   NONINTERACTIVE (read)      - "false" prompts for secrets; any other
#                                value takes them from the environment
#   JIRA_API_TOKEN, JIRA_EMAIL, TELEGRAM_BOT_TOKEN, OPENROUTER_API_KEY
#                    (written) - left as globals for the later steps
# Arguments:
#   Accepted for the `main "$@"` call convention; option parsing has
#   already happened before main runs.
# Returns:
#   Status of the last step. Cron registration is best effort and only
#   produces a warning.
#######################################
main() {
  echo ""
  echo "=== ngn-agent Setup Script ==="
  echo "Embedded file snapshots frozen at: 2026-06-15"
  echo ""

  # [1/14] Prerequisites, then echo the resolved paths back to the user.
  echo "[1/14] Checking prerequisites..."
  check_prerequisites
  print_summary

  # [2/14] Secrets: prompt interactively, or require them in the environment.
  echo "[2/14] Collecting secrets..."
  if [ "$NONINTERACTIVE" = false ]; then
    JIRA_API_TOKEN=$(prompt_secret "JIRA_API_TOKEN" "JIRA API Token (https://id.atlassian.com/manage/api-tokens): ")
    JIRA_EMAIL=$(prompt_secret "JIRA_EMAIL" "JIRA Email: ")
    TELEGRAM_BOT_TOKEN=$(prompt_secret "TELEGRAM_BOT_TOKEN" "Telegram Bot Token (from @BotFather): ")
    OPENROUTER_API_KEY=$(prompt_secret "OPENROUTER_API_KEY" "OpenRouter API Key (leave blank to keep existing): " "true")
  else
    echo " → Non-interactive mode — using environment variables"
    : "${JIRA_API_TOKEN:?JIRA_API_TOKEN not set}"
    : "${JIRA_EMAIL:?JIRA_EMAIL not set}"
    : "${TELEGRAM_BOT_TOKEN:?TELEGRAM_BOT_TOKEN not set}"
    # The OpenRouter key is optional. Give it an empty default so later
    # steps can reference it without tripping `set -u`.
    # NOTE(review): assumes an empty value means "keep existing", as the
    # interactive prompt text suggests — confirm against generate_env_file.
    OPENROUTER_API_KEY="${OPENROUTER_API_KEY:-}"
  fi
  echo " ✓ Secrets collected"

  # [3/14] Directory layout.
  echo "[3/14] Creating directories..."
  create_directories

  # [4/14] Back up whatever configuration already exists.
  echo "[4/14] Backing up existing config..."
  backup_config

  # [5/14] config.yaml
  echo "[5/14] Generating config.yaml..."
  generate_config_yaml

  # [6/14] .env
  echo "[6/14] Generating .env..."
  generate_env_file

  # [7/14] hindsight config
  echo "[7/14] Generating hindsight config..."
  generate_hindsight_config

  # [8/14] cron environment
  echo "[8/14] Configuring cron environment..."
  generate_cron_env_config

  # [9/14] session-init script
  echo "[9/14] Writing session-init script..."
  write_session_init_script

  # [10/14] archive script
  echo "[10/14] Writing archive script..."
  write_archive_script

  # [11/14] all five skills
  echo "[11/14] Writing skill files..."
  write_jira_skill
  write_aws_skill
  write_confluence_skill
  write_bitbucket_skill
  write_session_skill

  # [12/14] cron jobs — non-critical, so a failure only warns.
  echo "[12/14] Registering cron jobs..."
  register_cron_jobs || echo " ⚠ Cron registration had issues (may already exist)"

  echo "[13/14] Setup complete."

  # [14/14] optional gateway restart
  echo "[14/14] Offering gateway restart..."
  offer_gateway_restart
}
|
||
|
||
# Entry point: run the setup, forwarding the script's command-line arguments.
main "$@"
|