1454 lines
55 KiB
Bash
Executable File
1454 lines
55 KiB
Bash
Executable File
#!/bin/bash
# Fellowship EC2 Instance Setup Script
# This script is downloaded from S3 and executed by user_data.sh
# Contains all setup logic: Docker, Docker Compose, DevOps Escape Room, and Fellowship SUT
set -e

# Logging setup - redirect all output to log file
# Process substitution keeps output visible on the console (cloud-init) while
# also appending everything, including stderr, to LOG_FILE for post-mortems.
LOG_FILE="/var/log/user-data.log"
exec > >(tee -a "$LOG_FILE") 2>&1
# Function to log with timestamp
# Prints "[YYYY-mm-dd HH:MM:SS] <message>" on stdout (tee'd to LOG_FILE).
log() {
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}
IMDS_BASE_URL="http://169.254.169.254/latest"
IMDS_TOKEN=""

# Obtain an IMDSv2 session token, caching it in the global IMDS_TOKEN.
# Prints the token on stdout; returns 1 if no token could be fetched.
get_imds_token() {
    # Hit the network only when nothing is cached yet.
    if [ -z "$IMDS_TOKEN" ]; then
        IMDS_TOKEN=$(curl -s --max-time 5 --connect-timeout 2 -X PUT "${IMDS_BASE_URL}/api/token" \
            -H "X-aws-ec2-metadata-token-ttl-seconds: 21600" 2>/dev/null || echo "")
    fi

    if [ -z "$IMDS_TOKEN" ]; then
        return 1
    fi

    echo "$IMDS_TOKEN"
    return 0
}
|
# Read one value from the EC2 instance metadata service.
#   $1 - path under /meta-data (e.g. "instance-id", "placement/region")
# Prints the value, or an empty string on any failure; always exits 0.
get_instance_metadata() {
    local path="$1"
    local token
    token=$(get_imds_token 2>/dev/null || echo "")

    # Prefer IMDSv2 (token header); fall back to IMDSv1 when no token exists.
    local -a auth_header=()
    if [ -n "$token" ]; then
        auth_header=(-H "X-aws-ec2-metadata-token: ${token}")
    fi
    curl -s --max-time 5 --connect-timeout 2 "${auth_header[@]}" \
        "${IMDS_BASE_URL}/meta-data/${path}" 2>/dev/null || echo ""
}
|
log "=========================================="
log "Fellowship Setup Script Started"
log "=========================================="

# Get AWS region with retries and fallback: IMDS may not be reachable in the
# first seconds after boot, so poll up to 5 times before defaulting.
AWS_REGION=""
for i in {1..5}; do
    AWS_REGION=$(get_instance_metadata "placement/region")
    [ -n "$AWS_REGION" ] && break
    [ "$i" -lt 5 ] && sleep 2
done
# BUGFIX: the original `[ -z ] && A && B || C` chain is not a real if/else
# (C would also run if B failed); use an explicit if/else instead.
if [ -z "$AWS_REGION" ]; then
    AWS_REGION="eu-west-1"
    log "Using default region: $AWS_REGION"
else
    log "Region: $AWS_REGION"
fi
|
# Function to wait for yum lock
# Blocks until no process holds /var/run/yum.pid, polling every 5 seconds.
# fuser exits non-zero when nothing has the file open, ending the loop.
wait_for_yum() {
    while sudo fuser /var/run/yum.pid >/dev/null 2>&1; do
        log "Waiting for yum lock to be released..."
        sleep 5
    done
}
|
# Helper function to run docker commands as ec2-user with proper group membership
#   $1 - shell command string, executed by ec2-user's login shell
# NOTE(review): $cmd is spliced into a single-quoted string, so commands that
# themselves contain single quotes will break — callers must avoid them.
run_as_ec2user_docker() {
    local cmd="$1"
    # Use sg (switch group) to ensure docker group is active in the subshell
    # This is more reliable than su - which may not pick up new group membership immediately
    sg docker -c "su - ec2-user -c '$cmd'"
}
|
# Create a 2GB swap file on instances with < ~3.5GB RAM so the Docker builds
# later in this script do not OOM during bootstrap.
# Idempotent: returns immediately if /swapfile already exists.
# Best-effort: mkswap/swapon failures are tolerated rather than aborting.
ensure_swap_for_small_instances() {
    if [ -f /swapfile ]; then
        return 0
    fi

    local mem_mb
    mem_mb=$(awk '/MemTotal/ {print int($2/1024)}' /proc/meminfo 2>/dev/null || echo "0")
    if [ "$mem_mb" -ge 3500 ]; then
        return 0
    fi

    log "Low-memory instance detected (${mem_mb}MB). Creating 2GB swap to reduce bootstrap OOM risk..."
    # fallocate is fast but unsupported on some filesystems; fall back to dd.
    if command -v fallocate >/dev/null 2>&1; then
        fallocate -l 2G /swapfile || dd if=/dev/zero of=/swapfile bs=1M count=2048
    else
        dd if=/dev/zero of=/swapfile bs=1M count=2048
    fi
    chmod 600 /swapfile
    mkswap /swapfile >/dev/null 2>&1 || true
    swapon /swapfile >/dev/null 2>&1 || true
    # Persist the swap across reboots; add the fstab entry only once.
    grep -q '^/swapfile' /etc/fstab || echo '/swapfile none swap sw 0 0' >> /etc/fstab
    log "✓ Swap configured"
}
|
# Wait for any existing yum processes to complete
log "Checking for yum locks..."
wait_for_yum

# Update and install dependencies
log "Installing Docker and Git..."
yum update -y
# yum update may briefly re-acquire the lock; wait again before installing.
wait_for_yum
yum install -y docker git
systemctl start docker
systemctl enable docker
# Allow ec2-user to talk to the docker daemon without sudo.
usermod -aG docker ec2-user
ensure_swap_for_small_instances
log "✓ Docker installed and started"
|
# Install Docker Compose plugin
# Installs Compose v2 as a CLI plugin under ec2-user's home so that
# `docker compose` works for the ec2-user account.
log "Installing Docker Compose plugin..."
mkdir -p /home/ec2-user/.docker/cli-plugins/
# BUGFIX: added -f so curl fails on HTTP errors (e.g. 404). Without it a
# failed download would "succeed" and write an HTML error page to the
# plugin path, breaking every later `docker compose` invocation.
if curl -fSL https://github.com/docker/compose/releases/download/v2.27.0/docker-compose-linux-x86_64 -o /home/ec2-user/.docker/cli-plugins/docker-compose; then
    chmod +x /home/ec2-user/.docker/cli-plugins/docker-compose
    chown -R ec2-user:ec2-user /home/ec2-user/.docker
    log "✓ Docker Compose plugin installed"
else
    log "ERROR: Failed to download Docker Compose plugin"
    exit 1
fi
|
# Dump compose status and recent service log tails into the user-data log
# (output indented one space for readability). Best-effort: never fails.
dump_runtime_diagnostics() {
    log "Runtime diagnostics (docker compose ps):"
    run_as_ec2user_docker "cd ~ && docker compose ps" 2>&1 | sed 's/^/ /' || true
    local svc
    for svc in caddy frontend backend; do
        log "Runtime diagnostics (${svc} logs tail):"
        run_as_ec2user_docker "cd ~ && docker compose logs ${svc} --tail 120" 2>&1 | sed 's/^/ /' || true
    done
}
|
# Function to verify network connectivity to external services (esp. github.com)
# Returns 0 when both DNS resolution and HTTPS reachability for github.com
# succeed; each check is retried once after a 2s pause. Returns 1 otherwise.
verify_network_connectivity() {
    log "Verifying network connectivity to external services..."
    local connectivity_ok=true

    # Test DNS resolution for github.com
    log " Testing DNS resolution for github.com..."
    if ! getent hosts github.com >/dev/null 2>&1; then
        log " WARNING: DNS resolution for github.com failed initially, retrying..."
        sleep 2
        if ! getent hosts github.com >/dev/null 2>&1; then
            log " ERROR: DNS resolution for github.com failed"
            connectivity_ok=false
        else
            log " OK: DNS resolution for github.com OK after retry"
        fi
    else
        log " OK: DNS resolution for github.com OK"
    fi

    # Test HTTP connectivity to github.com
    # timeout 10 caps the whole curl attempt, independent of curl's own timers.
    log " Testing HTTP connectivity to github.com..."
    if ! timeout 10 curl -s -o /dev/null --head https://github.com 2>/dev/null; then
        log " WARNING: Cannot reach github.com, retrying..."
        sleep 2
        if ! timeout 10 curl -s -o /dev/null --head https://github.com 2>/dev/null; then
            log " ERROR: Cannot reach github.com (required for docker builds)"
            connectivity_ok=false
        else
            log " OK: Connectivity to github.com OK after retry"
        fi
    else
        log " OK: Connectivity to github.com OK"
    fi

    if [ "$connectivity_ok" = "true" ]; then
        return 0
    else
        return 1
    fi
}
|
# DevOps Escape Room stack (Jenkins + Gitea + code-server + MailHog)
log "Setting up DevOps Escape Room stack..."

# Derive DevOps HTTPS subdomain names early if CADDY_DOMAIN is already available
# from the user_data environment variable (the common case for classroom instances).
# If CADDY_DOMAIN is not set yet (EC2-tag fallback), these will be re-derived later
# once the domain is confirmed.
if [ -n "${CADDY_DOMAIN:-}" ]; then
    # ${VAR:-default} keeps any explicitly provided override.
    JENKINS_DOMAIN="${JENKINS_DOMAIN:-jenkins-${CADDY_DOMAIN}}"
    IDE_DOMAIN="${IDE_DOMAIN:-ide-${CADDY_DOMAIN}}"
    log "DevOps HTTPS subdomains derived from CADDY_DOMAIN:"
    log " Jenkins: ${JENKINS_DOMAIN}"
    log " IDE: ${IDE_DOMAIN}"
else
    JENKINS_DOMAIN=""
    IDE_DOMAIN=""
fi

# The devops-escape-room directory ships with the SUT tarball (extracted above),
# but the SUT extraction happens later. Write an inline compose file here so
# the stack can start in parallel. After the SUT is extracted the files will be
# replaced by the version from the tarball, which is identical.
mkdir -p /home/ec2-user/devops-escape-room
mkdir -p /home/ec2-user/jenkins/casc
mkdir -p /home/ec2-user/gitea
# ── Jenkins Dockerfile & plugins ────────────────────────────────────────────
# Custom Jenkins image: skips the setup wizard, loads configuration-as-code
# from /var/jenkins_home/casc_configs, and adds python3/node/docker CLI for
# pipeline build steps. Quoted heredoc: the file is written verbatim.
cat > /home/ec2-user/jenkins/Dockerfile << 'JENKINSEOF'
FROM jenkins/jenkins:lts-jdk17
ENV JAVA_OPTS="-Djenkins.install.runSetupWizard=false"
ENV CASC_JENKINS_CONFIG=/var/jenkins_home/casc_configs
USER root
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3 python3-pip nodejs npm curl docker.io \
    && rm -rf /var/lib/apt/lists/* \
    && usermod -aG docker jenkins || true
USER jenkins
COPY plugins.txt /usr/share/jenkins/plugins.txt
RUN jenkins-plugin-cli --plugin-file /usr/share/jenkins/plugins.txt --latest false
COPY casc/ /var/jenkins_home/casc_configs/
JENKINSEOF
|
# Jenkins plugin list consumed by jenkins-plugin-cli in the Dockerfile above
# (pipeline, git/Gitea integration, JCasC, mail, docker, and UI plugins).
cat > /home/ec2-user/jenkins/plugins.txt << 'PLUGINSEOF'
workflow-aggregator
pipeline-stage-view
blueocean
git
gitea
configuration-as-code
job-dsl
junit
htmlpublisher
build-timeout
credentials
credentials-binding
plain-credentials
ssh-credentials
mailer
email-ext
dashboard-view
build-monitor-plugin
docker-workflow
timestamper
ws-cleanup
antisamy-markup-formatter
PLUGINSEOF
|
# Jenkins configuration-as-code. Quoted delimiter ('CASCEOF') is deliberate:
# ${JENKINS_ADMIN_PASSWORD:-fellowship123} must NOT be expanded here — JCasC
# resolves it from the Jenkins container environment at startup.
cat > /home/ec2-user/jenkins/casc/jenkins.yaml << 'CASCEOF'
jenkins:
  systemMessage: |
    🧙 Welcome to the Fellowship's Jenkins CI!
    One does not simply skip the pipeline...

  numExecutors: 2
  securityRealm:
    local:
      allowsSignup: false
      users:
        - id: "fellowship"
          name: "Gandalf the Grey"
          password: "${JENKINS_ADMIN_PASSWORD:-fellowship123}"
  authorizationStrategy:
    loggedInUsersCanDoAnything:
      allowAnonymousRead: true
  globalNodeProperties:
    - envVars:
        env:
          - key: "GITEA_URL"
            value: "http://gitea:3000"
          - key: "SUT_REPO"
            value: "http://gitea:3000/fellowship/lotr-sut.git"

unclassified:
  location:
    url: "http://localhost:8080/"
    adminAddress: "gandalf@fellowship.local"
  mailer:
    smtpHost: "mailhog"
    smtpPort: "1025"
    useSsl: false
    charset: "UTF-8"

jobs:
  - script: |
      pipelineJob('fellowship-sut-pipeline') {
        displayName('Fellowship SUT — CI Pipeline')
        description('One pipeline to build them all, and in the darkness test them.')
        definition {
          cpsScm {
            scm {
              git {
                remote {
                  url('http://gitea:3000/fellowship/lotr-sut.git')
                }
                branch('*/main')
              }
            }
            scriptPath('Jenkinsfile')
          }
        }
        triggers {
          scm('H/5 * * * *')
        }
        logRotator {
          numToKeep(10)
        }
      }
CASCEOF
|
# ── Gitea init script ────────────────────────────────────────────────────────
# Runs inside the alpine/git gitea-init container (POSIX sh, not bash): waits
# for Gitea, creates the org/repo, and pushes the SUT code on first boot.
# Quoted heredoc: written verbatim, nothing is expanded by this script.
cat > /home/ec2-user/gitea/init.sh << 'GITEAINITEOF'
#!/bin/sh
set -e
GITEA_URL="${GITEA_URL:-http://gitea:3000}"
ADMIN_USER="${GITEA_ADMIN_USER:-fellowship}"
ADMIN_PASS="${GITEA_ADMIN_PASSWORD:-fellowship123}"
ADMIN_EMAIL="${GITEA_ADMIN_EMAIL:-gandalf@fellowship.local}"
ORG_NAME="fellowship"
REPO_NAME="lotr-sut"

log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] [gitea-init] $*"; }

wait_for_gitea() {
    log "Waiting for Gitea at ${GITEA_URL}..."
    i=0; while [ $i -lt 30 ]; do
        curl -sf "${GITEA_URL}/api/v1/version" > /dev/null 2>&1 && log "✓ Gitea ready" && return 0
        log " attempt $((i+1))/30 — waiting 5s..."; sleep 5; i=$((i+1))
    done
    log "ERROR: Gitea not ready"; return 1
}

wait_for_admin() {
    log "Waiting for Gitea admin user '${ADMIN_USER}' to be ready..."
    i=0; while [ $i -lt 30 ]; do
        curl -sf -u "${ADMIN_USER}:${ADMIN_PASS}" "${GITEA_URL}/api/v1/user" > /dev/null 2>&1 \
            && log "✓ Admin user ready" && return 0
        log " attempt $((i+1))/30 — waiting 3s..."; sleep 3; i=$((i+1))
    done
    log "WARNING: Admin user not ready — proceeding anyway (some operations may fail)"
    return 0
}

api() { curl -sf -u "${ADMIN_USER}:${ADMIN_PASS}" "$@"; }

wait_for_gitea
wait_for_admin

# Create org (ignore if exists)
api -X POST "${GITEA_URL}/api/v1/orgs" \
    -H "Content-Type: application/json" \
    -d "{\"username\":\"${ORG_NAME}\",\"full_name\":\"The Fellowship of the Ring\",\"visibility\":\"public\"}" \
    > /dev/null 2>&1 || true
log "✓ Organization '${ORG_NAME}' ready"

# Create repo (ignore if exists)
api -X POST "${GITEA_URL}/api/v1/orgs/${ORG_NAME}/repos" \
    -H "Content-Type: application/json" \
    -d "{\"name\":\"${REPO_NAME}\",\"description\":\"LOTR SUT\",\"private\":false,\"auto_init\":false,\"default_branch\":\"main\"}" \
    > /dev/null 2>&1 || true
log "✓ Repository '${ORG_NAME}/${REPO_NAME}' ready"

# Push code if repo is empty.
# BUGFIX: was `|| echo "0"` — grep -c already prints "0" (and exits 1) when
# there is no match, so the fallback produced "0\n0" and made the -gt test
# below error out. `|| true` keeps grep's own count.
COMMITS=$(api "${GITEA_URL}/api/v1/repos/${ORG_NAME}/${REPO_NAME}/commits?limit=1" 2>/dev/null | grep -c '"sha"' || true)
COMMITS=${COMMITS:-0}
if [ "$COMMITS" -gt 0 ]; then
    log "✓ Repository already has commits — skipping push"; exit 0
fi

SRC=""
for d in /sut-source /home/ec2-user; do
    [ -f "${d}/docker-compose.yml" ] && [ -d "${d}/sut" ] && SRC="$d" && break
done

if [ -z "$SRC" ]; then
    log "WARNING: SUT source not found — skipping code push"; exit 0
fi

log "Pushing code from ${SRC} to Gitea..."
AUTH_URL=$(echo "${GITEA_URL}/${ORG_NAME}/${REPO_NAME}.git" | sed "s|http://|http://${ADMIN_USER}:${ADMIN_PASS}@|")
TMP=$(mktemp -d)
cp -a "${SRC}/." "${TMP}/"
cd "${TMP}"
rm -rf .git
git init -b main
git config user.email "${ADMIN_EMAIL}"
git config user.name "Gandalf the Grey"
git add -A
git commit -m "🧙 Initial commit: The Fellowship's Quest List SUT"
git remote add gitea "${AUTH_URL}"
git push gitea main
cd /; rm -rf "${TMP}"
log "✓ SUT code pushed to Gitea"
GITEAINITEOF
chmod +x /home/ec2-user/gitea/init.sh
|
# ── code-server custom Dockerfile + entrypoint ──────────────────────────────
# Extends codercom/code-server with Docker CLI, Compose v2, and pre-installed
# VS Code extensions (Python, Playwright, Copilot, Jupyter, Prettier).
mkdir -p /home/ec2-user/devops-escape-room/code-server

# Quoted heredoc: the Dockerfile is written verbatim, no shell expansion.
cat > /home/ec2-user/devops-escape-room/code-server/Dockerfile << 'CSRVDOCKEREOF'
FROM codercom/code-server:latest

USER root

# Docker CLI + gosu (for clean privilege drop) + utilities
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    docker.io \
    gosu \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Docker Compose v2 plugin + legacy symlink
RUN mkdir -p /usr/local/lib/docker/cli-plugins && \
    curl -fsSL \
    "https://github.com/docker/compose/releases/download/v2.27.0/docker-compose-linux-x86_64" \
    -o /usr/local/lib/docker/cli-plugins/docker-compose && \
    chmod +x /usr/local/lib/docker/cli-plugins/docker-compose && \
    ln -sf /usr/local/lib/docker/cli-plugins/docker-compose /usr/local/bin/docker-compose

# Coder user docker group (GID re-aligned at runtime by entrypoint)
RUN groupadd -g 999 docker 2>/dev/null || groupmod -g 999 docker 2>/dev/null || true && \
    usermod -aG docker coder

COPY entrypoint.sh /usr/bin/fellowship-docker-init.sh
RUN chmod +x /usr/bin/fellowship-docker-init.sh

USER root
ENTRYPOINT ["/usr/bin/fellowship-docker-init.sh"]
CSRVDOCKEREOF
|
# Entrypoint for the custom code-server image. Quoted heredoc — the inner
# here-doc (SETTINGSEOF) and all ${...} references are written verbatim and
# evaluated inside the container at startup, not by this setup script.
cat > /home/ec2-user/devops-escape-room/code-server/entrypoint.sh << 'CSRVENTRYEOF'
#!/bin/bash
# Fellowship code-server entrypoint: fixes Docker GID, installs extensions, starts IDE
set -e
log() { echo "[$(date '+%H:%M:%S')] [fellowship-init] $*"; }

# Fix docker group GID to match host socket
if [ -S /var/run/docker.sock ]; then
    DOCK_GID=$(stat -c '%g' /var/run/docker.sock)
    if getent group docker > /dev/null 2>&1; then
        groupmod -g "${DOCK_GID}" docker 2>/dev/null || true
    else
        groupadd -g "${DOCK_GID}" docker 2>/dev/null || true
    fi
    usermod -aG docker coder 2>/dev/null || true
    chmod 666 /var/run/docker.sock 2>/dev/null || true
    log "Docker group GID aligned to ${DOCK_GID}"
else
    log "WARNING: docker.sock not mounted — docker unavailable in IDE terminal"
fi

# Install VS Code extensions as coder user
log "Installing VS Code extensions..."
for ext in ms-python.python github.copilot ms-playwright.playwright esbenp.prettier-vscode ms-toolsai.jupyter redhat.vscode-yaml ms-azuretools.vscode-docker; do
    gosu coder code-server --install-extension "${ext}" --force > /dev/null 2>&1 && \
        log " OK ${ext}" || log " SKIP ${ext}"
done

# Default settings
SETTINGS_DIR="/home/coder/.local/share/code-server/User"
if [ ! -f "${SETTINGS_DIR}/settings.json" ]; then
    gosu coder mkdir -p "${SETTINGS_DIR}"
    cat > "${SETTINGS_DIR}/settings.json" << 'SETTINGSEOF'
{
    "python.defaultInterpreterPath": "/usr/bin/python3",
    "editor.formatOnSave": true,
    "terminal.integrated.defaultProfile.linux": "bash",
    "git.autofetch": true,
    "docker.host": "unix:///var/run/docker.sock"
}
SETTINGSEOF
    chown coder:coder "${SETTINGS_DIR}/settings.json"
    log "Default settings.json written"
fi

log "Starting code-server..."
exec gosu coder /usr/bin/entrypoint.sh "$@"
CSRVENTRYEOF
chmod +x /home/ec2-user/devops-escape-room/code-server/entrypoint.sh
chown -R ec2-user:ec2-user /home/ec2-user/devops-escape-room/code-server
log "✓ code-server Dockerfile and entrypoint written"
|
# ── devops-escape-room docker-compose ────────────────────────────────────────
# Quoted heredoc: ${VAR:-default} references are left for Docker Compose to
# resolve from devops-escape-room/.env at `docker compose up` time.
cat > /home/ec2-user/devops-escape-room/docker-compose.yml << 'COMPOSEEOF'
# Fellowship DevOps Escape Room Stack
# Jenkins CI | Gitea Git | code-server IDE | MailHog mail
#
# Jenkins: http://HOST:8080 (fellowship / fellowship123)
# Gitea: http://HOST:3030 (fellowship / fellowship123)
# code-server: http://HOST:8443 (password: fellowship)
# MailHog: http://HOST:8025

services:
  jenkins:
    build:
      context: ../jenkins
      dockerfile: Dockerfile
    image: fellowship-jenkins:latest
    container_name: fellowship-jenkins
    restart: unless-stopped
    ports:
      - "8080:8080"
      - "50000:50000"
    volumes:
      - jenkins_home:/var/jenkins_home
      - /var/run/docker.sock:/var/run/docker.sock
    environment:
      JENKINS_ADMIN_PASSWORD: ${JENKINS_ADMIN_PASSWORD:-fellowship123}
      CASC_JENKINS_CONFIG: /var/jenkins_home/casc_configs
      # JENKINS_URL is written to devops-escape-room/.env by setup_fellowship.sh
      # once CADDY_DOMAIN is known, so Jenkins knows its canonical HTTPS URL.
      JENKINS_URL: ${JENKINS_URL:-http://localhost:8080/}
    depends_on:
      gitea:
        condition: service_healthy
    healthcheck:
      test: ["CMD-SHELL", "curl -sf http://localhost:8080/login || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 90s

  gitea:
    image: gitea/gitea:1.22
    container_name: fellowship-gitea
    restart: unless-stopped
    ports:
      - "3030:3000"
      - "2222:22"
    volumes:
      - gitea_data:/data
    environment:
      USER_UID: "1000"
      USER_GID: "1000"
      GITEA__database__DB_TYPE: sqlite3
      GITEA__server__DOMAIN: ${GITEA_DOMAIN:-localhost}
      GITEA__server__HTTP_PORT: "3000"
      GITEA__server__ROOT_URL: ${GITEA_ROOT_URL:-http://localhost:3030/}
      GITEA__server__SSH_DOMAIN: localhost
      GITEA__server__SSH_PORT: "2222"
      GITEA__service__DISABLE_REGISTRATION: "false"
      GITEA__service__REQUIRE_SIGNIN_VIEW: "false"
      GITEA__security__INSTALL_LOCK: "true"
      GITEA__mailer__ENABLED: "false"
      # Pre-create the admin user on first boot via Gitea environment variables.
      # Without these the gitea-init container cannot authenticate against the API.
      GITEA__admin__ADMIN_USER: ${GITEA_ADMIN_USER:-fellowship}
      GITEA__admin__ADMIN_PASSWD: ${GITEA_ADMIN_PASSWORD:-fellowship123}
      GITEA__admin__ADMIN_EMAIL: ${GITEA_ADMIN_EMAIL:-gandalf@fellowship.local}
      GITEA__admin__SEND_NOTIFY: "false"
    healthcheck:
      test: ["CMD-SHELL", "curl -sf http://localhost:3000/api/v1/version || exit 1"]
      interval: 15s
      timeout: 5s
      retries: 6
      start_period: 30s

  gitea-init:
    image: alpine/git:latest
    container_name: fellowship-gitea-init
    restart: on-failure:5
    environment:
      GITEA_URL: "http://gitea:3000"
      GITEA_ADMIN_USER: ${GITEA_ADMIN_USER:-fellowship}
      GITEA_ADMIN_PASSWORD: ${GITEA_ADMIN_PASSWORD:-fellowship123}
      GITEA_ADMIN_EMAIL: ${GITEA_ADMIN_EMAIL:-gandalf@fellowship.local}
      GITEA_DOMAIN: ${GITEA_DOMAIN:-}
      SUT_SOURCE_DIR: /sut-source
    volumes:
      - ../gitea/init.sh:/init.sh:ro
      - /home/ec2-user/sut:/sut-source/sut:ro
      - /home/ec2-user/docker-compose.yml:/sut-source/docker-compose.yml:ro
      - /home/ec2-user/caddy:/sut-source/caddy:ro
      - /home/ec2-user/nginx:/sut-source/nginx:ro
      - /home/ec2-user/Jenkinsfile:/sut-source/Jenkinsfile:ro
    entrypoint: ["/bin/sh", "/init.sh"]
    depends_on:
      gitea:
        condition: service_healthy

  code-server:
    build:
      context: ./code-server
      dockerfile: Dockerfile
    image: fellowship-code-server:latest
    container_name: fellowship-code-server
    restart: unless-stopped
    ports:
      - "8443:8080"
    volumes:
      - /home/ec2-user:/home/coder/fellowship:rw
      - codeserver_config:/home/coder/.config
      # Mount Docker socket so students can run docker compose from the IDE terminal
      - /var/run/docker.sock:/var/run/docker.sock
    environment:
      PASSWORD: ${CODESERVER_PASSWORD:-fellowship}
    command:
      - --auth=password
      - --bind-addr=0.0.0.0:8080
      - /home/coder/fellowship
    healthcheck:
      test: ["CMD-SHELL", "curl -sf http://localhost:8080/ || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s

  mailhog:
    image: mailhog/mailhog:v1.0.1
    container_name: fellowship-mailhog
    restart: unless-stopped
    ports:
      - "1025:1025"
      - "8025:8025"

volumes:
  jenkins_home:
    driver: local
  gitea_data:
    driver: local
  # BUGFIX: code-server mounts the named volume `codeserver_config` above, but
  # it was never declared here — `docker compose up` fails with
  # "service code-server refers to undefined volume codeserver_config".
  codeserver_config:
    driver: local
COMPOSEEOF
|
# Pre-create code-server config directory with proper permissions (bind mount)
# The coder user inside the container runs as 1000:1000, so the host directory
# must be owned by ec2-user (uid 1000) to avoid "permission denied" errors.
# NOTE(review): the compose file mounts the *named volume* codeserver_config
# for /home/coder/.config, not this directory — confirm whether this
# bind-mount preparation is still required.
mkdir -p /home/ec2-user/.codeserver-config
chown 1000:1000 /home/ec2-user/.codeserver-config
chmod 700 /home/ec2-user/.codeserver-config

# Hand the generated stack files to ec2-user, who runs docker compose.
chown -R ec2-user:ec2-user /home/ec2-user/devops-escape-room
chown -R ec2-user:ec2-user /home/ec2-user/jenkins
chown -R ec2-user:ec2-user /home/ec2-user/gitea
|
# Write the devops-escape-room .env so Docker Compose can inject the correct
# JENKINS_URL into the Jenkins container before it starts.
# If JENKINS_DOMAIN is not yet known (EC2-tag path), Jenkins defaults to localhost
# and will be updated automatically after the SUT domain is resolved.
DEVOPS_JENKINS_URL="${JENKINS_DOMAIN:+https://${JENKINS_DOMAIN}/}"
DEVOPS_JENKINS_URL="${DEVOPS_JENKINS_URL:-http://localhost:8080/}"

# Copy devops-escape-room environment template to .env
log "Setting up devops-escape-room environment..."
if [ -f /home/ec2-user/devops-escape-room/.env.prod ]; then
    cp /home/ec2-user/devops-escape-room/.env.prod /home/ec2-user/devops-escape-room/.env
    chown ec2-user:ec2-user /home/ec2-user/devops-escape-room/.env
    chmod 644 /home/ec2-user/devops-escape-room/.env
    log "✓ Copied devops-escape-room/.env.prod to .env"

    # Update JENKINS_URL if domain is available
    if [ -n "$JENKINS_DOMAIN" ]; then
        # BUGFIX: `sed -i ""` is BSD/macOS syntax; GNU sed (Amazon Linux)
        # treats the empty string as the sed script and fails. Plain -i is
        # the correct GNU form for in-place editing without a backup.
        sed -i "s|^JENKINS_URL=.*|JENKINS_URL=${DEVOPS_JENKINS_URL}|" /home/ec2-user/devops-escape-room/.env
    fi
else
    # Fallback: create .env inline if template not found.
    # Unquoted heredoc delimiter on purpose: ${DEVOPS_JENKINS_URL} expands here.
    cat > /home/ec2-user/devops-escape-room/.env << EOF
JENKINS_ADMIN_PASSWORD=fellowship123
GITEA_ADMIN_USER=fellowship
GITEA_ADMIN_PASSWORD=fellowship123
GITEA_ADMIN_EMAIL=gandalf@fellowship.local
CODESERVER_PASSWORD=fellowship
JENKINS_URL=${DEVOPS_JENKINS_URL}
GITEA_DOMAIN=
EOF
    chown ec2-user:ec2-user /home/ec2-user/devops-escape-room/.env
    log "✓ Wrote devops-escape-room/.env (fallback mode, JENKINS_URL=${DEVOPS_JENKINS_URL})"
fi

log "Building and starting DevOps Escape Room stack..."
# BUGFIX: the original `if pipeline | tee` tested tee's exit status (always
# 0), so the warning branch was unreachable. PIPESTATUS[0] holds the compose
# command's real status.
if run_as_ec2user_docker "cd ~/devops-escape-room && docker compose up -d --build" 2>&1 | \
    tee -a "$LOG_FILE"; [ "${PIPESTATUS[0]}" -eq 0 ]; then
    log "✓ DevOps Escape Room stack started (Jenkins, Gitea, code-server, MailHog)"
else
    log "WARNING: DevOps Escape Room stack may not have started cleanly — check logs"
fi
|
# Fellowship SUT Setup
log "Setting up Fellowship SUT..."

# Get SUT bucket from SSM
log "Retrieving SUT bucket from SSM: /classroom/fellowship/sut-bucket"
# BUGFIX: under `set -e` a bare SUT_BUCKET=$(aws ...) aborts the script on
# failure BEFORE the old `[ $? -ne 0 ]` check could run; guarding the
# assignment with `if !` keeps the error path (and its logging) reachable.
# stderr is captured (2>&1) so the AWS error message can be logged.
if ! SUT_BUCKET=$(aws ssm get-parameter --name "/classroom/fellowship/sut-bucket" --query "Parameter.Value" --output text --region "${AWS_REGION}" 2>&1) \
    || [ -z "$SUT_BUCKET" ] || [ "$SUT_BUCKET" = "None" ]; then
    log "ERROR: Failed to get SUT bucket from SSM"
    log "Error: $SUT_BUCKET"
    exit 1
fi
log "SUT bucket: $SUT_BUCKET"
|
# Download SUT from S3
log "Finding latest SUT artifact in S3..."
# awk re-emits "date time name" for matching artifacts so a plain
# lexicographic sort orders them chronologically; tail -1 picks the newest,
# and the final awk extracts just the object name.
LATEST_TAR=$(aws s3 ls "s3://${SUT_BUCKET}/" --region "${AWS_REGION}" | \
    awk '/fellowship-sut-.*\.tar\.gz$/ {print $1" "$2" "$4}' | \
    sort | tail -n 1 | awk '{print $3}')

if [ -z "$LATEST_TAR" ]; then
    log "ERROR: No fellowship-sut-*.tar.gz artifact found in S3 bucket"
    exit 1
fi

log "Downloading latest SUT artifact: $LATEST_TAR"
# Check both the aws exit status and that the file actually materialized.
if ! aws s3 cp "s3://${SUT_BUCKET}/${LATEST_TAR}" /tmp/fellowship-sut.tar.gz --region "${AWS_REGION}" >/dev/null 2>&1 || [ ! -f "/tmp/fellowship-sut.tar.gz" ]; then
    log "ERROR: Failed to download SUT from S3"
    log "Expected location: s3://${SUT_BUCKET}/${LATEST_TAR}"
    exit 1
fi
log "✓ SUT downloaded"
|
# Extract SUT
log "Extracting SUT..."
if ! tar -xzf /tmp/fellowship-sut.tar.gz -C /home/ec2-user/ 2>/dev/null; then
    log "ERROR: Failed to extract SUT"
    exit 1
fi
rm -f /tmp/fellowship-sut.tar.gz

# Tarball extracts to sut/ and docker-compose.yml at home root - chown both
# Best-effort (|| true): some paths may not exist in every artifact version.
chown -R ec2-user:ec2-user /home/ec2-user/sut 2>/dev/null || true
chown ec2-user:ec2-user /home/ec2-user/docker-compose.yml 2>/dev/null || true
chown -R ec2-user:ec2-user /home/ec2-user/caddy 2>/dev/null || true
chown -R ec2-user:ec2-user /home/ec2-user/nginx 2>/dev/null || true
log "✓ SUT extracted"
|
# Copy environment template to .env for docker-compose
# This ensures COMPOSE_PROJECT_NAME=fellowship (production) is used
log "Setting up production environment (.env.prod → .env)..."
if [ -f /home/ec2-user/.env.prod ]; then
    cp /home/ec2-user/.env.prod /home/ec2-user/.env
    chown ec2-user:ec2-user /home/ec2-user/.env
    chmod 644 /home/ec2-user/.env
    log "✓ Copied .env.prod to .env"
else
    # Non-fatal: later steps are expected to populate .env themselves.
    log "WARNING: .env.prod not found — .env will be created from scratch"
fi
|
# Get instance domain for Caddy
# PRIORITY 1: Check if domain was passed via user_data environment variable
# This is the most reliable method - domain is known before instance creation
log "Getting instance domain for Caddy..."
if [ -n "$CADDY_DOMAIN" ] && [ "$CADDY_DOMAIN" != "" ]; then
    log "✓ Found Caddy domain from user_data environment: $CADDY_DOMAIN"
    # Domain is already set, no need to query EC2 tags
else
    # PRIORITY 2: Fallback to EC2 tags (requires instance ID from metadata service)
    log "Domain not in environment, attempting to get from EC2 tags..."
    INSTANCE_ID=""
    CADDY_DOMAIN=""

    # Retry getting instance ID (metadata service may not be ready immediately)
    for i in {1..10}; do
        INSTANCE_ID=$(get_instance_metadata "instance-id")
        if [ -n "$INSTANCE_ID" ]; then
            log "✓ Got instance ID: $INSTANCE_ID"
            break
        fi
        if [ $i -lt 10 ]; then
            log " Attempt $i/10: Instance ID not available yet, waiting 2s..."
            sleep 2
        fi
    done

    if [ -n "$INSTANCE_ID" ]; then
        # Get domain from instance tags (set by Lambda BEFORE instance creation)
        # With predictable domain names, this should be available immediately
        log "Retrieving HttpsDomain tag from instance tags..."
        for i in {1..6}; do
            CADDY_DOMAIN=$(aws ec2 describe-tags --region "${AWS_REGION}" --filters "Name=resource-id,Values=${INSTANCE_ID}" "Name=key,Values=HttpsDomain" --query "Tags[0].Value" --output text 2>/dev/null || echo "")
            # "None" is what `--output text` yields when the tag is absent.
            if [ -n "$CADDY_DOMAIN" ] && [ "$CADDY_DOMAIN" != "None" ] && [ "$CADDY_DOMAIN" != "" ]; then
                log "✓ Found Caddy domain from tags: $CADDY_DOMAIN"
                break
            fi
            if [ $i -lt 6 ]; then
                log " Attempt $i/6: HttpsDomain tag not found yet, waiting 2s..."
                sleep 2
            fi
        done
    else
        log "WARNING: Could not get instance ID after retries"
    fi

    # Final check
    if [ -z "$CADDY_DOMAIN" ] || [ "$CADDY_DOMAIN" = "None" ] || [ "$CADDY_DOMAIN" = "" ]; then
        log "ERROR: Caddy domain not found - cannot deploy AWS Fellowship SUT without a valid domain"
        log " Ensure HttpsDomain tag is set before instance bootstrap"
        exit 1
    fi
fi

# Normalize to lowercase to avoid mixed-case DNS/tag drift
CADDY_DOMAIN=$(echo "$CADDY_DOMAIN" | tr '[:upper:]' '[:lower:]')

# Enforce domain presence for AWS deployment
if [ -z "$CADDY_DOMAIN" ] || [ "$CADDY_DOMAIN" = "None" ] || [ "$CADDY_DOMAIN" = "" ]; then
    log "ERROR: Caddy domain is required for AWS deployment"
    exit 1
fi
|
# Wait for DNS propagation before starting containers (required for Caddy automatic HTTPS)
# First fetch the instance's public IPv4 — the expected DNS answer.
PUBLIC_IP_FOR_DNS=$(get_instance_metadata "public-ipv4")
if [ -z "$PUBLIC_IP_FOR_DNS" ]; then
    log "ERROR: Could not retrieve instance public IP for DNS verification"
    exit 1
fi
|
# Resolve a hostname to a single IPv4 address.
#   $1 - domain name to resolve
# Prints the first IPv4 found (empty string when unresolvable); exits 0.
resolve_domain_ipv4() {
    local host="$1"
    local ip

    # Primary: NSS lookup restricted to IPv4; grep filters dotted quads.
    ip=$(getent ahostsv4 "$host" 2>/dev/null | awk '{print $1}' | grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' | head -1 || true)

    # Fallback: parse nslookup output when getent returned nothing.
    if [ -z "$ip" ]; then
        ip=$(nslookup "$host" 2>/dev/null | awk '/^Address: / {print $2}' | grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' | tail -1 || true)
    fi

    echo "$ip"
}
|
log "Waiting for DNS propagation: ${CADDY_DOMAIN} -> ${PUBLIC_IP_FOR_DNS}"
DNS_MATCHED="false"
# Poll up to 30 times x 10s (~5 minutes): Caddy's ACME challenge cannot
# succeed until the A record points at this instance.
for i in {1..30}; do
    RESOLVED_IP=$(resolve_domain_ipv4 "$CADDY_DOMAIN")
    if [ "$RESOLVED_IP" = "$PUBLIC_IP_FOR_DNS" ]; then
        DNS_MATCHED="true"
        log "✓ DNS propagation complete (${CADDY_DOMAIN} resolves to ${RESOLVED_IP})"
        break
    fi

    log " Attempt $i/30: ${CADDY_DOMAIN} resolves to '${RESOLVED_IP:-unresolved}' (expected ${PUBLIC_IP_FOR_DNS}), waiting 10s..."
    sleep 10
done

if [ "$DNS_MATCHED" != "true" ]; then
    log "ERROR: DNS propagation timeout after 5 minutes"
    log " ${CADDY_DOMAIN} did not resolve to instance public IP ${PUBLIC_IP_FOR_DNS}"
    log " Caddy automatic HTTPS cannot succeed until DNS is correct"
    exit 1
fi
|
# ── Derive DevOps HTTPS subdomain names ──────────────────────────────────────
# Pattern: jenkins-{CADDY_DOMAIN} and ide-{CADDY_DOMAIN}
# These may already be set from the early derivation above; re-assign to ensure
# they reflect the confirmed (possibly tag-derived) CADDY_DOMAIN.
JENKINS_DOMAIN="jenkins-${CADDY_DOMAIN}"
IDE_DOMAIN="ide-${CADDY_DOMAIN}"
GITEA_DOMAIN="gitea-${CADDY_DOMAIN}"
log "DevOps HTTPS subdomains confirmed:"
log " Jenkins: ${JENKINS_DOMAIN}"
log " IDE: ${IDE_DOMAIN}"
log " Gitea: ${GITEA_DOMAIN}"
|
# ── Resolve the Route53 hosted-zone ID for the DevOps subdomains ─────────────
# jenkins-/ide-/gitea- records must point at the same instance IP as
# CADDY_DOMAIN. The zone ID is looked up from, in priority order:
#   0. ROUTE53_ZONE_ID already supplied via the environment
#   1. EC2 instance tag Route53ZoneId (set by the provisioning Lambda)
#   2. SSM parameter /classroom/fellowship/route53-zone-id
#   3. Route53 lookup by walking up the parent domains (next section)
ROUTE53_ZONE_ID="${ROUTE53_ZONE_ID:-}"
if [ -n "$ROUTE53_ZONE_ID" ] && [ "$ROUTE53_ZONE_ID" != "None" ]; then
    log "Using Route53 zone ID from environment: ${ROUTE53_ZONE_ID}"
else
    ROUTE53_ZONE_ID=""
fi

# Source 1: EC2 instance tag (set by the provisioning Lambda).
# BUGFIX: this lookup previously ran unconditionally and overwrote a zone ID
# that had been provided via the environment (including with "" when the
# describe-tags call failed). It now only fills an empty value.
if [ -z "$ROUTE53_ZONE_ID" ] && [ -n "${INSTANCE_ID:-}" ]; then
    ROUTE53_ZONE_ID=$(aws ec2 describe-tags --region "${AWS_REGION}" \
        --filters "Name=resource-id,Values=${INSTANCE_ID}" "Name=key,Values=Route53ZoneId" \
        --query "Tags[0].Value" --output text 2>/dev/null || echo "")
    # `--output text` renders a missing tag as the literal string "None".
    [ "$ROUTE53_ZONE_ID" = "None" ] && ROUTE53_ZONE_ID=""
fi

# Source 2: SSM parameter.
if [ -z "$ROUTE53_ZONE_ID" ]; then
    ROUTE53_ZONE_ID=$(aws ssm get-parameter \
        --name "/classroom/fellowship/route53-zone-id" \
        --query "Parameter.Value" --output text --region "${AWS_REGION}" 2>/dev/null || echo "")
    [ "$ROUTE53_ZONE_ID" = "None" ] && ROUTE53_ZONE_ID=""
fi
|
|
|
|
# Source 3: Walk up the DNS tree, stripping one label at a time, until a
# matching hosted zone is found. A single sed strip is not enough when
# CADDY_DOMAIN has multiple subdomain levels (e.g.
# fellowship-<id>.fellowship.testingfantasy.com — stripping one label gives
# fellowship.testingfantasy.com which is NOT a hosted zone; the actual zone
# is testingfantasy.com two levels up).
if [ -z "$ROUTE53_ZONE_ID" ]; then
    ZONE_SUFFIX=$(echo "$CADDY_DOMAIN" | sed 's/^[^.]*\.//')
    # Loop while the suffix still contains a dot (i.e. at least two labels).
    while [ -n "$ZONE_SUFFIX" ] && [ "$ZONE_SUFFIX" != "${ZONE_SUFFIX#*.}" ]; do
        CANDIDATE=$(aws route53 list-hosted-zones-by-name \
            --dns-name "${ZONE_SUFFIX}" \
            --query "HostedZones[?Name==\`${ZONE_SUFFIX}.\`].Id" \
            --output text 2>/dev/null \
            | sed 's|/hostedzone/||g' || echo "")
        # BUGFIX: the query can match several zones (tab-separated `--output
        # text`); keep the first ID only. The old guard compared CANDIDATE to
        # the two-character literal string '\t', which never matches anything.
        CANDIDATE=$(echo "$CANDIDATE" | awk 'NR==1{print $1}')
        if [ -n "$CANDIDATE" ] && [ "$CANDIDATE" != "None" ]; then
            ROUTE53_ZONE_ID="$CANDIDATE"
            log " Route53 zone found via DNS tree walk: ${ZONE_SUFFIX} → ${ROUTE53_ZONE_ID}"
            break
        fi
        # Strip one more leading label and try the next parent.
        ZONE_SUFFIX=$(echo "$ZONE_SUFFIX" | sed 's/^[^.]*\.//')
    done
    [ "${ROUTE53_ZONE_ID:-}" = "None" ] && ROUTE53_ZONE_ID=""
fi
|
|
|
|
if [ -n "$ROUTE53_ZONE_ID" ]; then
    log "Creating/updating Route53 A records for DevOps subdomains (zone ${ROUTE53_ZONE_ID})..."
    # One UPSERT batch covering all three service subdomains, all pointing at
    # the same instance public IP as CADDY_DOMAIN. TTL is short (60s) so that
    # re-provisioning converges quickly.
    CHANGE_BATCH="{
      \"Comment\": \"Fellowship DevOps Escape Room HTTPS subdomains\",
      \"Changes\": [
        {
          \"Action\": \"UPSERT\",
          \"ResourceRecordSet\": {
            \"Name\": \"${JENKINS_DOMAIN}\",
            \"Type\": \"A\",
            \"TTL\": 60,
            \"ResourceRecords\": [{\"Value\": \"${PUBLIC_IP_FOR_DNS}\"}]
          }
        },
        {
          \"Action\": \"UPSERT\",
          \"ResourceRecordSet\": {
            \"Name\": \"${IDE_DOMAIN}\",
            \"Type\": \"A\",
            \"TTL\": 60,
            \"ResourceRecords\": [{\"Value\": \"${PUBLIC_IP_FOR_DNS}\"}]
          }
        },
        {
          \"Action\": \"UPSERT\",
          \"ResourceRecordSet\": {
            \"Name\": \"${GITEA_DOMAIN}\",
            \"Type\": \"A\",
            \"TTL\": 60,
            \"ResourceRecords\": [{\"Value\": \"${PUBLIC_IP_FOR_DNS}\"}]
          }
        }
      ]
    }"
    # BUGFIX: the previous `aws ... | tee -a $LOG_FILE && log ✓ || log WARNING`
    # tested tee's exit status (no pipefail is set), so AWS failures were
    # reported as success. Capture the output and test the AWS CLI status
    # directly; stdout already reaches $LOG_FILE through the script's global
    # `exec > >(tee -a ...)` redirection, so no extra tee is needed.
    if R53_OUTPUT=$(aws route53 change-resource-record-sets \
            --hosted-zone-id "$ROUTE53_ZONE_ID" \
            --change-batch "$CHANGE_BATCH" \
            --region "$AWS_REGION" 2>&1); then
        printf '%s\n' "$R53_OUTPUT"
        log "✓ Route53 A records upserted for ${JENKINS_DOMAIN}, ${IDE_DOMAIN} and ${GITEA_DOMAIN}"
    else
        printf '%s\n' "$R53_OUTPUT"
        log "WARNING: Route53 record update failed — manual DNS setup may be required"
    fi
else
    log "WARNING: Route53 zone ID not found — DevOps subdomains need manual DNS setup:"
    log " A record: ${JENKINS_DOMAIN} → ${PUBLIC_IP_FOR_DNS}"
    log " A record: ${IDE_DOMAIN} → ${PUBLIC_IP_FOR_DNS}"
    log " A record: ${GITEA_DOMAIN} → ${PUBLIC_IP_FOR_DNS}"
fi
|
|
|
|
# ── Write the devops-escape-room .env with the canonical HTTPS URLs ──────────
# The devops stack may already be running; Jenkins/Gitea pick up the new
# values on their next restart (or when Jenkins JCasC is reloaded).
# NOTE(review): passwords are fixed lab credentials baked into the escape-room
# exercise — confirm they are intentionally non-secret.
cat > /home/ec2-user/devops-escape-room/.env << EOF
JENKINS_ADMIN_PASSWORD=fellowship123
GITEA_ADMIN_USER=fellowship
GITEA_ADMIN_PASSWORD=fellowship123
GITEA_ADMIN_EMAIL=gandalf@fellowship.local
CODESERVER_PASSWORD=fellowship
JENKINS_URL=https://${JENKINS_DOMAIN}/
GITEA_DOMAIN=${GITEA_DOMAIN}
GITEA_ROOT_URL=https://${GITEA_DOMAIN}/
EOF
chown ec2-user:ec2-user /home/ec2-user/devops-escape-room/.env
log "✓ Updated devops-escape-room/.env with JENKINS_URL=https://${JENKINS_DOMAIN}/ GITEA_DOMAIN=${GITEA_DOMAIN}"

# Restart Gitea so it picks up the correct GITEA_ROOT_URL and GITEA_DOMAIN.
# (The escape room stack was started earlier with an empty GITEA_DOMAIN; now
# that the domain is known, restart just the gitea service.)
# BUGFIX: the old `cmd 2>&1 | tee -a "$LOG_FILE" && log ✓ || log WARNING`
# pattern tested tee's exit status, not the restart's, so failures were
# logged as success. Test the compose command directly; its output still
# reaches $LOG_FILE via the global `exec > >(tee -a ...)` redirection.
if run_as_ec2user_docker "cd ~/devops-escape-room && docker compose restart gitea" 2>&1; then
    log "✓ Gitea restarted with GITEA_ROOT_URL=https://${GITEA_DOMAIN}/"
else
    log "WARNING: Gitea restart failed — ROOT_URL may still point to localhost"
fi
|
|
|
|
# ── Deploy the Fellowship SUT ────────────────────────────────────────────────
log "Deploying SUT..."

# Sanity-check prerequisites before doing anything expensive.
[ -f "/home/ec2-user/docker-compose.yml" ] || {
    log "ERROR: SUT docker-compose.yml not found"
    exit 1
}
[ -x "/home/ec2-user/.docker/cli-plugins/docker-compose" ] || {
    log "ERROR: Docker Compose plugin not executable"
    exit 1
}

# ── Azure OpenAI credentials (optional) ──────────────────────────────────────
# Without them the SUT still runs, using fallback responses only.
log "Retrieving Azure OpenAI credentials from Secrets Manager..."
AZURE_SECRET=""
AZURE_ENDPOINT=""
AZURE_API_KEY=""
AZURE_DEPLOYMENT=""
AZURE_API_VERSION=""

# Primary secret path (new schema).
SECRET_NAME="azure/llm/configs"

# Legacy fallback path (old schema), parameterized by environment.
ENVIRONMENT="${ENVIRONMENT:-dev}"
LEGACY_SECRET_NAME="classroom/shared/${ENVIRONMENT}/azure-openai"
|
|
|
|
#######################################
# Normalize an Azure OpenAI endpoint to its resource base URL.
# Example:
#   https://res.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=...
#   -> https://res.openai.azure.com
# Arguments: $1 - endpoint URL (full operation URL or base URL)
# Outputs:   normalized endpoint on stdout
#######################################
normalize_azure_endpoint() {
    local ep="$1"

    # Drop any query string.
    ep="${ep%%\?*}"

    # A full operation URL contains "/openai/"; keep only what precedes it.
    case "$ep" in
        *"/openai/"*) ep="${ep%%/openai/*}" ;;
    esac

    # Emit without a trailing slash for consistency.
    printf '%s\n' "${ep%/}"
}
|
|
|
|
# Fetch a SecretString by ID; prints "" (never fails) when unavailable.
fetch_azure_secret() {
    aws secretsmanager get-secret-value \
        --secret-id "$1" \
        --region "${AWS_REGION}" \
        --query SecretString \
        --output text 2>/dev/null || echo ""
}

# Try the primary (new-schema) path first, then the legacy path; remember
# which one actually supplied the secret for later log messages.
AZURE_SECRET=$(fetch_azure_secret "$SECRET_NAME")

if [ -z "$AZURE_SECRET" ] || [ "$AZURE_SECRET" = "None" ]; then
    log "Primary secret not found, trying legacy Azure OpenAI secret..."
    AZURE_SECRET=$(fetch_azure_secret "$LEGACY_SECRET_NAME")
    if [ -n "$AZURE_SECRET" ] && [ "$AZURE_SECRET" != "None" ]; then
        SECRET_NAME="$LEGACY_SECRET_NAME"
    fi
fi
|
|
|
|
# ── Parse the Azure OpenAI secret into AZURE_* variables ─────────────────────
# Supports two payload shapes:
#   • New schema:    a JSON *array* of config objects (azure/llm/configs)
#   • Legacy schema: a single JSON object (classroom/shared/<env>/azure-openai)
# Parser preference: jq, then python3, then best-effort grep/sed.
if [ -z "$AZURE_SECRET" ] || [ "$AZURE_SECRET" = "None" ]; then
    log "WARNING: Failed to retrieve Azure OpenAI secret from Secrets Manager"
    log " Secret name: $SECRET_NAME"
    log " Region: $AWS_REGION"
    log " Fellowship SUT will work with fallback responses only (no Azure AI)"
else
    # Parse secret JSON and extract values from either:
    # - New schema: array under azure/llm/configs
    # - Legacy schema: single object under classroom/shared/<env>/azure-openai
    if command -v jq &> /dev/null; then
        # For array payloads `pick` selects, in order of preference:
        #   1. a config whose name matches "gpt-4o" / "gpt 4o"
        #   2. a config whose endpoint is a chat/completions operation URL
        #   3. any config with "gpt" in its name
        #   4. the first element
        # Non-array (legacy object) payloads pass through unchanged.
        JQ_PICK_FILTER='
  def pick:
    if type == "array" then
      (map(select((.config_name // "" | ascii_downcase) | test("gpt[ -]?4o"))) | .[0])
      // (map(select((.endpoint // "" | ascii_downcase) | contains("/chat/completions"))) | .[0])
      // (map(select((.config_name // "" | ascii_downcase) | test("gpt"))) | .[0])
      // .[0]
    else . end;
  pick
'

        AZURE_ENDPOINT=$(echo "$AZURE_SECRET" | jq -r "$JQ_PICK_FILTER | .endpoint // empty" 2>/dev/null || echo "")
        AZURE_API_KEY=$(echo "$AZURE_SECRET" | jq -r "$JQ_PICK_FILTER | .api_key // empty" 2>/dev/null || echo "")
        AZURE_DEPLOYMENT=$(echo "$AZURE_SECRET" | jq -r "$JQ_PICK_FILTER | .deployment_name // .deployment // empty" 2>/dev/null || echo "")
        AZURE_API_VERSION=$(echo "$AZURE_SECRET" | jq -r "$JQ_PICK_FILTER | .api_version // empty" 2>/dev/null || echo "")
    elif command -v python3 &> /dev/null; then
        # Same selection logic in Python. The secret is passed via the
        # environment (not interpolation) to avoid quoting issues; the quoted
        # 'PY' delimiter prevents any shell expansion inside the heredoc.
        PARSED_AZURE=$(AZURE_SECRET="$AZURE_SECRET" python3 << 'PY'
import json
import os

raw = os.environ.get("AZURE_SECRET", "")
try:
    data = json.loads(raw)
except Exception:
    data = {}

selected = {}
if isinstance(data, list):
    def config_name(item):
        return str(item.get("config_name", "")).lower()
    def endpoint(item):
        return str(item.get("endpoint", "")).lower()

    selected = (
        next((x for x in data if "gpt-4o" in config_name(x) or "gpt 4-o" in config_name(x)), None)
        or next((x for x in data if "/chat/completions" in endpoint(x)), None)
        or next((x for x in data if "gpt" in config_name(x)), None)
        or (data[0] if data else {})
    )
elif isinstance(data, dict):
    selected = data

endpoint_val = selected.get("endpoint", "")
api_key_val = selected.get("api_key", "")
deployment_val = selected.get("deployment_name") or selected.get("deployment") or ""
api_version_val = selected.get("api_version", "")

print(f"endpoint={endpoint_val}")
print(f"api_key={api_key_val}")
print(f"deployment={deployment_val}")
print(f"api_version={api_version_val}")
PY
)

        # Split the key=value lines emitted by the Python helper.
        while IFS='=' read -r key value; do
            case "$key" in
                endpoint) AZURE_ENDPOINT="$value" ;;
                api_key) AZURE_API_KEY="$value" ;;
                deployment) AZURE_DEPLOYMENT="$value" ;;
                api_version) AZURE_API_VERSION="$value" ;;
            esac
        done <<< "$PARSED_AZURE"
    else
        # Last resort: regex-scrape the first occurrence of each field.
        # Only handles flat "key": "value" layouts reliably (legacy schema).
        log "WARNING: Neither jq nor python3 available; using best-effort grep parsing"
        AZURE_ENDPOINT=$(echo "$AZURE_SECRET" | grep -m1 -o '"endpoint"[[:space:]]*:[[:space:]]*"[^"]*"' | sed -E 's/^.*"endpoint"[[:space:]]*:[[:space:]]*"([^"]*)"$/\1/' || echo "")
        AZURE_API_KEY=$(echo "$AZURE_SECRET" | grep -m1 -o '"api_key"[[:space:]]*:[[:space:]]*"[^"]*"' | sed -E 's/^.*"api_key"[[:space:]]*:[[:space:]]*"([^"]*)"$/\1/' || echo "")
        AZURE_DEPLOYMENT=$(echo "$AZURE_SECRET" | grep -m1 -o '"deployment_name"[[:space:]]*:[[:space:]]*"[^"]*"' | sed -E 's/^.*"deployment_name"[[:space:]]*:[[:space:]]*"([^"]*)"$/\1/' || echo "")
        # Fall back to the legacy "deployment" key name.
        if [ -z "$AZURE_DEPLOYMENT" ]; then
            AZURE_DEPLOYMENT=$(echo "$AZURE_SECRET" | grep -m1 -o '"deployment"[[:space:]]*:[[:space:]]*"[^"]*"' | sed -E 's/^.*"deployment"[[:space:]]*:[[:space:]]*"([^"]*)"$/\1/' || echo "")
        fi
        AZURE_API_VERSION=$(echo "$AZURE_SECRET" | grep -m1 -o '"api_version"[[:space:]]*:[[:space:]]*"[^"]*"' | sed -E 's/^.*"api_version"[[:space:]]*:[[:space:]]*"([^"]*)"$/\1/' || echo "")
    fi

    # Ensure endpoint is in Azure resource base URL format expected by AzureOpenAI client
    AZURE_ENDPOINT=$(normalize_azure_endpoint "$AZURE_ENDPOINT")

    if [ -n "$AZURE_ENDPOINT" ] && [ -n "$AZURE_API_KEY" ] && [ -n "$AZURE_DEPLOYMENT" ]; then
        log "✓ Azure OpenAI credentials retrieved successfully"
        log " Secret source: ${SECRET_NAME}"
        log " Deployment: ${AZURE_DEPLOYMENT:-not set}"
        log " API Version: ${AZURE_API_VERSION:-not set}"
        log " Endpoint: ${AZURE_ENDPOINT}"
    else
        log "WARNING: Failed to parse Azure credentials from secret"
        log " Secret source: ${SECRET_NAME}"
        log " Endpoint: ${AZURE_ENDPOINT:-empty}"
        # NOTE(review): this logs the first 10 characters of the API key —
        # confirm partial-key logging is acceptable for this environment.
        log " API Key: ${AZURE_API_KEY:0:10}***"
        log " Deployment: ${AZURE_DEPLOYMENT:-empty}"
    fi
fi
|
|
|
|
# Create .env file for docker-compose BEFORE deployment
|
|
# This ensures all environment variables are persistently available
|
|
log "Updating .env file with deployment-specific configuration..."

#######################################
# Insert or update KEY=VALUE in an env file, keeping one entry per key.
# Arguments:
#   $1 - variable name
#   $2 - value
#   $3 - env file path (optional; defaults to /home/ec2-user/.env for
#        backward compatibility with existing call sites)
# Notes:
#   BUGFIX: the previous sed invocation used BSD syntax (`sed -i "" ...`).
#   On GNU sed (Amazon Linux) the empty string is parsed as the sed script
#   and the real script as a filename, so existing keys were never updated.
#   Values containing '|' would still confuse the sed expression; none of
#   the values written by this script contain one.
#######################################
update_env_var() {
    local key="$1"
    local value="$2"
    local env_file="${3:-/home/ec2-user/.env}"

    if grep -q "^${key}=" "$env_file" 2>/dev/null; then
        # Key present: rewrite its line in place (GNU sed syntax).
        sed -i "s|^${key}=.*|${key}=${value}|" "$env_file"
    else
        # Key absent: append.
        echo "${key}=${value}" >> "$env_file"
    fi
}
|
|
|
|
# ── Populate /home/ec2-user/.env for the SUT docker-compose stack ────────────
# Domain configuration from the resolved values above.
update_env_var "CADDY_DOMAIN" "${CADDY_DOMAIN:-localhost}"
update_env_var "JENKINS_DOMAIN" "${JENKINS_DOMAIN:-}"
update_env_var "IDE_DOMAIN" "${IDE_DOMAIN:-}"
update_env_var "GITEA_DOMAIN" "${GITEA_DOMAIN:-}"
update_env_var "CADDYFILE_PATH" "./caddy/Caddyfile"
update_env_var "FRONTEND_MODE" "prod"
update_env_var "FLASK_ENV" "production"
update_env_var "NODE_ENV" "production"

# Optional Azure OpenAI configuration (empty when the secret was unavailable).
if [ -n "$AZURE_ENDPOINT" ]; then
    update_env_var "AZURE_OPENAI_ENDPOINT" "${AZURE_ENDPOINT}"
    update_env_var "AZURE_OPENAI_API_KEY" "${AZURE_API_KEY}"
    update_env_var "AZURE_OPENAI_DEPLOYMENT" "${AZURE_DEPLOYMENT}"
    update_env_var "AZURE_OPENAI_API_VERSION" "${AZURE_API_VERSION}"
fi

# GitHub token, if present.
# NOTE(review): GITHUB_TOKEN is only fetched from Secrets Manager *later* in
# this script, so at this point it is normally unset and never reaches .env —
# confirm intent, or move the token retrieval ahead of this section.
# The :- guard also keeps this line safe if `set -u` is ever enabled.
if [ -n "${GITHUB_TOKEN:-}" ]; then
    update_env_var "GITHUB_TOKEN" "${GITHUB_TOKEN}"
fi

chown ec2-user:ec2-user /home/ec2-user/.env
chmod 644 /home/ec2-user/.env

log "✓ Updated /home/ec2-user/.env with deployment configuration"
if [ -n "$AZURE_ENDPOINT" ]; then
    log " ✓ Azure OpenAI configured (deployment: ${AZURE_DEPLOYMENT})"
else
    log " ⚠ Azure OpenAI not configured - using fallback responses"
fi

# Show the resulting domain/Azure configuration with the API key masked.
log "Verifying .env file contents:"
grep -E "^CADDY_DOMAIN=|^AZURE_OPENAI" /home/ec2-user/.env 2>/dev/null | sed 's/AZURE_OPENAI_API_KEY=.*/AZURE_OPENAI_API_KEY=***MASKED***/g' | sed 's/^/ /' || true

# Hard fail when CADDY_DOMAIN is empty — compose interpolation would produce
# a broken Caddy configuration.
if [ -z "$CADDY_DOMAIN" ]; then
    log "ERROR: CADDY_DOMAIN is empty - docker-compose will not start properly"
    exit 1
fi
|
|
|
|
# ── Pre-build network checks ─────────────────────────────────────────────────
# docker-compose build clones from github.com, so verify reachability first.
log "Performing pre-deployment network connectivity checks..."
if ! verify_network_connectivity; then
    log "ERROR: Network connectivity checks failed"
    log " Cannot proceed with docker-compose build (needs github.com access)"
    exit 1
fi
log "OK: Network connectivity verified"

# ── GitHub token for private-repo clones during docker builds ────────────────
log "Retrieving GitHub credentials from AWS Secrets Manager..."
GITHUB_TOKEN=""
GITHUB_SECRET=""

GITHUB_SECRET=$(aws secretsmanager get-secret-value \
    --secret-id "classroom/shared/github-token" \
    --region "${AWS_REGION}" \
    --query SecretString \
    --output text 2>/dev/null || echo "")

if [ -n "$GITHUB_SECRET" ] && [ "$GITHUB_SECRET" != "None" ]; then
    # The secret may be a bare token string or a JSON object.
    if echo "$GITHUB_SECRET" | grep -q '{'; then
        if command -v jq &> /dev/null; then
            GITHUB_TOKEN=$(echo "$GITHUB_SECRET" | jq -r '.token // .github_token // .pat // empty' 2>/dev/null || echo "")
        else
            # Best-effort extraction without jq; assumes compact "token":"..."
            # with no whitespace around the colon.
            GITHUB_TOKEN=$(echo "$GITHUB_SECRET" | grep -o '"token":"[^"]*' | cut -d'"' -f4 || echo "")
        fi
    else
        GITHUB_TOKEN="$GITHUB_SECRET"
    fi
fi

if [ -n "$GITHUB_TOKEN" ] && [ "$GITHUB_TOKEN" != "None" ]; then
    log "OK: GitHub credentials found - configuring git"
    # Store-based credential helper so docker builds cloning private HTTPS
    # repositories authenticate automatically.
    # NOTE(review): this script runs as root, so `git config --global` edits
    # /root/.gitconfig while the credentials file is written under
    # /home/ec2-user — confirm which user actually performs the clones.
    git config --global credential.helper store
    echo "https://${GITHUB_TOKEN}@github.com" > /home/ec2-user/.git-credentials
    chmod 600 /home/ec2-user/.git-credentials
    # Consistency fix: every other file created under /home/ec2-user gets
    # handed to ec2-user; without this the file stays root-owned and is
    # unreadable by ec2-user's git.
    chown ec2-user:ec2-user /home/ec2-user/.git-credentials
    export GIT_ASKPASS=/bin/true
    export GITHUB_TOKEN
else
    log "WARNING: GitHub token not found in Secrets Manager (public repos only)"
fi
|
|
|
|
# ── Start the SUT stack (env flows via ~/.env plus exported variables) ───────
log "Starting SUT containers (with automatic retry on network issues)..."
log " CADDY_DOMAIN: ${CADDY_DOMAIN}"

# ── Select the fellowship Caddyfile for the tutorial stack ───────────────────
# The single Caddy container serves three HTTPS sites:
#   • CADDY_DOMAIN   → SUT (reverse_proxy to backend:5000 / frontend:3000)
#   • JENKINS_DOMAIN → Jenkins CI (reverse_proxy to host.docker.internal:8080)
#   • IDE_DOMAIN     → code-server (reverse_proxy to host.docker.internal:8443)
# Only Caddyfile.fellowship contains all three site blocks. The staging
# Caddyfile and Caddyfile.prod hold just the SUT block and must NOT be used
# here: Caddy would fail on empty JENKINS_DOMAIN/IDE_DOMAIN and the DevOps
# Escape Room tools would not be exposed over HTTPS at all.
FELLOWSHIP_CADDYFILE="/home/ec2-user/caddy/Caddyfile.fellowship"
ACTIVE_CADDYFILE="/home/ec2-user/caddy/Caddyfile"
if [ -f "$FELLOWSHIP_CADDYFILE" ]; then
    cp "$FELLOWSHIP_CADDYFILE" "$ACTIVE_CADDYFILE"
    chown ec2-user:ec2-user "$ACTIVE_CADDYFILE"
    log "✓ Copied Caddyfile.fellowship → caddy/Caddyfile (SUT + Jenkins + IDE HTTPS)"
else
    log "WARNING: Caddyfile.fellowship not found at ${FELLOWSHIP_CADDYFILE}"
    log " Jenkins and IDE will NOT be served via HTTPS."
    log " Ensure caddy/Caddyfile.fellowship is present in the SUT tarball (see caddy/ directory)."
fi
|
|
|
|
# Function to deploy SUT with retry logic for network failures
|
|
#######################################
# Start the SUT via `docker compose up -d`, retrying transient failures.
# Network/credential-looking errors are retried with exponential backoff
# (10s, 20s, 40s); any other error fails immediately.
# Globals:  uses log, run_as_ec2user_docker; sets DEPLOY_OUTPUT/DEPLOY_EXIT_CODE
# Returns:  0 once compose succeeds, 1 otherwise
#######################################
deploy_sut_with_retry() {
    local max_attempts=3
    local attempt=1
    local wait_time=10

    while [ $attempt -le $max_attempts ]; do
        log " Deployment attempt $attempt/$max_attempts..."

        # cd sets the working directory so compose auto-loads ~/.env.
        DEPLOY_OUTPUT=$(run_as_ec2user_docker "cd ~ && docker compose up -d 2>&1" 2>&1)
        DEPLOY_EXIT_CODE=$?

        if [ $DEPLOY_EXIT_CODE -eq 0 ]; then
            log "OK: Docker Compose started successfully"
            return 0
        fi

        # Retry only errors that look network/DNS/credential related.
        if echo "$DEPLOY_OUTPUT" | grep -iE "network|dns|resolve|github|credential|authentication|connection refused|timeout|no such device|temporary failure" >/dev/null 2>&1; then
            log " WARNING: Network-related error detected, will retry..."
            # BUGFIX: the old `$(echo \"$DEPLOY_OUTPUT\" | head -2 | tail -1)`
            # expanded the variable unquoted, collapsing all newlines into one
            # line (plus stray literal quote characters), so head/tail never
            # selected the intended line. Quoted printf preserves the lines.
            log " Error: $(printf '%s\n' "$DEPLOY_OUTPUT" | head -n 2 | tail -n 1)"

            if [ $attempt -lt $max_attempts ]; then
                log " Waiting ${wait_time}s before retry (attempt $((attempt + 1))/$max_attempts)..."
                sleep $wait_time
                wait_time=$((wait_time * 2)) # Exponential backoff: 10s, 20s, 40s
                attempt=$((attempt + 1))
                continue
            fi
        else
            # Non-network error: fail immediately with full output.
            log "ERROR: Failed to start SUT containers (non-recoverable error)"
            log "Docker Compose output:"
            echo "$DEPLOY_OUTPUT" | sed 's/^/ /'
            return 1
        fi

        attempt=$((attempt + 1))
    done

    # All retries exhausted.
    log "ERROR: Failed to start SUT containers after $max_attempts attempts"
    log "Docker Compose output:"
    echo "$DEPLOY_OUTPUT" | sed 's/^/ /'
    log "Checking Docker logs for more information..."
    run_as_ec2user_docker "cd ~ && docker compose logs" 2>&1 | tail -50 | sed 's/^/ /'
    return 1
}
|
|
|
|
# Kick off the deployment; abort the whole setup if it cannot start.
deploy_sut_with_retry || exit 1

log "Waiting for containers to be in running state..."

# Poll up to 12 × 5s for the expected number of running containers.
EXPECTED_CONTAINERS=3
CONTAINER_WAIT_COUNT=0
while [ $CONTAINER_WAIT_COUNT -lt 12 ]; do
    RUNNING_CONTAINERS=$(run_as_ec2user_docker "cd ~ && docker compose ps -q --status running 2>/dev/null | wc -l" 2>/dev/null || echo "0")

    if [ "$RUNNING_CONTAINERS" -ge "$EXPECTED_CONTAINERS" ]; then
        log "✓ All required containers running ($RUNNING_CONTAINERS/$EXPECTED_CONTAINERS)"
        break
    fi

    log " Waiting for containers... ($RUNNING_CONTAINERS/$EXPECTED_CONTAINERS running, attempt $((CONTAINER_WAIT_COUNT + 1))/12)"
    sleep 5
    CONTAINER_WAIT_COUNT=$((CONTAINER_WAIT_COUNT + 1))
done
|
|
|
|
# ── Backend readiness: poll state for up to 20 × 3s = 60s ────────────────────
log "Waiting for backend service to be healthy..."
BACKEND_READY=false
for i in {1..20}; do
    BACKEND_STATUS=$(run_as_ec2user_docker "cd ~ && docker compose ps backend --format json 2>/dev/null" | grep -o '"State":"running"' || echo "")
    if [ -n "$BACKEND_STATUS" ]; then
        log "✓ Backend container is running"
        BACKEND_READY=true
        break
    fi

    log " Waiting for backend to be ready... (attempt $i/20)"
    sleep 3
done

# ── Frontend readiness: watch logs for a successful-compile marker ───────────
log "Waiting for frontend to compile and start..."
FRONTEND_READY=false
for i in {1..20}; do
    FRONTEND_LOGS=$(run_as_ec2user_docker "cd ~ && docker compose logs frontend 2>&1" | grep -iE "compiled successfully|webpack compiled|app is running on" || echo "")
    if [ -n "$FRONTEND_LOGS" ]; then
        log "✓ Frontend is ready"
        FRONTEND_READY=true
        break
    fi

    log " Waiting for frontend compilation... (attempt $i/20)"
    sleep 3
done

# A frontend that never compiled is fatal: dump diagnostics and stop.
if [ "$FRONTEND_READY" != "true" ]; then
    log "ERROR: Frontend did not become ready within timeout"
    dump_runtime_diagnostics
    exit 1
fi
|
|
|
|
# ── Post-deploy health gates (local HTTP then HTTPS through Caddy) ───────────
log "Running post-deploy health gates..."
HTTP_OK=false
GATE_ATTEMPT=0
while [ $GATE_ATTEMPT -lt 20 ]; do
    GATE_ATTEMPT=$((GATE_ATTEMPT + 1))
    if curl -sSf --max-time 5 "http://localhost/" >/dev/null 2>&1; then
        HTTP_OK=true
        log "✓ Local HTTP health gate passed"
        break
    fi
    log " Waiting for local HTTP health gate... (attempt $GATE_ATTEMPT/20)"
    sleep 3
done

# HTTPS gets a longer window (cert issuance); -k because localhost does not
# match the certificate's domain.
HTTPS_OK=false
GATE_ATTEMPT=0
while [ $GATE_ATTEMPT -lt 30 ]; do
    GATE_ATTEMPT=$((GATE_ATTEMPT + 1))
    if curl -k -sSf --max-time 6 "https://localhost/" >/dev/null 2>&1; then
        HTTPS_OK=true
        log "✓ Local HTTPS health gate passed"
        break
    fi
    log " Waiting for local HTTPS health gate... (attempt $GATE_ATTEMPT/30)"
    sleep 3
done

if [ "$HTTP_OK" != "true" ] || [ "$HTTPS_OK" != "true" ]; then
    log "ERROR: Post-deploy health gates failed (HTTP_OK=${HTTP_OK}, HTTPS_OK=${HTTPS_OK})"
    dump_runtime_diagnostics
    exit 1
fi

# ── Verify CADDY_DOMAIN actually reached the Caddy container ─────────────────
log "Verifying environment variables in Caddy container..."
CADDY_ENV=$(run_as_ec2user_docker "cd ~ && docker inspect fellowship-caddy --format='{{.Config.Env}}' 2>/dev/null | grep -o 'CADDY_DOMAIN=[^[:space:]]*' || echo 'NOT FOUND'" 2>/dev/null)
if [ -n "$CADDY_ENV" ] && [ "$CADDY_ENV" != "NOT FOUND" ]; then
    log "✓ CADDY_DOMAIN verified in container: $CADDY_ENV"
else
    log "WARNING: CADDY_DOMAIN not found in container environment"
    log " This may cause connection issues"
    log " Container environment (first 20 vars):"
    run_as_ec2user_docker "cd ~ && docker exec fellowship-caddy env 2>/dev/null | head -20" 2>/dev/null | sed 's/^/ /' || true
fi

# ── Final container status snapshot ──────────────────────────────────────────
log "Final container status:"
run_as_ec2user_docker "cd ~ && docker compose ps" 2>&1 | sed 's/^/ /'
|
|
|
|
# ── Final summary banner with every service URL and lab credential ───────────
PUBLIC_IP=$(get_instance_metadata "public-ipv4")
if [ -z "$PUBLIC_IP" ]; then
    PUBLIC_IP="N/A"
fi

log "=========================================="
log "Setup Complete"
log "=========================================="
log "Public IP: $PUBLIC_IP"
log ""
log "─── Fellowship SUT ────────────────────────"
log " HTTPS: https://${CADDY_DOMAIN}/"
log ""
log "─── DevOps Escape Room ────────────────────"
log " Jenkins CI (HTTPS): https://${JENKINS_DOMAIN}/"
log " Jenkins CI (direct): http://${PUBLIC_IP}:8080 (fellowship / fellowship123)"
log " IDE / code-server (HTTPS): https://${IDE_DOMAIN}/"
log " IDE / code-server (direct): http://${PUBLIC_IP}:8443 (password: fellowship)"
log " Gitea Git: http://${PUBLIC_IP}:3030 (fellowship / fellowship123)"
log " MailHog UI: http://${PUBLIC_IP}:8025"
log "=========================================="
|