From e877e5b8ff9de7f66f8fc1d64a4ae5929497fd6f Mon Sep 17 00:00:00 2001 From: Anto01 Date: Wed, 8 Apr 2026 18:08:49 -0400 Subject: [PATCH] deploy: version-visible /health + deploy.sh + update runbook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dalidou Claude's validation run against the live service exposed a structural gap: the deployment at /srv/storage/atocore/app has no git connection, the running container was built from pre-Phase-9 source, and /health hardcoded 'version: 0.1.0' so drift is invisible. Weeks of work have been shipping to Gitea but never reaching the live service. This commit fixes both the drift-invisibility problem and the absence of an update workflow, so the next deploy to Dalidou can go live cleanly and future drifts surface immediately. Layer 1: deployment drift is now visible via /health ---------------------------------------------------- - src/atocore/__init__.py: __version__ bumped from 0.1.0 to 0.2.0 and documented as the source of truth for the deployed code version, with a history block explaining when each bump happens (API surface change, schema change, user-visible behavior change) - src/atocore/main.py: FastAPI constructor now uses __version__ instead of the hardcoded '0.1.0' string, so the OpenAPI docs reflect the actual code version - src/atocore/api/routes.py: /health now reads from __version__ dynamically. Both the existing 'version' field and a new 'code_version' field report the same value for backwards compat. A new docstring explains that comparing this to the main branch's __version__ is the fastest way to detect drift. - pyproject.toml: version bumped to 0.2.0 to stay in sync The comparison is now: curl /health -> "code_version": "0.2.0" grep __version__ src/atocore/__init__.py -> "0.2.0" If those differ, the deployment is stale. Concrete, unambiguous. 
Layer 2: deploy.sh as the canonical update path ----------------------------------------------- New file: deploy/dalidou/deploy.sh One-shot bash script that handles both the first-time deploy (where /srv/storage/atocore/app may not be a git repo yet) and the ongoing update case. Steps: 1. If app dir is not a git checkout, back it up as <dir>.pre-git-<timestamp> and re-clone from Gitea. If it IS a checkout, fetch + reset --hard origin/<branch>. 2. Report the deployable commit SHA 3. Check that deploy/dalidou/.env exists (hard fail if missing with a clear message pointing at .env.example) 4. docker compose up -d --build — rebuilds the image from current source, restarts the container 5. Poll /health for up to 30 seconds; on failure, print the last 50 lines of container logs and exit non-zero 6. Parse /health.code_version and compare to the __version__ in the freshly-pulled source. If they differ, exit non-zero with a message suggesting docker compose down && up 7. On success, report commit + code_version + "health: ok" Configurable via env vars: - ATOCORE_APP_DIR (default /srv/storage/atocore/app) - ATOCORE_GIT_REMOTE (default http://dalidou:3000/Antoine/ATOCore.git) - ATOCORE_BRANCH (default main) - ATOCORE_HEALTH_URL (default http://127.0.0.1:8100/health) - ATOCORE_DEPLOY_DRY_RUN=1 for preview-only mode Explicit non-goals documented in the script header: - does not manage secrets (.env is the caller's responsibility) - does not take a pre-deploy backup (call /admin/backup first if you want one) - does not roll back on failure (redeploy a known-good commit to recover) - does not touch the DB directly — schema migrations run at service startup via the lifespan handler, and all existing _apply_migrations ALTERs are idempotent ADD COLUMN operations Layer 3: updated docs/dalidou-deployment.md ------------------------------------------- - First-time deployment steps now explicitly say "git clone", not "place the repository", so future first-time deploys don't end up as static snapshots again - 
New "Updating a running deployment" section covering deploy.sh usage with all three modes (normal / branch override / dry-run) - New "Deployment drift detection" section with the one-liner comparison between /health code_version and the repo's __version__ - New "Schema migrations on redeploy" section enumerating the exact ALTER TABLE statements that run on a pre-0.2.0 -> 0.2.0 upgrade, confirming they are additive-only and safe, and recommending a backup via /admin/backup before any redeploy Full suite: 215 passing, 1 warning. No test was hardcoded to the old version string, so the version bump was safe without test changes. What this commit does NOT do ---------------------------- - Does NOT execute the deploy on the live Dalidou instance. That requires Dalidou access and is the next step. A ready-to-paste prompt for Dalidou Claude will be provided separately. - Does NOT add CI/CD, webhook-based auto-deploy, or reverse proxy. Those remain in the 'deferred' section of the deployment doc. - Does NOT change the Dockerfile. The existing 'COPY source at build time' pattern is what deploy.sh relies on — rebuilding the image picks up new code. - Does NOT modify the database schema. The Phase 9 migrations that Dalidou's DB needs will be applied automatically on next service startup via the existing _apply_migrations path. 
--- deploy/dalidou/deploy.sh | 198 +++++++++++++++++++++++++++++++++++++ docs/dalidou-deployment.md | 101 +++++++++++++++++-- pyproject.toml | 2 +- src/atocore/__init__.py | 14 ++- src/atocore/api/routes.py | 15 ++- src/atocore/main.py | 3 +- 6 files changed, 320 insertions(+), 13 deletions(-) create mode 100644 deploy/dalidou/deploy.sh diff --git a/deploy/dalidou/deploy.sh b/deploy/dalidou/deploy.sh new file mode 100644 index 0000000..c2c7551 --- /dev/null +++ b/deploy/dalidou/deploy.sh @@ -0,0 +1,198 @@ +#!/usr/bin/env bash +# +# deploy/dalidou/deploy.sh +# ------------------------- +# One-shot deploy script for updating the running AtoCore container +# on Dalidou from the current Gitea main branch. +# +# The script is idempotent and safe to re-run. It handles both the +# first-time deploy (where /srv/storage/atocore/app may not yet be +# a git checkout) and the ongoing update case (where it is). +# +# Usage +# ----- +# +# # Normal update from main (most common) +# bash deploy/dalidou/deploy.sh +# +# # Deploy a specific branch or tag +# ATOCORE_BRANCH=codex/some-feature bash deploy/dalidou/deploy.sh +# +# # Dry-run: show what would happen without touching anything +# ATOCORE_DEPLOY_DRY_RUN=1 bash deploy/dalidou/deploy.sh +# +# Environment variables +# --------------------- +# +# ATOCORE_APP_DIR default /srv/storage/atocore/app +# ATOCORE_GIT_REMOTE default http://dalidou:3000/Antoine/ATOCore.git +# ATOCORE_BRANCH default main +# ATOCORE_DEPLOY_DRY_RUN if set to 1, report only, no mutations +# ATOCORE_HEALTH_URL default http://127.0.0.1:8100/health +# +# Safety rails +# ------------ +# +# - If the app dir exists but is NOT a git repo, the script renames +# it to <dir>.pre-git-<timestamp> before re-cloning, so you never +# lose the pre-existing snapshot to a git clobber. +# - If the health check fails after restart, the script exits +# non-zero and prints the container logs tail for diagnosis. 
+# - Dry-run mode is the default recommendation for the first deploy +# on a new environment: it shows the planned git operations and +# the compose command without actually running them. +# +# What this script does NOT do +# ---------------------------- +# +# - Does not manage secrets / .env files. The caller is responsible +# for placing deploy/dalidou/.env before running. +# - Does not run a backup before deploying. Run the backup endpoint +# first if you want a pre-deploy snapshot. +# - Does not roll back on health-check failure. If deploy fails, +# the previous container is already stopped; you need to redeploy +# a known-good commit to recover. +# - Does not touch the database. The Phase 9 schema migrations in +# src/atocore/models/database.py::_apply_migrations are idempotent +# ALTER TABLE ADD COLUMN calls that run at service startup via the +# lifespan handler. Stale pre-Phase-9 schema is upgraded in place. + +set -euo pipefail + +APP_DIR="${ATOCORE_APP_DIR:-/srv/storage/atocore/app}" +GIT_REMOTE="${ATOCORE_GIT_REMOTE:-http://dalidou:3000/Antoine/ATOCore.git}" +BRANCH="${ATOCORE_BRANCH:-main}" +HEALTH_URL="${ATOCORE_HEALTH_URL:-http://127.0.0.1:8100/health}" +DRY_RUN="${ATOCORE_DEPLOY_DRY_RUN:-0}" +COMPOSE_DIR="$APP_DIR/deploy/dalidou" + +log() { printf '==> %s\n' "$*"; } +run() { + if [ "$DRY_RUN" = "1" ]; then + printf ' [dry-run] %s\n' "$*" + else + eval "$@" + fi +} + +log "AtoCore deploy starting" +log " app dir: $APP_DIR" +log " git remote: $GIT_REMOTE" +log " branch: $BRANCH" +log " health url: $HEALTH_URL" +log " dry run: $DRY_RUN" + +# --------------------------------------------------------------------- +# Step 1: make sure $APP_DIR is a proper git checkout of the branch +# --------------------------------------------------------------------- + +if [ -d "$APP_DIR/.git" ]; then + log "Step 1: app dir is already a git checkout; fetching latest" + run "cd '$APP_DIR' && git fetch origin '$BRANCH'" + run "cd '$APP_DIR' && git reset --hard 
'origin/$BRANCH'" +else + log "Step 1: app dir is NOT a git checkout; converting" + if [ -d "$APP_DIR" ]; then + BACKUP="${APP_DIR}.pre-git-$(date -u +%Y%m%dT%H%M%SZ)" + log " backing up existing snapshot to $BACKUP" + run "mv '$APP_DIR' '$BACKUP'" + fi + log " cloning $GIT_REMOTE -> $APP_DIR (branch: $BRANCH)" + run "git clone --branch '$BRANCH' '$GIT_REMOTE' '$APP_DIR'" +fi + +# --------------------------------------------------------------------- +# Step 2: show what we're deploying +# --------------------------------------------------------------------- + +log "Step 2: deployable commit" +if [ "$DRY_RUN" != "1" ] && [ -d "$APP_DIR/.git" ]; then + ( cd "$APP_DIR" && git log --oneline -1 ) + ( cd "$APP_DIR" && git rev-parse HEAD > /tmp/atocore-deploying-sha.txt ) + DEPLOYING_SHA="$(cat /tmp/atocore-deploying-sha.txt | cut -c1-7)" + log " commit: $DEPLOYING_SHA" +else + log " [dry-run] would read git log from $APP_DIR" + DEPLOYING_SHA="dry-run" +fi + +# --------------------------------------------------------------------- +# Step 3: preserve the .env file (it's not in git) +# --------------------------------------------------------------------- + +ENV_FILE="$COMPOSE_DIR/.env" +if [ "$DRY_RUN" != "1" ] && [ ! 
-f "$ENV_FILE" ]; then + log "Step 3: WARNING — $ENV_FILE does not exist" + log " the compose workflow needs this file to map mount points" + log " copy deploy/dalidou/.env.example to $ENV_FILE and edit it" + log " before re-running this script" + exit 2 +fi + +# --------------------------------------------------------------------- +# Step 4: rebuild and restart the container +# --------------------------------------------------------------------- + +log "Step 4: rebuilding and restarting the atocore container" +run "cd '$COMPOSE_DIR' && docker compose up -d --build" + +if [ "$DRY_RUN" = "1" ]; then + log "dry-run complete — no mutations performed" + exit 0 +fi + +# --------------------------------------------------------------------- +# Step 5: wait for the service to come up and pass the health check +# --------------------------------------------------------------------- + +log "Step 5: waiting for /health to respond" +for i in 1 2 3 4 5 6 7 8 9 10; do + if curl -fsS "$HEALTH_URL" > /tmp/atocore-health.json 2>/dev/null; then + log " service is responding" + break + fi + log " not ready yet ($i/10); waiting 3s" + sleep 3 +done + +if ! curl -fsS "$HEALTH_URL" > /tmp/atocore-health.json 2>/dev/null; then + log "FATAL: service did not come up within 30 seconds" + log " container logs (last 50 lines):" + cd "$COMPOSE_DIR" && docker compose logs --tail=50 atocore || true + exit 3 +fi + +# --------------------------------------------------------------------- +# Step 6: verify the deployed version matches expectations +# --------------------------------------------------------------------- + +log "Step 6: verifying deployed version" +log " /health response:" +if command -v jq >/dev/null 2>&1; then + jq . 
< /tmp/atocore-health.json | sed 's/^/ /' + REPORTED_VERSION="$(jq -r '.code_version // .version' < /tmp/atocore-health.json)" +else + cat /tmp/atocore-health.json | sed 's/^/ /' + echo + REPORTED_VERSION="$(grep -o '"code_version":"[^"]*"' /tmp/atocore-health.json | head -1 | cut -d'"' -f4)" + if [ -z "$REPORTED_VERSION" ]; then + REPORTED_VERSION="$(grep -o '"version":"[^"]*"' /tmp/atocore-health.json | head -1 | cut -d'"' -f4)" + fi +fi + +EXPECTED_VERSION="$(grep -oE "__version__ = \"[^\"]+\"" "$APP_DIR/src/atocore/__init__.py" | head -1 | cut -d'"' -f2)" + +log " expected code_version: $EXPECTED_VERSION (from $APP_DIR/src/atocore/__init__.py)" +log " reported code_version: $REPORTED_VERSION (from live /health)" + +if [ "$REPORTED_VERSION" != "$EXPECTED_VERSION" ]; then + log "WARNING: deployed version mismatch" + log " the container may not have picked up the new image" + log " try: docker compose down && docker compose up -d --build" + exit 4 +fi + +log "Deploy complete." +log " commit: $DEPLOYING_SHA" +log " code_version: $REPORTED_VERSION" +log " health: ok" diff --git a/docs/dalidou-deployment.md b/docs/dalidou-deployment.md index 618b5f6..8ba3577 100644 --- a/docs/dalidou-deployment.md +++ b/docs/dalidou-deployment.md @@ -50,26 +50,111 @@ starting from: deploy/dalidou/.env.example ``` -## Deployment steps +## First-time deployment steps + +1. Place the repository under `/srv/storage/atocore/app` — ideally as a + proper git clone so future updates can be pulled, not as a static + snapshot: + + ```bash + sudo git clone http://dalidou:3000/Antoine/ATOCore.git \ + /srv/storage/atocore/app + ``` -1. Place the repository under `/srv/storage/atocore/app`. 2. Create the canonical directories listed above. 3. Copy `deploy/dalidou/.env.example` to `deploy/dalidou/.env`. 4. Adjust the source paths if your AtoVault/AtoDrive mirrors live elsewhere. 5. 
Run: -```bash -cd /srv/storage/atocore/app/deploy/dalidou -docker compose up -d --build -``` + ```bash + cd /srv/storage/atocore/app/deploy/dalidou + docker compose up -d --build + ``` 6. Validate: + ```bash + curl http://127.0.0.1:8100/health + curl http://127.0.0.1:8100/sources + ``` + +## Updating a running deployment + +**Use `deploy/dalidou/deploy.sh` for every code update.** It is the +one-shot sync script that: + +- fetches latest main from Gitea into `/srv/storage/atocore/app` +- (if the app dir is not a git checkout) backs it up as + `<dir>.pre-git-<timestamp>` and re-clones +- rebuilds the container image +- restarts the container +- waits for `/health` to respond +- compares the reported `code_version` against the + `__version__` in the freshly-pulled source, and exits non-zero + if they don't match (deployment drift detection) + ```bash -curl http://127.0.0.1:8100/health -curl http://127.0.0.1:8100/sources +# Normal update from main +bash /srv/storage/atocore/app/deploy/dalidou/deploy.sh + +# Deploy a specific branch or tag +ATOCORE_BRANCH=codex/some-feature \ + bash /srv/storage/atocore/app/deploy/dalidou/deploy.sh + +# Dry-run: show what would happen without touching anything +ATOCORE_DEPLOY_DRY_RUN=1 \ + bash /srv/storage/atocore/app/deploy/dalidou/deploy.sh ``` +The script is idempotent and safe to re-run. It never touches the +database directly — schema migrations are applied automatically at +service startup by the lifespan handler in `src/atocore/main.py` +which calls `init_db()` (which in turn runs the ALTER TABLE +statements in `_apply_migrations`). + +### Deployment drift detection + +`/health` reports both `version` and `code_version` fields, both set +from `atocore.__version__` at import time. 
To check whether the +deployed code matches the repo's `main` branch: + +```bash +# What's running +curl -s http://127.0.0.1:8100/health | grep -o '"code_version":"[^"]*"' + +# What's in the repo's main branch +grep '__version__' /srv/storage/atocore/app/src/atocore/__init__.py +``` + +If these differ, the deployment is stale. Run `deploy.sh` to sync. + +### Schema migrations on redeploy + +When updating from an older `__version__`, the first startup after +the redeploy runs the idempotent ALTER TABLE migrations in +`_apply_migrations`. For a pre-0.2.0 → 0.2.0 upgrade the migrations +add these columns to existing tables (all with safe defaults so no +data is touched): + +- `memories.project TEXT DEFAULT ''` +- `memories.last_referenced_at DATETIME` +- `memories.reference_count INTEGER DEFAULT 0` +- `interactions.response TEXT DEFAULT ''` +- `interactions.memories_used TEXT DEFAULT '[]'` +- `interactions.chunks_used TEXT DEFAULT '[]'` +- `interactions.client TEXT DEFAULT ''` +- `interactions.session_id TEXT DEFAULT ''` +- `interactions.project TEXT DEFAULT ''` + +Plus new indexes on the new columns. No row data is modified. The +migration is safe to run against a database that already has the +columns — the `_column_exists` check makes each ALTER a no-op in +that case. + +Back up the database before any redeploy (via `POST /admin/backup`) +if you want a pre-upgrade snapshot. The migration is additive and +reversible by restoring the snapshot. 
+ ## Deferred - backup automation diff --git a/pyproject.toml b/pyproject.toml index 72da305..f4b4d48 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "atocore" -version = "0.1.0" +version = "0.2.0" description = "Personal context engine for LLM interactions" requires-python = ">=3.11" dependencies = [ diff --git a/src/atocore/__init__.py b/src/atocore/__init__.py index cee2512..69910bb 100644 --- a/src/atocore/__init__.py +++ b/src/atocore/__init__.py @@ -1,3 +1,15 @@ """AtoCore — Personal Context Engine.""" -__version__ = "0.1.0" +# Bumped when a commit meaningfully changes the API surface, schema, or +# user-visible behavior. The /health endpoint reports this value so +# deployment drift is immediately visible: if the running service's +# /health reports an older version than the main branch's __version__, +# the deployment is stale and needs a redeploy (see +# docs/dalidou-deployment.md and deploy/dalidou/deploy.sh). +# +# History: +# 0.1.0 Phase 0/0.5/1/2/3/5/7 baseline +# 0.2.0 Phase 9 reflection loop (capture/reinforce/extract + review +# queue), shared client v0.2.0, project identity +# canonicalization at every service-layer entry point +__version__ = "0.2.0" diff --git a/src/atocore/api/routes.py b/src/atocore/api/routes.py index 85b4ad3..c5b1270 100644 --- a/src/atocore/api/routes.py +++ b/src/atocore/api/routes.py @@ -742,12 +742,23 @@ def api_validate_backup(stamp: str) -> dict: @router.get("/health") def api_health() -> dict: - """Health check.""" + """Health check. + + The ``version`` and ``code_version`` fields both report the value + of ``atocore.__version__`` from the deployed code. Comparing this + to the main branch's ``__version__`` is the fastest way to detect + deployment drift: if they differ, the running service is behind + the repo and needs a redeploy (see + ``docs/dalidou-deployment.md`` and ``deploy/dalidou/deploy.sh``). 
+ """ + from atocore import __version__ + store = get_vector_store() source_status = get_source_status() return { "status": "ok", - "version": "0.1.0", + "version": __version__, + "code_version": __version__, "vectors_count": store.count, "env": _config.settings.env, "machine_paths": { diff --git a/src/atocore/main.py b/src/atocore/main.py index eba3434..e123f8e 100644 --- a/src/atocore/main.py +++ b/src/atocore/main.py @@ -4,6 +4,7 @@ from contextlib import asynccontextmanager from fastapi import FastAPI +from atocore import __version__ from atocore.api.routes import router import atocore.config as _config from atocore.context.project_state import init_project_state_schema @@ -43,7 +44,7 @@ async def lifespan(app: FastAPI): app = FastAPI( title="AtoCore", description="Personal Context Engine for LLM interactions", - version="0.1.0", + version=__version__, lifespan=lifespan, )