diff --git a/deploy/dalidou/deploy.sh b/deploy/dalidou/deploy.sh
index 77b493c..d6ad17a 100644
--- a/deploy/dalidou/deploy.sh
+++ b/deploy/dalidou/deploy.sh
@@ -160,6 +160,45 @@ else
     run "git clone --branch '$BRANCH' '$GIT_REMOTE' '$APP_DIR'"
 fi
 
+# ---------------------------------------------------------------------
+# Step 1.5: self-update re-exec guard
+# ---------------------------------------------------------------------
+#
+# If the pull changed deploy.sh, this bash process is still running the
+# OLD script while the checkout holds the NEW one. The first 2026-04-09
+# Dalidou deploy wrote "unknown" build_sha that way: old Step 2 logic
+# ran against fresh source. Compare sha1 of $0 with sha1 of the copy in
+# $APP_DIR and, on mismatch, exec the fresh copy, passing "$@" along.
+#
+# NOTE(review): both hashes are read from disk at this point. If $0 is
+# itself $APP_DIR/deploy/dalidou/deploy.sh they are the same file and
+# can never differ -- confirm deploys are launched from a separate copy.
+#
+# Guard rails -- the step is skipped unless all of these hold:
+#   - not dry-run ($DRY_RUN != 1) and ATOCORE_DEPLOY_REEXECED is unset
+#     or empty (it is exported as 1 just before exec: re-exec once);
+#   - $0 is readable (rules out bash -c / piped input, etc.);
+#   - $APP_DIR/deploy/dalidou/deploy.sh exists as a regular file;
+#   - both sha1sum results are non-empty (their stderr is discarded).
+
+if [ "$DRY_RUN" != "1" ] \
+   && [ -z "${ATOCORE_DEPLOY_REEXECED:-}" ] \
+   && [ -r "$0" ] \
+   && [ -f "$APP_DIR/deploy/dalidou/deploy.sh" ]; then
+    ON_DISK_HASH="$(sha1sum "$APP_DIR/deploy/dalidou/deploy.sh" 2>/dev/null | awk '{print $1}')"
+    RUNNING_HASH="$(sha1sum "$0" 2>/dev/null | awk '{print $1}')"
+    if [ -n "$ON_DISK_HASH" ] \
+       && [ -n "$RUNNING_HASH" ] \
+       && [ "$ON_DISK_HASH" != "$RUNNING_HASH" ]; then
+        log "Step 1.5: deploy.sh changed in the pulled commit; re-exec'ing"
+        log " running script hash: $RUNNING_HASH"
+        log " on-disk script hash: $ON_DISK_HASH"
+        log " re-exec -> $APP_DIR/deploy/dalidou/deploy.sh"
+        export ATOCORE_DEPLOY_REEXECED=1  # sentinel: the re-exec'd copy sees it and skips Step 1.5
+        exec bash "$APP_DIR/deploy/dalidou/deploy.sh" "$@"  # replaces this process; on success nothing below runs here
+    fi
+fi
+
 # ---------------------------------------------------------------------
 # Step 2: capture build provenance to pass to the container
 # ---------------------------------------------------------------------
diff --git a/docs/dalidou-deployment.md b/docs/dalidou-deployment.md
index 644fed6..8315521 100644
--- a/docs/dalidou-deployment.md
+++ b/docs/dalidou-deployment.md
@@ -140,6 +140,36 @@ If you see `{"status": "unavailable", "fail_open": true}` from the
 client, the first thing to check is whether the base URL resolves
 from where you're running the client.
 
+### The deploy.sh self-update race
+
+When `deploy.sh` itself changes in the commit being pulled, the
+first run after the update is still executing the *old* script from
+the bash process's in-memory copy. `git reset --hard` updates the
+file on disk, but the running bash has already loaded the
+instructions. On 2026-04-09 this silently shipped an "unknown"
+`build_sha` because the old Step 2 (which predated env-var export)
+ran against fresh source.
+
+`deploy.sh` now detects this: Step 1.5 compares the sha1 of `$0`
+(the running script) against the sha1 of
+`$APP_DIR/deploy/dalidou/deploy.sh` (the on-disk copy) after the
+git reset. 
If they differ, it sets `ATOCORE_DEPLOY_REEXECED=1` and
+`exec`s the fresh copy so the rest of the deploy runs under the new
+script. The sentinel env var prevents infinite recursion.
+
+You'll see this in the logs as:
+
+```text
+==> Step 1.5: deploy.sh changed in the pulled commit; re-exec'ing
+==> running script hash: <sha1>
+==> on-disk script hash: <sha1>
+==> re-exec -> /srv/storage/atocore/app/deploy/dalidou/deploy.sh
+```
+
+To opt out (debugging, for example), pre-set
+`ATOCORE_DEPLOY_REEXECED=1` before invoking `deploy.sh` and the
+self-update guard will be skipped.
+
 ### Deployment drift detection
 
 `/health` reports drift signals at three increasing levels of