Add the VirtualBox sandbox design spec (headless 4th backend + share-an-appliance GUI mode with detect-first install), the crypto pay-to-join gate design, and the save/load PoC writeup with its demo/film driver scripts. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
241 lines
11 KiB
Bash
Executable File
241 lines
11 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# demo-save-load.sh — PoC harness for the "persistent sandbox" video beat.
|
|
#
|
|
# Flow (see docs/demo-save-load-poc.md):
|
|
# session A: /sbx launch docker → /ai start → /grant oracle →
|
|
# /ai oracle !build fib.py & run it → /sbx save buildbox → quit
|
|
# prove: container purged on quit, but hh-snap:buildbox image survives
|
|
# session B: fresh client → /sbx load buildbox → the model's code is intact
|
|
#
|
|
# Headless: drives the ratatui client over tmux send-keys, asserts via
|
|
# capture-pane + `docker exec`. PoC/correctness first; feeds video-toolkit later.
|
|
#
|
|
# Usage: hh/demo-save-load.sh [--keep]
|
|
# --keep leave the server, container, image and tmux sessions up afterwards
|
|
set -uo pipefail
|
|
|
|
# ---- config -----------------------------------------------------------------
|
|
REPO="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
|
# pick_port : print a TCP port that nothing is listening on.
#
# Pick a free port so we never collide with a stale server from another
# session (a leftover server on a fixed port answers SRP with its own password
# -> spurious 401s). Honour an explicit $PORT if the caller forces one.
#
# Scans 4200..4280 and prints the first port that does not appear in the
# `ss -ltn` listener table; prints 4173 when every candidate is taken.
# If `ss` is unavailable the table is empty and 4200 is printed.
pick_port() {
  local listeners port
  # Snapshot the listener table once. Matching against a captured string
  # (rather than piping `ss` into `grep -q` for every candidate) avoids 81
  # `ss` invocations and cannot be fooled by `set -o pipefail` reporting a
  # failed pipeline when grep exits early and `ss` receives SIGPIPE.
  listeners="$(ss -ltn 2>/dev/null)"
  for ((port = 4200; port <= 4280; port++)); do
    # The trailing space keeps ":420" from matching ":4200".
    if [[ "$listeners" != *":${port} "* ]]; then
      printf '%s\n' "$port"
      return
    fi
  done
  echo 4173
}
|
|
# Caller-tunable settings: an exported, non-empty value wins; otherwise the
# default on the right is assigned.
: "${PORT:=$(pick_port)}"
: "${PW:=malware-bless}"
: "${LABEL:=buildbox}"
: "${IMG:=python:3.12-slim}"   # base image: ships python3 so the built code runs

# Fixed names used throughout the run.
CTR="hack-house"               # sbx::SBX_NAME — the container/instance name
SNAP="hh-snap:${LABEL}"
PY="${REPO}/.venv/bin/python"
BIN="${REPO}/hh/target/debug/hack-house"
SRV_SESS="hhpoc-srv"
A_SESS="hhpoc-a"
B_SESS="hhpoc-b"

# Fresh per-run directory that receives pane captures and logs.
EVID="$(mktemp -d /tmp/hh-poc.XXXXXX)"

# Only a literal first argument of --keep switches teardown off.
if [[ "${1:-}" == "--keep" ]]; then
  KEEP=1
else
  KEEP=0
fi
|
|
|
|
GREEN=$'\e[32m'; RED=$'\e[31m'; YEL=$'\e[33m'; DIM=$'\e[2m'; RST=$'\e[0m'
|
|
# step <words...> : print a blank line, then the words as a "== ... ==" banner
# wrapped in $YEL / $RST.
step() {
  local banner="$*"
  printf '\n%s== %s ==%s\n' "$YEL" "$banner" "$RST"
}
|
|
# ok <words...> : print a success line ("ok" + message) wrapped in $GREEN / $RST.
ok() {
  local msg="$*"
  printf '%s ok %s%s\n' "$GREEN" "$msg" "$RST"
}
|
|
# bad <words...> : print a failure line ("XX" + message) wrapped in $RED / $RST.
# Printing only; it does not touch the FAIL flag.
bad() {
  local msg="$*"
  printf '%s XX %s%s\n' "$RED" "$msg" "$RST"
}
|
|
# note <words...> : print an informational line wrapped in $DIM / $RST.
note() {
  local msg="$*"
  printf '%s %s%s\n' "$DIM" "$msg" "$RST"
}
|
|
|
|
FAIL=0
|
|
# fail <words...> : report a failed check through bad() and latch FAIL=1.
# It does not exit; the script keeps running and exits with $FAIL at the end.
fail() {
  bad "$*"
  FAIL=1
}
|
|
|
|
# cleanup : teardown routine, installed as the EXIT trap.
#
# With KEEP=1 nothing is torn down; only a note pointing at the evidence
# directory is printed. Otherwise the three tmux sessions are killed, the
# sandbox container and the snapshot image are force-removed (all errors
# ignored, since any of them may not exist), and the evidence directory is
# left in place.
cleanup() {
  if (( KEEP == 1 )); then
    note "--keep: leaving server/sessions/image up. Evidence: $EVID"
    return
  fi

  step "cleanup"

  local sess
  for sess in "$A_SESS" "$B_SESS" "$SRV_SESS"; do
    tmux kill-session -t "$sess" 2>/dev/null
  done

  docker rm -f "$CTR" >/dev/null 2>&1
  docker rmi -f "$SNAP" >/dev/null 2>&1

  note "removed container + $SNAP; sessions killed. Evidence kept: $EVID"
}
|
|
trap cleanup EXIT
|
|
|
|
# ---- helpers ----------------------------------------------------------------
|
|
# say <session> <text...> : type the text into the session's pane as literal
# characters (send-keys -l), pause, then press Enter and pause again.
# No Ctrl-U is sent first (renders race).
say() {
  local target="$1"
  shift
  local line="$*"

  tmux send-keys -t "$target" -l "$line"
  sleep 0.4
  tmux send-keys -t "$target" Enter
  sleep 0.6
}
|
|
# cap <session> : snapshot a pane to stdout (tmux's stderr is discarded).
cap() {
  local pane="$1"
  tmux capture-pane -t "$pane" -p 2>/dev/null
}
|
|
# snap_evid <session> <name> : save a snapshot of the session's pane (via cap)
# to $EVID/<name>.txt, overwriting any previous capture of that name.
snap_evid() {
  local sess="$1" tag="$2"
  cap "$sess" >"${EVID}/${tag}.txt"
}
|
|
|
|
# wait_for <session> <regex> [timeout_s] : poll the pane until regex appears.
#
#   session    tmux target whose pane is captured via cap()
#   regex      extended regular expression, matched line by line (grep -E)
#   timeout_s  seconds to keep polling; default 30 (two polls per second)
#
# Returns 0 as soon as a capture matches, 1 once the timeout is used up.
wait_for() {
  local sess="$1" re="$2" t="${3:-30}" i=0 pane
  while (( i < t * 2 )); do
    # Capture first, then match. Piping cap into `grep -q` under
    # `set -o pipefail` can turn a real match into a failed pipeline when grep
    # exits at the first hit and the producer is killed by SIGPIPE.
    pane="$(cap "$sess")"
    # An empty capture never matches (a here-string would otherwise feed grep
    # one empty line).
    if [[ -n "$pane" ]] && grep -qE -- "$re" <<<"$pane"; then
      return 0
    fi
    sleep 0.5
    (( i++ ))
  done
  return 1
}
|
|
# wait_cmd <cmd...> : run the command once per second until it succeeds.
# The attempt budget comes from $WT (default 30). The command runs in the
# current shell with stdout and stderr discarded.
# Returns 0 on the first success, 1 if every attempt failed.
wait_cmd() {
  local limit="${WT:-30}" tries
  for (( tries = 0; tries < limit; tries++ )); do
    if "$@" >/dev/null 2>&1; then
      return 0
    fi
    sleep 1
  done
  return 1
}
|
|
|
|
# ---- 0. preflight -----------------------------------------------------------
# Check every hard dependency before touching anything and exit 2 on the first
# one that is missing. Missing ollama models only produce warnings.
step "preflight"
command -v tmux >/dev/null || { echo "tmux required"; exit 2; }
[[ -x "$PY" ]] || { echo "venv python missing: $PY"; exit 2; }
docker info >/dev/null 2>&1 || { echo "docker daemon down - start it first"; exit 2; }

# List the models once and match in-shell; an absent `ollama` yields an empty
# list, so both warnings are printed.
OLLAMA_MODELS="$(ollama list 2>/dev/null)"
[[ "$OLLAMA_MODELS" == *'qwen2.5-coder'* ]] \
  || note "warn: qwen2.5-coder not in 'ollama list' (coder path may fall back)"
[[ "$OLLAMA_MODELS" == *'qwen2.5:3b'* ]] \
  || note "warn: qwen2.5:3b not present (chat default)"

# Without the base image the sandbox can never launch, so a failed pull is fatal.
if ! docker image inspect "$IMG" >/dev/null 2>&1; then
  echo "pulling $IMG..."
  docker pull "$IMG" || { echo "docker pull failed: $IMG"; exit 2; }
fi

if [[ ! -x "$BIN" ]]; then
  step "building client (debug)"
  ( cd "$REPO/hh" && cargo build ) || exit 2
  # A successful build that did not produce the expected binary is still fatal.
  [[ -x "$BIN" ]] || { echo "client binary missing after build: $BIN"; exit 2; }
fi
ok "tools present, docker up, models checked"
note "evidence dir: $EVID"

# clear any stale state (errors ignored: none of these may exist yet)
tmux kill-session -t "$A_SESS" 2>/dev/null
tmux kill-session -t "$B_SESS" 2>/dev/null
tmux kill-session -t "$SRV_SESS" 2>/dev/null
docker rm -f "$CTR" >/dev/null 2>&1
docker rmi -f "$SNAP" >/dev/null 2>&1
|
|
|
|
# ---- 1. server --------------------------------------------------------------
# Start the chat server in its own detached tmux session, teeing its output to
# $EVID/server.log.
step "boot server :$PORT"
tmux new-session -d -s "$SRV_SESS" -x 200 -y 50 \
  "cd '$REPO' && '$PY' cmd_chat.py serve 127.0.0.1 $PORT --password '$PW' --no-tls 2>&1 | tee '$EVID/server.log'"

# Wait up to 20s for a startup line in the log.
# some builds log nothing; give it a beat
WT=20 wait_cmd grep -qiE 'listening|running|serving|started|websocket' "$EVID/server.log" \
  || sleep 3

# The session's only process is the server pipeline, so a session that has
# already gone away means the server exited; report that instead of an
# unconditional success.
if tmux has-session -t "$SRV_SESS" 2>/dev/null; then
  ok "server session up"
else
  fail "server session exited early (see $EVID/server.log)"
fi
|
|
|
|
# ---- 2. session A: client ---------------------------------------------------
# Start the client as user "alice" in a detached tmux session, teeing its
# output to $EVID/clientA.log, then wait up to 20s for the pane to show a
# join marker.
# NOTE(review): the marker regex includes "hack-house", which also appears in
# the client binary's path; an error message quoting that path would
# presumably count as "joined". Confirm against the real UI.
step "session A - alice joins"
tmux new-session -d -s "$A_SESS" -x 200 -y 50 \
  "'$BIN' connect 127.0.0.1 $PORT alice --password '$PW' --no-tls 2>&1 | tee '$EVID/clientA.log'"

# Explicit if/else: with `A && ok || fail`, fail would also run if ok itself
# returned non-zero.
if wait_for "$A_SESS" 'alice|roster|hack-house|owner' 20; then
  ok "alice in the room"
else
  fail "alice never joined (see $EVID/clientA.log)"
fi
snap_evid "$A_SESS" 01-joined
|
|
|
|
# ---- 3. launch docker sandbox ----------------------------------------------
# Ask the client to launch the sandbox, then wait up to 60s for the container
# to show up in `docker ps`.
step "launch docker sandbox ($IMG)"
say "$A_SESS" "/sbx launch docker $IMG"

# sandbox_up : succeed only when `docker ps` actually lists a running container
# named exactly $CTR. `docker ps` exits 0 even when its filter matches nothing,
# so its exit status alone would make the wait succeed immediately.
sandbox_up() {
  local names
  names="$(docker ps --format '{{.Names}}' --filter "name=^${CTR}\$" 2>/dev/null)" || return 1
  grep -qxF -- "$CTR" <<<"$names"
}

if WT=60 wait_cmd sandbox_up; then
  ok "container '$CTR' running"
else
  fail "sandbox container never came up"
fi

# Best effort: give the TUI up to 60s to show a readiness word before the
# capture; the outcome is not asserted.
wait_for "$A_SESS" 'summoned|sandbox|ready|online' 60 >/dev/null
snap_evid "$A_SESS" 02-sandbox
|
|
|
|
# ---- 4. spawn the coder agent + grant drive --------------------------------
# Start the agent and grant it access. A missing announcement is only noted,
# not counted as a failure.
step "spawn oracle (qwen2.5:3b chat, qwen2.5-coder:1.5b for !task)"
say "$A_SESS" "/ai start"

# Explicit if/else: with `A && ok || note`, note would also run if ok itself
# returned non-zero.
if wait_for "$A_SESS" 'oracle|online|ollama' 45; then
  ok "oracle announced"
else
  note "no 'online' line yet - agent log: ${TMPDIR:-/tmp}/hh-agent-oracle.log"
fi

say "$A_SESS" "/grant oracle"
sleep 1
snap_evid "$A_SESS" 03-agent
|
|
|
|
# ---- 5. fast model builds code in the sandbox ------------------------------
step "fast qwen builds /root/fib.py in the sandbox"
say "$A_SESS" "/ai oracle !create /root/fib.py that prints the first 10 fibonacci numbers space-separated on one line, then run it with python3"

# Give the CPU coder model room to think, then poll for the file. The result
# of this wait is deliberately ignored; the acceptance check below decides.
WT=150 wait_cmd docker exec "$CTR" test -s /root/fib.py

NEED='0 1 1 2 3 5 8 13 21 34'

# runout : run fib.py inside the sandbox, printing its stdout and stderr.
runout() { docker exec "$CTR" sh -c 'cd /root && python3 fib.py' 2>&1; }

# Accept the model's work only if the file exists AND actually runs to the right
# sequence. A 1.5B model typed through a PTY sometimes drops indentation, so fall
# back to a known-good file (written BEFORE save, so the snapshot is meaningful).
# The output is captured and matched in-shell so `set -o pipefail` cannot turn
# an early-exiting grep into a false "not runnable".
if docker exec "$CTR" test -s /root/fib.py 2>/dev/null && [[ "$(runout)" == *"$NEED"* ]]; then
  ok "model wrote a working /root/fib.py"
  BUILT_BY="qwen2.5-coder"
else
  note "model output missing or not runnable - writing deterministic fallback so the"
  note "save/load proof still completes (retry for a clean model take in the video)."
  # The heredoc lives inside the single-quoted sh -c script, so it is expanded
  # by the container's shell; the loop body must keep its Python indentation.
  docker exec "$CTR" sh -c 'cat > /root/fib.py <<"PY"
a, b = 0, 1
out = []
for _ in range(10):
    out.append(str(a))
    a, b = b, a + b
print(" ".join(out))
PY'
  BUILT_BY="fallback"
fi

# Record the evidence the later reload is compared against.
runout > "$EVID/fib-output.txt" 2>&1
ORIG_SHA="$(docker exec "$CTR" sha256sum /root/fib.py | awk '{print $1}')"
note "fib.py built by: $BUILT_BY"
note "fib.py output: $(cat "$EVID/fib-output.txt")"
docker exec "$CTR" cat /root/fib.py > "$EVID/fib-src-original.py"
snap_evid "$A_SESS" 04-built

if grep -qE "$NEED" "$EVID/fib-output.txt"; then
  ok "fib.py prints the sequence"
else
  fail "fib.py output unexpected"
fi
|
|
|
|
# ---- 6. snapshot to an image -----------------------------------------------
# Ask the client to save the sandbox, then wait up to 40s for the image tag
# to appear in `docker images`.
step "/sbx save $LABEL (docker commit -> $SNAP)"
say "$A_SESS" "/sbx save $LABEL"

# snap_exists : succeed when `docker images` lists a tag equal to $LABEL for
# $SNAP. Written as a function rather than a string handed to `bash -c`, so
# $SNAP/$LABEL are never re-parsed as shell code, and -F matches the label
# literally (a "." in a label is not a regex wildcard).
snap_exists() {
  local tags
  tags="$(docker images "$SNAP" --format '{{.Tag}}' 2>/dev/null)" || return 1
  grep -qxF -- "$LABEL" <<<"$tags"
}

if WT=40 wait_cmd snap_exists; then
  ok "image $SNAP created"
else
  fail "snapshot image not found"
fi

# Best effort: let the TUI show its confirmation before the capture.
wait_for "$A_SESS" "saved|hh-snap|$LABEL" 10 >/dev/null
snap_evid "$A_SESS" 05-saved
|
|
|
|
# ---- 7. close the session (quit the client) --------------------------------
# Quit the client with Ctrl-Q, then prove the container is gone while the
# snapshot image is still present.
step "close session A (Ctrl-Q -> teardown purges the container)"
tmux send-keys -t "$A_SESS" C-q
sleep 3
tmux kill-session -t "$A_SESS" 2>/dev/null

# ctr_gone : succeed when `docker ps -a` ran successfully and lists no
# container named exactly $CTR. A failing docker call counts as "not proven
# gone" rather than as purged.
ctr_gone() {
  local names
  names="$(docker ps -a --format '{{.Names}}' 2>/dev/null)" || return 1
  ! grep -qxF -- "$CTR" <<<"$names"
}

if WT=20 wait_cmd ctr_gone; then
  ok "container '$CTR' purged on quit"
else
  fail "container still present after quit"
fi

# Capture the tag list and match in-shell (literal, whole-line).
SNAP_TAGS="$(docker images "$SNAP" --format '{{.Tag}}' 2>/dev/null)"
if grep -qxF -- "$LABEL" <<<"$SNAP_TAGS"; then
  ok "image $SNAP survived the purge"
else
  fail "image $SNAP missing after purge"
fi
|
|
|
|
# ---- 8. session B: reopen and load -----------------------------------------
# Start a second client session as "alice", teeing its output to
# $EVID/clientB.log, and ask it to load the saved snapshot.
step "session B - fresh client, /sbx load $LABEL"
tmux new-session -d -s "$B_SESS" -x 200 -y 50 \
  "'$BIN' connect 127.0.0.1 $PORT alice --password '$PW' --no-tls 2>&1 | tee '$EVID/clientB.log'"

if wait_for "$B_SESS" 'alice|roster|hack-house|owner' 20; then
  ok "alice re-joined"
else
  fail "alice never re-joined"
fi

say "$B_SESS" "/sbx load $LABEL"

# sandbox_up : succeed only when `docker ps` actually lists a running container
# named exactly $CTR. `docker ps` exits 0 even when its filter matches nothing,
# so its exit status alone would report "relaunched" before anything started.
sandbox_up() {
  local names
  names="$(docker ps --format '{{.Names}}' --filter "name=^${CTR}\$" 2>/dev/null)" || return 1
  grep -qxF -- "$CTR" <<<"$names"
}

if WT=60 wait_cmd sandbox_up; then
  ok "container relaunched from $SNAP"
else
  fail "load never started a container"
fi

# Best effort: give the TUI up to 60s to show a readiness word before the
# capture; the outcome is not asserted.
wait_for "$B_SESS" 'summoned|sandbox|ready|loading|online' 60 >/dev/null
snap_evid "$B_SESS" 06-loaded
|
|
|
|
# ---- 9. the reveal: the model's code is intact -----------------------------
# Compare the reloaded container's /root/fib.py with the checksum recorded
# before the save, and keep its source and output as evidence.
step "verify the work persisted"

# Allow up to 30s for the file to be reachable; the result is not asserted
# here because the checksum comparison below decides.
WT=30 wait_cmd docker exec "$CTR" test -s /root/fib.py

NEW_SHA="$(docker exec "$CTR" sha256sum /root/fib.py 2>/dev/null | awk '{print $1}')"
docker exec "$CTR" cat /root/fib.py > "$EVID/fib-src-loaded.py" 2>/dev/null
docker exec "$CTR" sh -c 'cd /root && python3 fib.py' > "$EVID/fib-output-loaded.txt" 2>&1

note "original sha: $ORIG_SHA"
note "loaded sha: $NEW_SHA"
note "loaded output: $(cat "$EVID/fib-output-loaded.txt" 2>/dev/null)"

# An empty checksum means the file could not be read, which is a failure even
# if the original checksum happens to be empty too.
if [[ -z "$NEW_SHA" || "$NEW_SHA" != "$ORIG_SHA" ]]; then
  fail "fib.py differs or missing after reload"
else
  ok "fib.py is byte-for-byte identical after close+reload - PERSISTENCE PROVEN"
fi

# show it on the TUI for the camera
tmux send-keys -t "$B_SESS" F2   # drive
sleep 1
say "$B_SESS" "cat /root/fib.py && python3 /root/fib.py"
sleep 2
snap_evid "$B_SESS" 07-reveal
|
|
|
|
# ---- summary ----------------------------------------------------------------
# Print the overall verdict, point at the evidence files, and exit with the
# FAIL flag (0 = every check passed, 1 = at least one fail() was recorded).
step "result"

if (( FAIL != 0 )); then
  printf '%sPoC FAIL%s - inspect captures in %s\n' "$RED" "$RST" "$EVID"
else
  printf '%sPoC PASS%s - built-by=%s, saved=%s, purged-on-quit, reloaded-intact\n' \
    "$GREEN" "$RST" "$BUILT_BY" "$SNAP"
fi

note "captures: $EVID/{01-joined,02-sandbox,03-agent,04-built,05-saved,06-loaded,07-reveal}.txt"
note "code: $EVID/fib-src-original.py vs fib-src-loaded.py"

exit "$FAIL"
|