diff --git a/README.MD b/README.MD index 7821675..c6bf71c 100644 --- a/README.MD +++ b/README.MD @@ -42,6 +42,8 @@ Encrypted chat that runs in your terminal. You host the server, you control the | `hh/` | The Rust `ratatui` client (the flagship) | | `cmd_chat/`, `cmd_chat.py` | The Python (Sanic) server + legacy Python client | | `cmd_chat/agent/` | The model-agnostic AI agent bridge (joins a room as an encrypted client) | +| `models.toml` | Named provider profiles for `/ai start ` (see `docs/providers.md`) | +| `docs/providers.md` | Connect any model — profiles, flags, discovery, bring-your-own-provider | | `hh/lets-hack.sh` | Spin up a local test "clergy" in tmux (server + N client panes) | | `bootstrap-ai.sh` | Optional: install Ollama + pull a model for the local `/ai` agent | | `hh/direnv-autostart/` | `cd` into a directory to auto-launch a session (direnv) | @@ -144,9 +146,11 @@ Type to chat. Slash commands and keys: | `/theme [name]` | Switch vestments, or list them | | `/send ` | Offer a file (or directory) to the room | | `/accept` · `/reject` | Respond to a pending file offer | -| `/ai start [model]` | Summon a local AI agent (default `ollama/qwen2.5:3b`) | +| `/ai start [model\|profile]` | Summon a local AI agent (default `ollama/qwen2.5:3b`; a bare name is a `models.toml` profile) | | `/ai stop` | Dismiss the agent you summoned | | `/ai ` | Ask the agent (`/ai ` if several present) | +| `/ai list` | List the agents present (or hint to `/ai start` if none) | +| `/ai models` | Models the active agent can serve — or, with no agent, your local Ollama tags | | `/sbx launch [local\|docker\|multipass] [image]` | Summon the shared sandbox | | `/sbx stop` | Tear down the sandbox you host | | `/drive` · `F2` | Take the shared shell (`Esc` releases) | @@ -224,6 +228,16 @@ when you quit). Pick a model at summon time with `/ai start `. endpoint (OpenAI, Groq, Together, local vLLM…), plus a `module:Class` hook for your own. 
def _build_provider(args, ap):
    """Resolve a Provider from either ``--profile`` or the explicit flags.

    Args:
        args: Parsed CLI namespace. Reads ``profile``, ``models_file``,
            ``provider``, ``model``, ``base_url``, ``system`` and
            ``context_window``.
        ap: The argument parser; used only to report fatal usage errors via
            ``ap.error`` (which prints the message and exits with status 2).

    Returns:
        The provider built by ``provider_from_profile`` (profile path) or
        ``make_provider`` (explicit-flag path).

    Side effects:
        On the profile path, ``args.system`` and ``args.context_window`` are
        filled from the profile when the CLI left them at their defaults.
    """
    if args.profile:
        # A broken or unreadable models.toml is an operator mistake, not a
        # crash: load_profiles raises ValueError for unknown keys, TOML decode
        # errors are ValueError subclasses, and file access raises OSError.
        try:
            profiles = load_profiles(args.models_file)
        except (OSError, ValueError) as exc:
            ap.error(f"cannot load model profiles: {exc}")
        if args.profile not in profiles:
            known = ", ".join(profiles) or "(none — create models.toml)"
            ap.error(f"unknown profile '{args.profile}'. known: {known}")
        prof = profiles[args.profile]
        provider = provider_from_profile(
            prof, name=args.profile, model=args.model, base_url=args.base_url
        )
        # Profile may also supply non-provider defaults.
        if args.system is None and prof.get("system"):
            args.system = prof["system"]
        # Ask the parser for the default instead of repeating the literal, so
        # the two cannot drift apart. An explicit flag equal to the default is
        # still indistinguishable from "not given" and lets the profile win.
        unset = args.context_window == ap.get_default("context_window")
        if unset and prof.get("context_window"):
            try:
                args.context_window = int(prof["context_window"])
            except (TypeError, ValueError):
                ap.error(
                    f"profile '{args.profile}': context_window must be an "
                    f"integer, got {prof['context_window']!r}"
                )
        return provider

    # Explicit flags: only the openai-compatible adapter and custom
    # ``module:Class`` providers are handed a base_url.
    opts: dict = {}
    if args.base_url and (args.provider == "openai" or ":" in args.provider):
        opts["base_url"] = args.base_url
    return make_provider(args.provider, model=args.model, **opts)
ap.add_argument("--context-window", type=int, default=12) + ap.add_argument("--list-models", action="store_true", + help="list models the backend can serve, then exit") + ap.add_argument("--check", action="store_true", + help="run a reachability/model preflight, then exit (0 ok, 1 fail)") ap.add_argument("--insecure", action="store_true", help="skip TLS cert verification") ap.add_argument("--no-tls", action="store_true", help="plain ws/http (local/Tailscale)") args = ap.parse_args() - opts: dict = {} - if args.base_url and (args.provider == "openai" or ":" in args.provider): - opts["base_url"] = args.base_url - provider = make_provider(args.provider, model=args.model, **opts) + provider = _build_provider(args, ap) + + # Discovery / preflight modes never join a room. + if args.list_models: + discover = getattr(provider, "available_models", None) + if discover is None: + ap.error(f"provider '{provider.name}' has no model discovery") + for m in discover(): + print(m) + return + if args.check: + ok, msg = preflight(provider) + print(("ok: " if ok else "FAIL: ") + msg, file=sys.stderr if not ok else sys.stdout) + sys.exit(0 if ok else 1) + + if args.server is None or args.port is None: + ap.error("server and port are required to join a room") + + # Non-fatal preflight: warn early, but still try (discovery may be blocked + # while completion works). 
def _command_reply(self, question: str) -> str | None:
    """Return the canned reply for a reserved verb, or ``None``.

    Reserved verbs are answered locally so they never spend a model call:

    - ``list``: this agent's roster line (its name, provider/model and
      context window). With several agents present each answers for itself.
    - ``models``: what the configured backend reports it can serve, with the
      active model wrapped in brackets.

    Args:
        question: The text that followed ``/ai``; matched case-insensitively
            after stripping surrounding whitespace.

    Returns:
        The reply text for ``list`` / ``models``, or ``None`` when the
        question is not a reserved verb and should go to the model.
    """
    provider = self.provider
    verb = question.strip().lower()
    if verb == "list":
        return (f"{self.name} (ai) here — {provider.name}/"
                f"{provider.model}, context {self.context_window}")
    if verb != "models":
        return None

    discover = getattr(provider, "available_models", None)
    if discover is None:
        return f"{provider.name}: model discovery not supported."
    # NOTE(review): the bundled providers implement discovery with a blocking
    # HTTP request and this method is called directly from the async
    # ``_answer`` — confirm whether it should be offloaded to a thread.
    try:
        models = discover()
    except Exception as e:  # noqa: BLE001 — report unreachable backend in-room
        return f"[ai error: cannot reach {provider.name}: {e}]"
    if not models:
        return f"{provider.name}: no models reported."
    # One line: the TUI collapses embedded newlines, so bracket the active
    # model instead of using a multi-line, marker-prefixed list.
    listing = ", ".join(
        f"[{m}]" if m == provider.model else m for m in models
    )
    return f"{provider.name} models ([active]): {listing}"
``~/.config/hh/models.toml`` +""" + +from __future__ import annotations + +import os +from pathlib import Path + +try: # stdlib on 3.11+, falls back to the `tomli` backport on 3.10 + import tomllib +except ModuleNotFoundError: # pragma: no cover + import tomli as tomllib # type: ignore[no-redef] + +from .providers import Provider, make_provider + +_RECOGNIZED = {"provider", "model", "base_url", "host", "api_key_env", + "system", "context_window"} + + +def _candidate_paths(explicit: str | None) -> list[Path]: + if explicit: + return [Path(explicit).expanduser()] + paths = [] + env = os.environ.get("HH_MODELS_FILE") + if env: + paths.append(Path(env).expanduser()) + paths.append(Path.cwd() / "models.toml") + paths.append(Path.home() / ".config" / "hh" / "models.toml") + return paths + + +def find_profiles_file(explicit: str | None = None) -> Path | None: + for p in _candidate_paths(explicit): + if p.is_file(): + return p + return None + + +def load_profiles(explicit: str | None = None) -> dict[str, dict]: + """Return ``{name: profile_dict}`` from the first models.toml found.""" + path = find_profiles_file(explicit) + if path is None: + return {} + with path.open("rb") as fh: + data = tomllib.load(fh) + profiles: dict[str, dict] = {} + for name, body in data.items(): + if not isinstance(body, dict) or "provider" not in body: + continue # skip non-profile tables / malformed entries + unknown = set(body) - _RECOGNIZED + if unknown: + raise ValueError( + f"profile '{name}': unknown key(s) {', '.join(sorted(unknown))}" + ) + profiles[name] = body + return profiles + + +def provider_from_profile(prof: dict, *, name: str = "?", + model: str | None = None, + base_url: str | None = None) -> Provider: + """Build a :class:`Provider` from a profile dict. + + ``model`` / ``base_url`` (CLI flags) override the profile when given. The + api key is read from ``$`` and passed only to providers that + accept one, so an Ollama profile never sees a stray ``api_key`` kwarg. 
+ """ + spec = prof["provider"] + custom = ":" in spec + opts: dict = {} + + mdl = model or prof.get("model") + bu = base_url or prof.get("base_url") + if bu and (spec == "openai" or custom): + opts["base_url"] = bu + if spec == "ollama" and prof.get("host"): + opts["host"] = prof["host"] + + key_env = prof.get("api_key_env") + if key_env and (spec in ("openai", "anthropic") or custom): + key = os.environ.get(key_env) + if not key: + raise SystemExit( + f"profile '{name}': ${key_env} is not set — export it first" + ) + opts["api_key"] = key + + return make_provider(spec, model=mdl, **opts) diff --git a/cmd_chat/agent/providers.py b/cmd_chat/agent/providers.py index 8340002..e4a3b98 100644 --- a/cmd_chat/agent/providers.py +++ b/cmd_chat/agent/providers.py @@ -30,6 +30,11 @@ class Provider(Protocol): def complete(self, system: str, messages: list[Msg]) -> str: ... + # Optional: list models the backend can serve, for discovery/preflight. + # Providers that can't enumerate (e.g. a bespoke endpoint) may omit this. + def available_models(self) -> list[str]: + ... + class OllamaProvider: """Local Ollama (default, recommended). No API key — privacy-preserving.""" @@ -52,6 +57,11 @@ class OllamaProvider: r.raise_for_status() return (r.json().get("message", {}).get("content") or "").strip() + def available_models(self) -> list[str]: + r = requests.get(f"{self.host}/api/tags", timeout=self.timeout) + r.raise_for_status() + return [m.get("name", "") for m in r.json().get("models", [])] + class AnthropicProvider: """Anthropic Messages API. Cloud — opt-in. 
def _model_aliases(model: str) -> set:
    """Return the spellings of *model* that refer to the same backend tag.

    Ollama reports an untagged pull as ``name:latest``, so ``name`` and
    ``name:latest`` are treated as the same model.
    """
    if not model:
        return set()
    aliases = {model}
    if model.endswith(":latest"):
        aliases.add(model[: -len(":latest")])
    elif ":" not in model:
        aliases.add(f"{model}:latest")
    return aliases


def preflight(provider: "Provider") -> tuple[bool, str]:
    """Cheap reachability + model-presence check before joining a room.

    Lets ``/ai start`` fail fast with a clear reason (backend down / model
    not pulled / key missing) instead of erroring on the first question.

    Args:
        provider: Any object with ``name`` and ``model``; ``available_models``
            is optional.

    Returns:
        ``(ok, message)``. ``ok`` is True when the provider has no discovery
        method (assumed reachable), when the configured model is listed, or
        when the backend answers with no usable model names. It is False when
        discovery raises or the model is missing from a non-empty list.
    """
    discover = getattr(provider, "available_models", None)
    if discover is None:
        return True, f"{provider.name}: no discovery endpoint — assuming reachable"
    try:
        models = discover()
    except Exception as e:  # noqa: BLE001 — any failure means "not reachable yet"
        return False, f"{provider.name}: cannot reach backend ({e})"

    # Providers default a missing name/id to "", so drop blanks before they
    # are counted as real models or shown to the operator.
    names = [m for m in models if m]
    if _model_aliases(provider.model) & set(names):
        return True, f"{provider.name}/{provider.model}: reachable"
    if names:
        sample = ", ".join(names[:8])
        more = "…" if len(names) > 8 else ""
        return False, (
            f"{provider.name}: model '{provider.model}' not available. "
            f"reachable models: {sample}{more}"
        )
    return True, f"{provider.name}: reachable (empty model list — skipping check)"
+python -m cmd_chat.agent <host> <port> --profile groq-llama --password <password> --no-tls +# or from the TUI: /ai start groq-llama +``` + +`api_key_env` names an **environment variable**, never the key itself, so +`models.toml` is safe to commit and share. Lookup order for the file: +`$HH_MODELS_FILE` → `./models.toml` → `~/.config/hh/models.toml` (override with +`--models-file`). + +Profile keys: `provider` (required), `model`, `base_url`, `host` (Ollama), +`api_key_env`, `system`, `context_window`. CLI `--model` / `--base-url` override +the profile. + +## 2. Without a profile — explicit flags + +```bash +# local Ollama (default, private — no key) +python -m cmd_chat.agent <host> <port> --provider ollama --model qwen2.5:3b --no-tls + +# any OpenAI-compatible endpoint (OpenAI, Groq, Together, vLLM, LM Studio, llama.cpp…) +python -m cmd_chat.agent <host> <port> --provider openai \ + --base-url https://api.together.xyz/v1 --model <model> + +# Anthropic +ANTHROPIC_API_KEY=sk-ant-... python -m cmd_chat.agent <host> <port> \ + --provider anthropic --model claude-opus-4-6 +``` + +Built-in providers: `ollama`, `anthropic`, `openai`. The `openai` adapter is the +universal one — most backends speak `/chat/completions`, so "any model" is +usually just `base_url` + `model` + a key. + +## 3. Discovery & preflight + +Check a backend before joining a room (neither joins): + +```bash +python -m cmd_chat.agent --profile groq-llama --list-models # enumerate models +python -m cmd_chat.agent --profile groq-llama --check # exit 0 ok / 1 fail +``` + +On a normal start the agent runs a non-fatal preflight and prints a `⚠ preflight` +warning if the backend is unreachable or the model isn't pulled — so you find out +immediately, not on the first question. In-room: + +- `/ai list` — each present agent answers with its roster line + (`name (ai) here — provider/model, context N`); use it to find an agent's name + before addressing it with `/ai <name> <question>`. +- `/ai models` — the active agent lists what its backend can serve + (the active model is shown in `[brackets]`). + +## 4. 
Bring your own provider + +Implement three things — `name`, `model`, and `complete()`: + +```python +class MyProvider: + name = "mine" + + def __init__(self, model: str = "my-default"): + self.model = model + + def complete(self, system: str, messages: list) -> str: + # messages: list of objects with .role ("user"/"assistant") and .content + ... + return "the reply" + + def available_models(self) -> list[str]: # optional: powers discovery/preflight + return ["my-default"] +``` + +Point the agent at it with `module:Class` (no repo changes needed): + +```bash +python -m cmd_chat.agent --provider mypkg.mymodule:MyProvider +``` + +or reference it from a profile: + +```toml +[mine] +provider = "mypkg.mymodule:MyProvider" +model = "my-default" +``` + +A complete, runnable example lives in +[`examples/echo_provider.py`](../examples/echo_provider.py): + +```bash +python -m cmd_chat.agent --no-tls --password \ + --provider examples.echo_provider:EchoProvider +``` + +`available_models()` is optional — implement it to light up `--list-models`, +`--check`, and `/ai models`; omit it and those degrade gracefully. diff --git a/examples/echo_provider.py b/examples/echo_provider.py new file mode 100644 index 0000000..6f83efd --- /dev/null +++ b/examples/echo_provider.py @@ -0,0 +1,34 @@ +"""Minimal bring-your-own Provider example. + +A Provider just turns (system prompt + messages) into one reply string. Anything +with ``name``, ``model`` and a ``complete()`` method qualifies — no base class, +no SDK. Point the agent at it with the ``module:Class`` spec: + + python -m cmd_chat.agent 127.0.0.1 3000 --no-tls --password hunter2 \ + --provider examples.echo_provider:EchoProvider + +or via models.toml: + + [echo] + provider = "examples.echo_provider:EchoProvider" + model = "echo-1" + +Implementing ``available_models()`` is optional; it powers ``--list-models``, +``--check`` preflight, and the in-room ``/ai models`` command. 
class EchoProvider:
    """Minimal bring-your-own provider that echoes the last message back.

    Satisfies the provider contract structurally: a ``name``, a ``model`` and
    a ``complete()`` method. ``available_models()`` is optional and only
    powers discovery and preflight.
    """

    name = "echo"
    # Model advertised when the caller does not pick one.
    DEFAULT_MODEL = "echo-1"

    def __init__(self, model: str = "echo-1"):
        self.model = model

    def complete(self, system: str, messages: list) -> str:
        """Return ``"echo: "`` plus the content of the last message.

        Args:
            system: System prompt; ignored by this provider.
            messages: Conversation so far; each item exposes ``.content``.
                An empty list echoes an empty string.
        """
        last = messages[-1].content if messages else ""
        return f"echo: {last}"

    def available_models(self) -> list[str]:  # optional
        """List the models this provider serves.

        Echo works for any model name, so the configured model is always
        included; a hard-coded list would make preflight reject every
        non-default ``model``.
        """
        return list(dict.fromkeys((self.DEFAULT_MODEL, self.model)))
+ let (profile, model): (Option<&str>, &str) = if arg.is_empty() { + (None, "qwen2.5:3b") + } else if arg.contains(':') || arg.contains('/') { + (None, arg) + } else { + (Some(arg), arg) + }; let name = "oracle"; - match spawn_agent(params, &app.password, name, model) { + match spawn_agent(params, &app.password, name, profile, model) { Ok(child) => { *agent = Some(child); + let desc = match profile { + Some(p) => format!("profile {p}"), + None => format!("ollama/{model}"), + }; app.sys(format!( - "⛧ summoning {name} (ollama/{model})… it will announce when online" + "⛧ summoning {name} ({desc})… it will announce when online" )); } Err(e) => app.err(format!("/ai start failed: {e}")), } } + } else if line == "/ai list" || line == "/ai models" { + // Reap an agent that already exited so we don't forward into a dead pipe. + if agent + .as_mut() + .is_some_and(|c| matches!(c.try_wait(), Ok(Some(_)))) + { + *agent = None; + } + if agent.is_some() { + // A live agent answers these itself (canned, zero model-call). + let _ = out_tx.send(WsMsg::Text(room.encrypt(line.as_bytes()))); + } else if line == "/ai list" { + app.sys("no AI agent running from this client — /ai start to summon one"); + } else { + // No agent: still useful to show what could be started locally. 
+ app.sys("querying local ollama…"); + let tx = app_tx.clone(); + tokio::task::spawn_blocking(move || { + let msg = match local_ollama_models() { + Ok(ms) if !ms.is_empty() => format!( + "local ollama models (start one with `/ai start `): {}", + ms.join(", ") + ), + Ok(_) => "ollama is reachable but has no models pulled — \ + `ollama pull qwen2.5:3b` or run ./bootstrap-ai.sh" + .to_string(), + Err(_) => "ollama not reachable at localhost:11434 — run \ + ./bootstrap-ai.sh, or `/ai start ` for a cloud model" + .to_string(), + }; + let _ = tx.send(Net::Sys(msg)); + }); + } } else if !line.is_empty() && app.connected { let _ = out_tx.send(WsMsg::Text(room.encrypt(line.as_bytes()))); } } +/// Probe the local Ollama daemon for installed model tags. Used to answer +/// `/ai models` before any agent is summoned (the agentless path); a running +/// agent answers in-room instead. Honors `$OLLAMA_HOST`. +fn local_ollama_models() -> Result, String> { + let host = std::env::var("OLLAMA_HOST") + .ok() + .filter(|h| !h.is_empty()) + .unwrap_or_else(|| "http://localhost:11434".to_string()); + let host = host.trim_end_matches('/'); + let url = format!("{host}/api/tags"); + let client = reqwest::blocking::Client::builder() + .timeout(std::time::Duration::from_millis(1500)) + .build() + .map_err(|e| e.to_string())?; + let body: serde_json::Value = client + .get(&url) + .send() + .map_err(|e| e.to_string())? 
+ .json() + .map_err(|e| e.to_string())?; + let models = body + .get("models") + .and_then(|m| m.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|m| m.get("name").and_then(|n| n.as_str())) + .map(str::to_string) + .collect() + }) + .unwrap_or_default(); + Ok(models) +} + #[allow(clippy::too_many_arguments)] fn spawn_launch( backend: sbx::Backend, @@ -1214,6 +1295,7 @@ fn spawn_agent( params: &net::ConnParams, password: &str, name: &str, + profile: Option<&str>, model: &str, ) -> std::result::Result { use std::process::{Command, Stdio}; @@ -1238,12 +1320,18 @@ fn spawn_agent( .arg(¶ms.ip) .arg(params.port.to_string()) .arg("--name") - .arg(name) - .arg("--provider") - .arg("ollama") - .arg("--model") - .arg(model) - .stdin(Stdio::null()) + .arg(name); + // A profile carries its own provider/model/endpoint from models.toml; + // otherwise summon a local Ollama model by tag. + match profile { + Some(p) => { + cmd.arg("--profile").arg(p); + } + None => { + cmd.arg("--provider").arg("ollama").arg("--model").arg(model); + } + } + cmd.stdin(Stdio::null()) .stdout(Stdio::from(log)) .stderr(Stdio::from(log_err)); if !password.is_empty() { diff --git a/hh/src/ui.rs b/hh/src/ui.rs index 5b1c9ee..97a2b84 100644 --- a/hh/src/ui.rs +++ b/hh/src/ui.rs @@ -129,14 +129,16 @@ fn draw_help(f: &mut Frame, area: Rect, theme: &Theme) { kv("/sbx stop", "tear down the sandbox (purges the VM)"), kv("/drive", "type into the shared shell (Esc releases)"), kv( - "/ai start [model]", - "spawn a local AI agent (default ollama/qwen2.5:3b)", + "/ai start [model|profile]", + "spawn an AI agent (ollama model tag, or a models.toml profile)", ), kv("/ai stop", "dismiss the agent you started"), kv( "/ai ", "ask an AI agent in the room (/ai if many)", ), + kv("/ai list", "list AI agents present + their provider/model"), + kv("/ai models", "show models the active agent's backend can serve"), kv( "/grant ", "let a member drive the shell (owner)", diff --git a/models.toml b/models.toml new file mode 
100644 index 0000000..14ed6ac --- /dev/null +++ b/models.toml @@ -0,0 +1,55 @@ +# hack-house model profiles +# --------------------------------------------------------------------------- +# Each table is a named profile. Select one with: +# python -m cmd_chat.agent <host> <port> --profile groq-llama --password <password> +# or from the TUI: +# /ai start groq-llama +# +# Keys: +# provider ollama | anthropic | openai | module:Class (required) +# model model name the backend serves +# base_url endpoint for openai-compatible backends (Groq, vLLM, …) +# host override Ollama host (default http://localhost:11434) +# api_key_env NAME of an env var holding the key — never the key itself +# system optional system-prompt override +# context_window optional int (default 12) +# +# Secrets live in the environment, never in this file, so it is safe to commit. + +# --- local, private, default -------------------------------------------------- +[local] +provider = "ollama" +model = "qwen2.5:3b" + +[local-big] +provider = "ollama" +model = "llama3.1:8b" + +# --- openai-compatible clouds (one adapter, any endpoint) --------------------- +[groq-llama] +provider = "openai" +base_url = "https://api.groq.com/openai/v1" +model = "llama-3.3-70b-versatile" +api_key_env = "GROQ_API_KEY" + +[together] +provider = "openai" +base_url = "https://api.together.xyz/v1" +model = "meta-llama/Llama-3.3-70B-Instruct-Turbo" +api_key_env = "TOGETHER_API_KEY" + +[openai] +provider = "openai" +model = "gpt-4o-mini" +api_key_env = "OPENAI_API_KEY" + +# --- anthropic ---------------------------------------------------------------- +[claude] +provider = "anthropic" +model = "claude-opus-4-6" +api_key_env = "ANTHROPIC_API_KEY" + +# --- bring your own ----------------------------------------------------------- +# [my-model] +# provider = "mypkg.mymodule:MyProvider" +# model = "whatever-your-class-expects"