Initialize Glassmind Rust CLI and markdown vault scanner

This commit is contained in:
K. Hodges 2026-05-24 02:18:08 -07:00
parent bd9f67422a
commit fc9b2efd0b
18 changed files with 5000 additions and 3 deletions

4
.gitignore vendored
View File

@ -2,6 +2,8 @@
# will have compiled files and executables # will have compiled files and executables
debug/ debug/
target/ target/
.agent/
docs/codex/
# These are backup files generated by rustfmt # These are backup files generated by rustfmt
**/*.rs.bk **/*.rs.bk
@ -14,4 +16,4 @@ target/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear # and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder. # option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/ #.idea/

595
Cargo.lock generated Normal file
View File

@ -0,0 +1,595 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]]
name = "anstream"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
[[package]]
name = "anstyle-parse"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
dependencies = [
"anstyle",
"once_cell_polyfill",
"windows-sys",
]
[[package]]
name = "anyhow"
version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
[[package]]
name = "bitflags"
version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "clap"
version = "4.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
[[package]]
name = "colorchoice"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "getopts"
version = "0.2.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df"
dependencies = [
"unicode-width",
]
[[package]]
name = "glassmind"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"pulldown-cmark",
"regex",
"serde",
"serde_json",
"toml",
"tracing",
"tracing-subscriber",
"walkdir",
]
[[package]]
name = "hashbrown"
version = "0.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "indexmap"
version = "2.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
dependencies = [
"equivalent",
"hashbrown",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
[[package]]
name = "itoa"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "log"
version = "0.4.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]]
name = "matchers"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9"
dependencies = [
"regex-automata",
]
[[package]]
name = "memchr"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
[[package]]
name = "nu-ansi-term"
version = "0.50.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
dependencies = [
"windows-sys",
]
[[package]]
name = "once_cell"
version = "1.21.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
[[package]]
name = "once_cell_polyfill"
version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
[[package]]
name = "pin-project-lite"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
[[package]]
name = "proc-macro2"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
dependencies = [
"unicode-ident",
]
[[package]]
name = "pulldown-cmark"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9f068eba8e7071c5f9511831b44f32c740d5adf574e990f946ddb53db2f314e"
dependencies = [
"bitflags",
"getopts",
"memchr",
"pulldown-cmark-escape",
"unicase",
]
[[package]]
name = "pulldown-cmark-escape"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae"
[[package]]
name = "quote"
version = "1.0.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "serde"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
dependencies = [
"serde_core",
"serde_derive",
]
[[package]]
name = "serde_core"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.150"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9"
dependencies = [
"itoa",
"memchr",
"serde",
"serde_core",
"zmij",
]
[[package]]
name = "serde_spanned"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6662b5879511e06e8999a8a235d848113e942c9124f211511b16466ee2995f26"
dependencies = [
"serde_core",
]
[[package]]
name = "sharded-slab"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
dependencies = [
"lazy_static",
]
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thread_local"
version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
dependencies = [
"cfg-if",
]
[[package]]
name = "toml"
version = "0.9.12+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf92845e79fc2e2def6a5d828f0801e29a2f8acc037becc5ab08595c7d5e9863"
dependencies = [
"indexmap",
"serde_core",
"serde_spanned",
"toml_datetime",
"toml_parser",
"toml_writer",
"winnow 0.7.15",
]
[[package]]
name = "toml_datetime"
version = "0.7.5+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347"
dependencies = [
"serde_core",
]
[[package]]
name = "toml_parser"
version = "1.1.2+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526"
dependencies = [
"winnow 1.0.3",
]
[[package]]
name = "toml_writer"
version = "1.1.1+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "756daf9b1013ebe47a8776667b466417e2d4c5679d441c26230efd9ef78692db"
[[package]]
name = "tracing"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
dependencies = [
"pin-project-lite",
"tracing-attributes",
"tracing-core",
]
[[package]]
name = "tracing-attributes"
version = "0.1.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tracing-core"
version = "0.1.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
dependencies = [
"once_cell",
"valuable",
]
[[package]]
name = "tracing-log"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
dependencies = [
"log",
"once_cell",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319"
dependencies = [
"matchers",
"nu-ansi-term",
"once_cell",
"regex-automata",
"sharded-slab",
"smallvec",
"thread_local",
"tracing",
"tracing-core",
"tracing-log",
]
[[package]]
name = "unicase"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
name = "unicode-width"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "valuable"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "winapi-util"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"windows-sys",
]
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "windows-sys"
version = "0.61.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
dependencies = [
"windows-link",
]
[[package]]
name = "winnow"
version = "0.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945"
[[package]]
name = "winnow"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1"
[[package]]
name = "zmij"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"

16
Cargo.toml Normal file
View File

@ -0,0 +1,16 @@
[package]
name = "glassmind"
version = "0.1.0"
edition = "2024"
[dependencies]
anyhow = "1.0"
clap = { version = "4.5", features = ["derive"] }
pulldown-cmark = "0.13"
regex = "1.11"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
toml = "0.9"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
walkdir = "2.5"

230
README.md
View File

@ -1,2 +1,228 @@
# glassMind # Glassmind
Local-first RAG and memory infrastructure for Obsidian vaults.
> Local-first semantic retrieval for Obsidian-like markdown knowledge bases and AI workflows.
*This is in development; it doesn't run yet. Want to help? Get in contact!*
Glassmind turns folders of markdown notes into searchable semantic memory for AI tools and humans.
It works especially well with Obsidian vaults, but Obsidian is not required.
It indexes markdown, understands links/tags/headings, performs hybrid semantic retrieval, and exposes context through a CLI, HTTP API, and MCP tools.
Your notes stay local.
Your vault stays canonical.
The database is rebuildable.
No cloud required.
---
## What is this?
Glassmind is **not**:
* a chatbot
* an Obsidian plugin
* an autonomous agent
* a replacement for Obsidian
* a SaaS startup trying to ingest your second brain into a valuation event
Glassmind is a **memory and retrieval layer**.
Think:
```text
Claude / Codex / Hermes / local model
Glassmind
Your Obsidian vault
```
The goal is simple:
> “Given this task, what context from my vault actually matters?”
---
# Features
## Current / Planned
* Markdown vault indexing
* Semantic search
* Hybrid retrieval
* embeddings
* keyword search
* tags
* wikilinks
* recency
* Context bundle generation
* MCP integration
* HTTP API
* Local-first operation
* Rebuildable indexes
* Incremental indexing
* Agent-safe `.agent/` workspace
* Obsidian-compatible by default
---
# Philosophy
Glassmind treats your vault like memory, not files.
```text
Obsidian markdown = source of truth
SQLite = rebuildable index/cache
Embeddings = semantic retrieval layer
```
Your notes remain human-readable markdown.
Glassmind exists to make retrieval useful, fast, and agent-friendly without turning your vault into proprietary soup.
---
# Example
```bash
glassmind index
glassmind search "local memory tool ideas"
glassmind context "help me continue the Glassmind project"
glassmind serve
```
---
# Why?
Because existing “AI memory” systems tend to be one of:
* cloud-first
* opaque
* startup-shaped
* agent-shaped
* overengineered
* weirdly hostile to user ownership
Meanwhile, many of us are already using Obsidian as informal long-term memory.
Glassmind formalizes that idea.
---
# Documentation
* [Design Document](docs/design.md)
* [FAQ](docs/faq.md)
* [HUH? (Beginner's ELI5 guide)](docs/huh.md)
---
# Architecture
```text
Obsidian Vault
Indexer
SQLite + Vector Search
CLI / HTTP / MCP
Agents and local models
```
---
# Tech Stack
Planned v1 stack:
```text
Rust
SQLite
sqlite-vec
Ollama embeddings
Axum
MCP
```
---
# Status
Early development.
Currently building:
* vault indexer
* chunking
* semantic retrieval
* context generation
---
# Security / Privacy
Glassmind is designed to run locally.
By default:
* binds to localhost
* keeps notes local
* avoids modifying user notes
* stores indexes separately
* treats markdown as canonical
No telemetry is planned.
No cloud dependency is required.
No “AI-enhanced knowledge monetization platform” nonsense.
No enshittification ever. I stake my professional reputation on it.
---
# Name
Why “Glassmind”?
Because it's supposed to feel like peering through semantic glass into your own thoughts.
Also because `brainworm` felt a little aggressive for a tool people may actually deploy at work.
---
# Contributing
Eventually.
Right now the project is still in the “rapid architectural mutation” phase.
If you want to throw me a PR or two I'll give you one (1) really good compliment.
---
# Legal
Glassmind is an independent project and is not affiliated with or endorsed by [Obsidian](https://obsidian.md).
---
# I am a recruiter
Hi.
You may also enjoy:
* [LinkedIn / khodges42](https://linkedin.com/in/khodges42)

1199
docs/design.md Normal file

File diff suppressed because it is too large Load Diff

986
docs/dev/tasks.md Normal file
View File

@ -0,0 +1,986 @@
```md id="v2l7nq"
# tasks.md
# Glassmind Tasks
## Project Rules
- Prefer small, shippable tasks.
- Every stage should leave the project runnable.
- Avoid premature abstraction.
- Favor inspectability over magic.
- Small application philosophy
- Markdown files are canonical.
- Database state must be rebuildable.
- Local-first is a hard requirement.
- No cloud dependency in core architecture.
- No enshittification.
---
# Phase 1 — Project Skeleton & Foundations
## [x] GM-001 — Initialize Rust workspace
### Goals
- Create Rust project
- Verify build pipeline
- Establish workspace structure
### Tasks
- Run `cargo init`
- Create `/src`
- Create `/examples`
- Create `/fixtures`
- Create `/scripts`
- Create initial `.gitignore`
- Add GPL license
- Verify clean build
### Acceptance Criteria
- `cargo build` succeeds
- Repo structure exists
- Project compiles on clean machine
---
## [x] GM-002 — Add core dependencies
### Goals
Install foundational crates.
### Tasks
Add:
- `clap`
- `serde`
- `serde_json`
- `toml`
- `tracing`
- `tracing-subscriber`
- `anyhow`
### Acceptance Criteria
- Project builds
- Logging works
- Config parsing stub exists
---
## [x] GM-003 — Implement CLI skeleton
### Goals
Create top-level CLI interface.
### Tasks
Add commands:
- `init`
- `index`
- `search`
- `context`
- `serve`
- `stats`
### Acceptance Criteria
- `glassmind --help` works
- Subcommands render correctly
- Unknown commands fail cleanly
---
## [x] GM-004 — Create config loader
### Goals
Load user config from disk.
### Tasks
- Define `glassmind.toml`
- Create config structs
- Implement config parsing
- Add defaults
- Add validation
- Add config path resolution
### Acceptance Criteria
- Config loads successfully
- Missing config generates defaults
- Invalid config errors clearly
---
## [x] GM-005 — Implement logging setup
### Goals
Establish consistent logging.
### Tasks
- Configure tracing subscriber
- Add log levels
- Add debug mode
- Add structured logs
- Add startup logging
### Acceptance Criteria
- Logs visible in CLI
- Debug mode works
- Errors produce stack traces
---
# Phase 2 — Vault Discovery
## [x] GM-006 — Implement vault walker
### Goals
Recursively discover markdown files.
### Tasks
- Add `walkdir`
- Walk configured vault path
- Detect `.md` files
- Skip ignored directories
- Support nested folders
- Add file count metrics
### Acceptance Criteria
- Vault scan succeeds
- Ignores work correctly
- Correct markdown count displayed
---
## [x] GM-007 — Implement ignore handling
### Goals
Allow configurable ignore patterns.
### Tasks
Ignore:
- `.git`
- `.obsidian`
- `.trash`
- `.agent/cache`
Add configurable ignores.
### Acceptance Criteria
- Ignored folders skipped
- Configurable ignores work
- No accidental recursion
---
## [x] GM-008 — Add note metadata extraction
### Goals
Extract basic note metadata.
### Tasks
Extract:
- path
- filename
- title
- modified timestamp
- file size
### Acceptance Criteria
- Metadata visible in debug output
- Data stored internally
---
## [x] GM-009 — Add markdown parsing
### Goals
Parse markdown structure.
### Tasks
Add:
- heading extraction
- paragraph extraction
- code block detection
- list detection
Suggested crate:
- `pulldown-cmark`
### Acceptance Criteria
- Headings parsed correctly
- Parser handles malformed markdown gracefully
---
## [x] GM-010 — Extract wikilinks
### Goals
Detect Obsidian-style links.
### Tasks
Support:
- `[[note]]`
- `[[note|alias]]`
- `[[folder/note]]`
Store:
- source
- target
- alias
### Acceptance Criteria
- Links parsed correctly
- Links stored in memory
---
## [ ] GM-011 — Extract tags
### Goals
Parse tags from notes.
### Tasks
Support:
- inline tags
- frontmatter tags
Normalize:
- lowercase
- trim whitespace
### Acceptance Criteria
- Tags extracted consistently
- Duplicate tags removed
---
# Phase 3 — Database Layer
## [ ] GM-012 — Add SQLite integration
### Goals
Create local metadata database.
### Tasks
- Add SQLite crate
- Create DB initialization
- Create migrations
- Create schema bootstrap
### Acceptance Criteria
- DB initializes automatically
- Schema created successfully
---
## [ ] GM-013 — Create notes table
### Goals
Store note metadata.
### Tasks
Create schema for:
- notes
- paths
- timestamps
- hashes
### Acceptance Criteria
- Notes persist correctly
- Duplicate handling works
---
## [ ] GM-014 — Create chunks table
### Goals
Store retrieval chunks.
### Tasks
Store:
- note ID
- chunk content
- heading path
- line numbers
- token estimates
### Acceptance Criteria
- Chunks persist correctly
- Relationships resolve correctly
---
## [ ] GM-015 — Add content hashing
### Goals
Detect changed notes efficiently.
### Tasks
- Add SHA256 hashing
- Hash note content
- Compare hashes on reindex
- Skip unchanged files
### Acceptance Criteria
- Incremental indexing works
- Unchanged files skipped
---
# Phase 4 — Chunking
## [ ] GM-016 — Implement heading-based chunking
### Goals
Split notes into useful retrieval units.
### Tasks
- Split by heading
- Preserve heading hierarchy
- Preserve ordering
- Preserve note references
### Acceptance Criteria
- Chunks remain readable
- Context boundaries make sense
---
## [ ] GM-017 — Add fallback chunk splitting
### Goals
Handle giant sections safely.
### Tasks
- Add max chunk size
- Add overlap windows
- Preserve sentence boundaries if possible
### Acceptance Criteria
- Large files chunk correctly
- No giant retrieval blobs
---
## [ ] GM-018 — Estimate token counts
### Goals
Prepare for LLM context budgeting.
### Tasks
- Add rough token estimator
- Store token counts
- Expose in debug mode
### Acceptance Criteria
- Estimates reasonably accurate
- Context budgeting possible
---
# Phase 5 — Search
## [ ] GM-019 — Implement SQLite FTS search
### Goals
Add keyword search.
### Tasks
- Enable FTS5
- Create search index
- Implement search query
- Add snippet extraction
- Add ranking
### Acceptance Criteria
- Search returns relevant results
- Results ranked correctly
---
## [ ] GM-020 — Implement basic CLI search command
### Goals
Expose usable search interface.
### Tasks
- Add search formatting
- Show paths
- Show headings
- Show snippets
- Add JSON output option
### Acceptance Criteria
- `glassmind search` usable daily
- Results readable
- JSON output valid
---
```md id="5m9zsw"
## Embeddings
### [ ] GM-021 — Create embedding backend trait
#### Goals
Abstract embedding providers behind a common interface.
#### Tasks
- Create `EmbeddingBackend` trait
- Define embedding request/response types
- Add async support if needed
- Add error handling
- Add provider config support
#### Acceptance Criteria
- Multiple backends can implement trait
- Search pipeline independent from provider implementation
---
### [ ] GM-022 — Implement Ollama embedding backend
#### Goals
Generate embeddings locally using Ollama.
#### Tasks
- Add Ollama HTTP client
- Implement embedding requests
- Add configurable embedding model
- Add retry handling
- Add timeout handling
#### Acceptance Criteria
- Query embeddings generated successfully
- Chunk embeddings generated successfully
- Backend configurable through TOML
---
### [ ] GM-023 — Add embedding generation pipeline
#### Goals
Generate embeddings during indexing.
#### Tasks
- Embed chunks during index phase
- Skip unchanged embeddings
- Batch embedding requests
- Add embedding queue abstraction
- Add progress reporting
#### Acceptance Criteria
- Vault indexing produces embeddings
- Reindex skips unchanged chunks
---
### [ ] GM-024 — Integrate sqlite-vec
#### Goals
Store and search vectors locally.
#### Tasks
- Add sqlite-vec dependency
- Create vector schema
- Store chunk vectors
- Add nearest-neighbor search
- Validate vector dimensions
#### Acceptance Criteria
- Embeddings persist correctly
- Similarity search returns results
---
### [ ] GM-025 — Implement semantic search
#### Goals
Search by meaning instead of keywords.
#### Tasks
- Embed query text
- Retrieve nearest vectors
- Rank results by similarity
- Return chunk metadata
- Add configurable result limits
#### Acceptance Criteria
- Semantically related notes retrieved
- Search quality noticeably useful
---
## Hybrid Retrieval
### [ ] GM-026 — Create retrieval scoring model
#### Goals
Combine multiple ranking systems.
#### Tasks
Add weighted scoring for:
- semantic similarity
- keyword relevance
- recency
- tags
- wikilinks
- path/project affinity
#### Acceptance Criteria
- Final ranking combines all scoring sources
- Weights configurable
---
### [ ] GM-027 — Add recency boosting
#### Goals
Favor recently active notes.
#### Tasks
- Define recency decay function
- Add configurable recency weights
- Support pinned notes
- Add debug scoring output
#### Acceptance Criteria
- Recent notes boosted appropriately
- Old notes still retrievable
---
### [ ] GM-028 — Add wikilink graph weighting
#### Goals
Use note relationships during retrieval.
#### Tasks
- Calculate link adjacency
- Boost linked neighbors
- Support bidirectional relationships
- Add graph traversal depth limit
#### Acceptance Criteria
- Related linked notes boosted
- Retrieval continuity improved
---
### [ ] GM-029 — Add retrieval debug mode
#### Goals
Make ranking explainable.
#### Tasks
Display:
- semantic score
- keyword score
- recency score
- tag score
- link score
- final score
#### Acceptance Criteria
- Users can inspect ranking behavior
- Retrieval tuning becomes practical
---
## Context Bundles
### [ ] GM-030 — Create context bundle builder
#### Goals
Generate LLM-ready retrieval payloads.
#### Tasks
- Define context bundle structure
- Deduplicate overlapping chunks
- Group by note
- Preserve ordering
- Add metadata blocks
#### Acceptance Criteria
- Context bundles readable
- Context bundles useful for LLM prompts
---
### [ ] GM-031 — Add token budgeting
#### Goals
Prevent oversized context payloads.
#### Tasks
- Track token estimates
- Add configurable token budget
- Trim low-priority chunks
- Preserve high-score chunks first
#### Acceptance Criteria
- Context stays within configured budget
- Retrieval quality remains useful
---
### [ ] GM-032 — Add context summarization hooks
#### Goals
Prepare for future summarization support.
#### Tasks
- Define summarizer interface
- Add optional summarization stage
- Add summary metadata fields
- Support disabling summarization
#### Acceptance Criteria
- Pipeline supports optional summarization
- Core retrieval still functions without summaries
---
### [ ] GM-033 — Implement `glassmind context`
#### Goals
Expose high-level retrieval workflow.
#### Tasks
- Add CLI command
- Format markdown output
- Add JSON mode
- Include sources
- Include retrieval metadata
#### Acceptance Criteria
- Command usable directly by humans
- Output usable by agents
---
## HTTP API
### [ ] GM-034 — Add Axum server skeleton
#### Goals
Expose Glassmind over HTTP.
#### Tasks
- Add Axum dependency
- Create server bootstrap
- Add config support
- Add graceful shutdown
- Bind localhost by default
#### Acceptance Criteria
- Server starts successfully
- Local requests succeed
---
### [ ] GM-035 — Implement `/search` endpoint
#### Goals
Expose search over HTTP.
#### Tasks
- Define request schema
- Define response schema
- Add pagination
- Add JSON serialization
- Add validation
#### Acceptance Criteria
- Endpoint returns valid search results
- Errors handled cleanly
---
### [ ] GM-036 — Implement `/context` endpoint
#### Goals
Expose context retrieval API.
#### Tasks
- Add context request schema
- Support token budget parameter
- Return structured context bundles
- Include source metadata
#### Acceptance Criteria
- API returns usable context payloads
- Response structure documented
---
### [ ] GM-037 — Implement `/notes/{id}` endpoint
#### Goals
Allow direct note retrieval.
#### Tasks
- Fetch note metadata
- Fetch chunk data
- Return markdown content
- Add error handling
#### Acceptance Criteria
- Notes retrievable by ID
- Missing notes handled correctly
---
### [ ] GM-038 — Add `/health` and `/stats`
#### Goals
Support monitoring/debugging.
#### Tasks
- Add health endpoint
- Add DB stats
- Add vault metrics
- Add embedding counts
#### Acceptance Criteria
- Health checks usable
- Stats endpoint informative
---
## MCP Support
### [ ] GM-039 — Create MCP server skeleton
#### Goals
Allow AI tools to call Glassmind directly.
#### Tasks
- Add MCP transport support
- Define tool registry
- Implement request dispatch
- Add structured tool responses
#### Acceptance Criteria
- MCP server starts successfully
- Tool calls function correctly
---
### [ ] GM-040 — Implement `glassmind_search` MCP tool
#### Goals
Expose search through MCP.
#### Tasks
- Define tool schema
- Add search execution
- Return structured results
- Include source paths
#### Acceptance Criteria
- MCP clients can search successfully
---
### [ ] GM-041 — Implement `glassmind_context` MCP tool
#### Goals
Expose context bundles through MCP.
#### Tasks
- Add context generation
- Add token budgeting
- Return structured context payloads
#### Acceptance Criteria
- MCP clients receive usable context bundles
---
### [ ] GM-042 — Implement `glassmind_read` MCP tool
#### Goals
Allow agents to inspect notes directly.
#### Tasks
- Fetch note content
- Support chunk-specific reads
- Add note metadata
- Add error handling
#### Acceptance Criteria
- Agents can retrieve note contents reliably
---
### [ ] GM-043 — Add MCP integration examples
#### Goals
Document real-world integration.
#### Tasks
- Add Claude Desktop example
- Add Codex example
- Add local agent example
- Add config examples
#### Acceptance Criteria
- Users can integrate Glassmind without guesswork
---
## Incremental Indexing
### [ ] GM-044 — Add file change detection
#### Goals
Avoid full vault reindexing.
#### Tasks
- Compare content hashes
- Detect added files
- Detect deleted files
- Detect modified files
#### Acceptance Criteria
- Incremental indexing functions correctly
- Unchanged notes skipped
---
### [ ] GM-045 — Add filesystem watch mode
#### Goals
Support live vault updates.
#### Tasks
- Add filesystem watcher
- Debounce rapid changes
- Trigger partial reindex
- Add watch logging
#### Acceptance Criteria
- File edits reflected automatically
- No runaway indexing loops
---
### [ ] GM-046 — Add partial embedding regeneration
#### Goals
Avoid recomputing unchanged vectors.
#### Tasks
- Detect changed chunks
- Recompute only dirty embeddings
- Preserve existing vectors
- Handle deleted chunks
#### Acceptance Criteria
- Reindex significantly faster after small edits
---
## Agent Workspace
### [ ] GM-047 — Create `.agent/` workspace structure
#### Goals
Establish safe agent-owned storage.
#### Tasks
Create:
- `.agent/memories`
- `.agent/tasks`
- `.agent/summaries`
- `.agent/logs`
- `.agent/cache`
#### Acceptance Criteria
- Workspace generated automatically
- Structure documented
---
### [ ] GM-048 — Add memory capture commands
#### Goals
Allow structured memory persistence.
#### Tasks
Add:
- `capture-memory`
- `capture-task`
- `capture-decision`
Store entries as markdown.
#### Acceptance Criteria
- Commands append correctly
- Entries index correctly
---
### [ ] GM-049 — Index `.agent/` content
#### Goals
Allow generated memory retrieval.
#### Tasks
- Include `.agent/` in indexing pipeline
- Tag generated content
- Preserve provenance metadata
#### Acceptance Criteria
- Agent-generated notes searchable
- Provenance visible
---
### [ ] GM-050 — Add retrieval audit logging
#### Goals
Track retrieval behavior for debugging.
#### Tasks
Log:
- query
- retrieved chunks
- retrieval scores
- timestamp
- requesting client
#### Acceptance Criteria
- Retrievals traceable
- Logs useful for tuning/debugging
```
---
# What's Next
## Retrieval Quality
- Evaluation datasets
- Ranking tuning
- Query debugging
- Explainable scoring
## Performance
- Parallel indexing
- Cached embeddings
- Batch embedding generation
- Large vault optimization
## Future Ideas
- Git history awareness
- Temporal retrieval
- Canvas parsing
- Code-aware chunking
- Multi-vault support
- Graph exploration
- Retrieval visualization
- Vault analytics
- Semantic diffing
- “What changed?” context reports
- Local reranking models
- Session continuity memory
- Agent-safe write proposals
```

535
docs/faq.md Normal file
View File

@ -0,0 +1,535 @@
````md
# FAQ.md
# Frequently Asked Questions
## What is Glassmind?
Glassmind is a local-first semantic retrieval and memory system for markdown knowledge bases.
It indexes markdown files, builds semantic and structural search indexes, and exposes retrieval APIs for:
- AI assistants
- local models
- agents
- MCP clients
- automation tooling
- humans using the CLI directly
Glassmind is designed to work especially well with Obsidian vaults, but only requires a directory of markdown files.
---
# What problem is Glassmind solving?
Modern LLMs are powerful but stateless.
They:
- lose context
- forget projects
- cannot inherently understand your local files
- have limited prompt windows
- hallucinate when context is missing
Meanwhile many people already maintain:
- engineering documentation
- project journals
- research notes
- worldbuilding
- task tracking
- personal knowledge systems
inside markdown repositories.
Glassmind bridges those worlds.
It provides:
- retrieval
- semantic search
- context construction
- memory indexing
over existing markdown workflows.
---
# Is Glassmind an AI agent?
No.
Glassmind is retrieval infrastructure.
It does not:
- autonomously execute tasks
- reason independently
- act as a chatbot
- replace orchestration frameworks
It is closer to:
- a search engine
- a semantic index
- a memory API
- a retrieval layer
Agents and AI tools call Glassmind to retrieve relevant context.
---
# Is Glassmind tied to Obsidian?
No.
Glassmind is markdown-native.
It works with:
- Obsidian vaults
- plain markdown directories
- docs repositories
- PKM systems
- engineering notebooks
- wiki-style folder structures
Obsidian is simply a particularly good fit because:
- it is local-first
- it uses markdown
- it has strong linking semantics
- it is widely adopted
Glassmind treats markdown files as canonical regardless of editor.
---
# Why markdown?
Because markdown is:
- portable
- durable
- inspectable
- editor-agnostic
- version-control friendly
- human-readable
Glassmind intentionally avoids proprietary storage formats for primary knowledge.
The markdown files remain the source of truth.
Everything else is rebuildable.
---
# What is the source of truth?
The markdown files.
Glassmind builds:
- indexes
- caches
- embeddings
- retrieval metadata
on top of them.
The database is disposable and rebuildable.
If Glassmind disappears, the notes still work.
---
# What database does Glassmind use?
Planned v1:
```text
SQLite
sqlite-vec
```
SQLite stores:
- note metadata
- chunk metadata
- tags
- links
- retrieval state
- indexes
sqlite-vec stores:
- semantic vectors ("embeddings")
The database is local and rebuildable.
---
# Why SQLite?
Because SQLite is:
- local-first
- fast enough
- battle-tested
- portable
- operationally simple
Glassmind intentionally avoids requiring:
- external database servers
- cloud infrastructure
- distributed systems
- operational overhead
for normal usage.
---
# What are embeddings?
Embeddings are vector representations of semantic meaning.
A chunk of text is transformed into a vector like:
```text
[0.12, -0.44, 0.89, ...]
```
Vectors with similar meaning are located near each other mathematically.
This enables semantic search.
Example:
```text
"persistent semantic cache"
```
can match:
```text
"local memory system"
```
even if the wording differs.
---
# Does Glassmind require online APIs?
No.
Glassmind is designed for local operation.
Planned local embedding options:
- Ollama
- fastembed-rs
- llama.cpp-compatible backends
Cloud embeddings may eventually be optional, but local-first is the default philosophy.
---
# What is hybrid retrieval?
Glassmind does not rely solely on embeddings.
Retrieval combines:
- semantic similarity
- keyword matching
- tags
- wikilinks
- recency
- project/path weighting
- hot memory boosting
This generally performs better than pure vector search.
---
# What is a chunk?
A chunk is a retrieval unit.
Instead of embedding entire files, Glassmind splits documents into smaller pieces.
Usually:
- heading sections
- paragraphs
- task blocks
- code blocks
- fixed-size fallback windows
Chunking improves:
- retrieval quality
- precision
- context density
- token efficiency
---
# What is a context bundle?
A context bundle is an LLM-ready retrieval result.
Instead of returning raw search matches only, Glassmind can assemble:
- relevant chunks
- related notes
- recent project activity
- linked concepts
- source references
into a structured payload optimized for AI consumption.
Example:
```text
"Help me continue the Glassmind architecture work"
```
might retrieve:
- recent architecture notes
- TODOs
- design decisions
- linked experiments
- related discussions
within a configurable token budget.
---
# What is MCP?
MCP stands for:
```text
Model Context Protocol
```
It is a protocol used by AI tools to interact with external systems and tools.
Glassmind plans to expose MCP-compatible retrieval tools such as:
```text
glassmind_search
glassmind_context
glassmind_read
```
This allows tools like Claude Code or other agent systems to retrieve vault context directly.
---
# How is Glassmind different from traditional RAG systems?
Many RAG systems are:
- cloud-first
- opaque
- tightly coupled to vector databases
- detached from user workflows
- document-ingestion pipelines rather than knowledge systems
Glassmind is designed around:
- local-first operation
- markdown-native workflows
- inspectability
- rebuildability
- human-readable source material
- AI + human co-usage
Glassmind assumes the markdown corpus is already meaningful.
It focuses on retrieval quality and continuity.
---
# How is Glassmind different from vector databases?
Vector databases store embeddings and perform nearest-neighbor search.
Glassmind is:
- retrieval orchestration
- indexing
- chunking
- metadata extraction
- semantic ranking
- context assembly
- markdown-aware infrastructure
Glassmind may use vector storage internally, but it is not merely a vector DB wrapper.
---
# Will Glassmind modify my notes?
By default:
- no direct user note modification
Glassmind may optionally write to:
```text
.agent/
```
for:
- summaries
- logs
- generated memory
- task state
- context artifacts
Future configurable modes may support:
- proposed diffs
- explicit approvals
- direct modification
but user ownership and safety are priorities.
---
# Why not just use grep or ripgrep?
Keyword search is extremely useful and Glassmind still supports it.
But semantic retrieval solves problems like:
```text
"I know I wrote about this concept but I forgot the terminology."
```
Glassmind combines:
- keyword retrieval
- semantic retrieval
- structural metadata
- recency
- graph relationships
rather than replacing traditional search entirely.
---
# Why not use a graph database?
Maybe eventually.
But for v1:
- simplicity
- rebuildability
- portability
- operational sanity
matter more.
SQLite plus semantic indexing is likely sufficient for:
- personal vaults
- power-user vaults
- local AI workflows
Graph semantics can still exist logically without introducing a distributed graph infrastructure problem on day one.
---
# Is Glassmind intended for teams?
Not initially.
The primary target is:
- individuals
- researchers
- engineers
- writers
- local AI workflows
- personal knowledge systems
Future multi-user support is possible but not the immediate focus.
---
# What does “local-first” actually mean here?
The intended default behavior is:
- local storage
- localhost-only networking
- optional offline operation
- local embeddings
- markdown canonical storage
- rebuildable indexes
- no required cloud dependency
- no telemetry
Glassmind should remain usable:
- disconnected
- self-hosted
- archived
- years into the future
---
# What does “hot memory” mean?
Glassmind conceptually separates retrieval into:
- hot memory
- warm memory
- cold memory
Hot memory includes:
- recent notes
- active projects
- pinned information
- recently retrieved context
Cold memory still exists, but is less likely to be automatically surfaced.
This helps context selection remain relevant without deleting historical information.
---
# What are the long-term goals?
Long-term goals include:
- strong retrieval quality
- excellent local AI workflows
- durable markdown-native memory infrastructure
- robust MCP integration
- context continuity across sessions
- transparent retrieval behavior
- inspectable ranking systems
- ergonomic semantic search
Not:
- replacing human thought
- building autonomous AGI office workers
- trapping users inside proprietary ecosystems
---
# Why is the project opinionated?
Because retrieval quality and long-term maintainability depend heavily on architecture choices.
Glassmind intentionally prefers:
- explicit systems
- rebuildable state
- inspectability
- portability
- user ownership
- operational simplicity
over:
- hidden magic
- giant opaque pipelines
- cloud dependence
- maximal abstraction
---
# Why the name “Glassmind”?
The original idea was:
```text
semantic transparency into your own thoughts
```
The system is supposed to feel like:
- peering through glass
- inspecting memory
- traversing thought structures
rather than interacting with a black box.
Also it sounded less alarming than some of the other candidate names.
````

470
docs/huh.md Normal file
View File

@ -0,0 +1,470 @@
# Okay, what *is* this thing?
Glassmind is a tool that helps AI systems search and understand your notes without uploading your brain to somebody else's servers.
If you use:
- Obsidian
- markdown notes
- personal knowledge management tools
- giant folders full of half-finished thoughts
- daily notes
- project logs
- research docs
- creative writing
- engineering notes
…Glassmind is designed to make those notes actually usable by AI tools.
---
# The Short Version
Glassmind turns your Obsidian vault into something AI can search intelligently.
Not just:
```text
find exact words
```
but:
```text
find ideas related to what I mean
```
without requiring:
- cloud services
- subscriptions
- proprietary formats
- uploading your notes to random startups
---
# Explain It Like I'm Normal
Imagine you have:
- thousands of notes
- years of project ideas
- meeting notes
- technical docs
- TODOs
- journal entries
- random fragments of thoughts
You vaguely remember writing something useful six months ago.
You search:
```text
"local memory system"
```
But the note was actually called:
```text
"persistent semantic context cache"
```
Normal search often fails there.
Glassmind is designed to make that search work anyway.
---
# How?
Glassmind builds an index of your notes.
Think of it like:
- a library catalog
- a search engine
- a map of your vault
- a memory assistant
It reads your markdown files and stores:
- note titles
- headings
- tags
- links
- sections
- semantic fingerprints ("embeddings")
Then when you search, it tries to find notes related by:
- meaning
- keywords
- tags
- recency
- links between notes
- project relationships
---
# What Are “Embeddings”?
This is the scary AI word everyone uses without explaining.
An embedding is basically:
```text
a mathematical fingerprint of meaning
```
Glassmind converts chunks of text into vectors (lists of numbers) that represent semantic similarity.
Meaning:
```text
"local memory tool"
```
can match:
```text
"persistent semantic cache"
```
even though the words are different.
This is what makes modern semantic search possible.
---
# Is This Another AI?
Not really.
Glassmind is infrastructure.
It does not:
- roleplay
- think
- chat
- plan your life
- replace your notes
It retrieves context.
Think:
```text
AI assistant ← Glassmind ← Your notes
```
Glassmind is the memory layer.
---
# Why Not Just Use ChatGPT Directly?
You can.
But large language models have bad long-term memory.
They:
- lose context
- forget projects
- hallucinate
- cannot automatically understand your vault structure
- do not inherently “know your notes”
Glassmind helps solve that by retrieving useful context automatically.
---
# Why Obsidian?
Because Obsidian is already:
- local-first
- markdown-based
- widely used
- human-readable
- flexible
- not tied to a proprietary database
Glassmind treats your Obsidian vault as the canonical source of truth.
Your notes remain:
- plain files
- portable
- editable without Glassmind
- future-proof
If Glassmind disappeared tomorrow, your notes would still work.
That is intentional.
---
# What Does “Local-First” Mean?
It means:
- your notes stay on your machine
- you control the files
- the system works offline
- cloud services are optional
- the software is designed around ownership
Glassmind is intentionally designed to avoid:
- vendor lock-in
- telemetry creep
- cloud dependency
- “AI platform” nonsense
- enshittification
---
# What Does Glassmind Actually Do?
## Indexing
Glassmind scans your vault and builds a searchable index.
---
## Semantic Search
Find related ideas, not just exact words.
---
## Context Bundles
This is one of the big goals.
Instead of dumping entire folders into an AI prompt, Glassmind tries to gather:
```text
the notes that actually matter
```
for the current task.
Example:
```text
"Help me continue my game engine project"
```
Glassmind might return:
- recent engine notes
- TODOs
- architecture docs
- related experiments
- previous decisions
- linked concepts
This gives AI tools much better context.
---
# What Is RAG?
RAG means:
```text
Retrieval-Augmented Generation
```
Which is an extremely annoying phrase for a simple idea:
```text
Find useful information before asking the AI to answer.
```
Without RAG:
```text
AI guesses from training data
```
With RAG:
```text
AI uses your actual notes/documents
```
Glassmind is a RAG system for Obsidian vaults.
---
# Is This Replacing Obsidian?
No.
Obsidian remains:
- the note editor
- the vault UI
- the writing environment
- the graph view
- the human-facing tool
Glassmind is:
- indexing
- retrieval
- semantic search
- memory infrastructure
- agent tooling
---
# Is This Safe?
The project is designed around:
- local-first storage
- rebuildable indexes
- markdown as source of truth
- minimal hidden state
By default, Glassmind should avoid modifying user notes directly.
Instead it may use:
```text
.agent/
```
for:
- generated summaries
- memory captures
- task state
- logs
- temporary outputs
The idea is:
- your notes belong to you
- generated content is separated
- the system stays understandable
---
# Who Is This For?
Probably:
- software engineers
- researchers
- writers
- worldbuilders
- Obsidian users
- AI workflow nerds
- people building local AI setups
- people tired of cloud everything
---
# What Is This NOT For?
Probably not:
- enterprise surveillance software
- replacing databases
- fully autonomous AGI agent swarms
- “AI employees”
- growth-hacking your notes
At least not intentionally.
---
# Why Does This Exist?
Because many people already use Obsidian as:
- memory
- project state
- idea storage
- engineering documentation
- thinking infrastructure
But AI systems are still surprisingly bad at interacting with that information cleanly.
Glassmind exists to bridge that gap without taking ownership away from the user.
---
# Philosophy
Glassmind is opinionated about a few things.
## Your Notes Should Stay Yours
Markdown files are the canonical source of truth.
---
## Local-First Matters
Software should still function when:
- offline
- self-hosted
- unsupported
- five years old
---
## AI Should Augment Retrieval, Not Replace Thought
Glassmind is designed to help:
- surface context
- reduce friction
- improve continuity
Not automate human meaning out of existence.
---
## Avoid Hidden Magic
The system should be:
- inspectable
- debuggable
- rebuildable
- understandable
If the index breaks, rebuild it.
If the retrieval is bad, improve scoring.
If the AI hallucinates, expose sources.
---
# The Dream
The dream is not:
```text
"AI writes your life for you"
```
The dream is:
```text
"AI can finally understand your existing context well enough to be genuinely useful"
```
That's a very different goal.
---
# Final Summary
Glassmind is:
```text
local semantic memory infrastructure for Obsidian vaults
```
It helps AI systems retrieve useful context from your notes while keeping:
- ownership
- portability
- transparency
- local control
intact.
Or, less formally:
```text
It lets the robot read your notes without handing your brain to a startup.
```

1
examples/.gitkeep Normal file
View File

@ -0,0 +1 @@

1
fixtures/.gitkeep Normal file
View File

@ -0,0 +1 @@

33
glassmind.toml Normal file
View File

@ -0,0 +1,33 @@
# Starter configuration for Glassmind. The values below mirror the built-in
# defaults in src/config.rs (`impl Default for Config`).

# Root of the markdown vault to scan. Can be overridden per invocation with
# the global `--vault` CLI flag.
[vault]
path = "."
[index]
include_agent_dir = true
# Directories skipped by the scanner.
ignore_dirs = [
".git",
".obsidian",
".trash",
".agent/cache",
]
# Validation requires chunk_target_tokens > 0 and
# chunk_overlap_tokens < chunk_target_tokens.
chunk_target_tokens = 500
chunk_overlap_tokens = 80
[embeddings]
backend = "ollama"
model = "nomic-embed-text"
url = "http://localhost:11434"
# Per-signal ranking weights (the defaults add up to 1.0).
[search]
semantic_weight = 0.55
keyword_weight = 0.25
recency_weight = 0.1
link_weight = 0.05
tag_weight = 0.05
[writes]
# Must be one of: "off", "agent-only", "propose", "allow".
mode = "agent-only"
# Joined onto vault.path when `glassmind init` creates the agent workspace.
agent_dir = ".agent"
[server]
# Loopback address: the planned HTTP API is local-only by default.
host = "127.0.0.1"
# Must be greater than zero.
port = 7331

1
scripts/.gitkeep Normal file
View File

@ -0,0 +1 @@

70
src/cli.rs Normal file
View File

@ -0,0 +1,70 @@
use std::path::PathBuf;
use clap::{Parser, Subcommand, ValueEnum};
// Top-level command-line interface, parsed with clap's derive API.
//
// NOTE: explanatory comments in this type deliberately use `//`. clap turns
// `///` doc comments into user-visible help text, so only the existing `///`
// lines are part of the CLI's output.
#[derive(Debug, Parser)]
#[command(name = "glassmind")]
#[command(about = "Local-first retrieval over markdown vaults")]
#[command(version)]
pub struct Cli {
/// Path to glassmind.toml.
// Forwarded to `Config::load` by `main`; `None` means "use the default path".
#[arg(long, global = true)]
pub config: Option<PathBuf>,
/// Override the vault path from config.
// Applied on top of the loaded config via `Config::with_cli_vault`.
#[arg(long, global = true)]
pub vault: Option<PathBuf>,
/// Enable debug logging.
// Forwarded to `logging::init`.
#[arg(long, global = true)]
pub debug: bool,
// The subcommand to run; dispatched by the `match` in `main`.
#[command(subcommand)]
pub command: Commands,
}
// Subcommands of the `glassmind` binary.
//
// NOTE: added comments use `//` on purpose; clap renders `///` doc comments
// as help text, so switching comment style here would change `--help` output.
#[derive(Debug, Subcommand)]
pub enum Commands {
/// Create a starter config and the agent-owned workspace.
Init {
/// Overwrite an existing glassmind.toml.
#[arg(long)]
force: bool,
},
/// Scan the configured vault and report discovered markdown notes.
Index {
/// Emit JSON instead of text.
#[arg(long)]
json: bool,
},
/// Search the current markdown vault with lightweight local matching.
Search {
// Positional query text; `main` passes it to `VaultIndex::search`.
query: String,
// Passed as the `limit` argument of `VaultIndex::search` (default 10).
#[arg(short, long, default_value_t = 10)]
limit: usize,
// Selects text or JSON rendering of the results (default: text).
#[arg(long, value_enum, default_value_t = OutputFormat::Text)]
output: OutputFormat,
},
/// Build a human-readable context bundle from matching notes.
Context {
// Positional query text; `main` passes it to `VaultIndex::context_bundle`.
query: String,
// Passed as the `limit` argument of `VaultIndex::context_bundle` (default 5).
#[arg(short, long, default_value_t = 5)]
limit: usize,
// Selects markdown text or JSON rendering of the bundle (default: text).
#[arg(long, value_enum, default_value_t = OutputFormat::Text)]
output: OutputFormat,
},
/// Start the future localhost HTTP API.
// Currently a placeholder: `main` only prints the planned bind address.
Serve,
/// Show vault scan metrics.
// `main` currently handles this exactly like `Index` (prints the scan summary).
Stats {
/// Emit JSON instead of text.
#[arg(long)]
json: bool,
},
}
// Rendering mode for the `search` and `context` subcommands (their `--output`
// flag). Plain `//` comments are used because clap would surface `///` doc
// comments on the variants as help text for the possible values.
#[derive(Clone, Debug, ValueEnum)]
pub enum OutputFormat {
// Human-readable output printed line by line.
Text,
// Pretty-printed JSON produced with `serde_json::to_string_pretty`.
Json,
}

174
src/config.rs Normal file
View File

@ -0,0 +1,174 @@
use std::fs;
use std::path::{Path, PathBuf};
use anyhow::{Context, Result, anyhow, bail};
use serde::{Deserialize, Serialize};
/// Root configuration for Glassmind.
///
/// Each field corresponds to a table of the same name in `glassmind.toml`
/// (`[vault]`, `[index]`, `[embeddings]`, `[search]`, `[writes]`, `[server]`).
/// No field carries a `#[serde(default)]` attribute, so a config file is
/// expected to spell out every table and key. `Config::load` falls back to
/// `Config::default()` when no file exists at the default path.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Config {
pub vault: VaultConfig,
pub index: IndexConfig,
pub embeddings: EmbeddingsConfig,
pub search: SearchConfig,
pub writes: WritesConfig,
pub server: ServerConfig,
}
/// `[vault]` table: where the markdown vault lives.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct VaultConfig {
/// Root directory of the vault. Must not be empty (see `Config::validate`);
/// replaced by the `--vault` flag through `Config::with_cli_vault`.
pub path: PathBuf,
}
/// `[index]` table: scanning and chunking settings.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct IndexConfig {
/// Whether the agent workspace is part of the index.
/// NOTE(review): the consumer of this flag is not in this file; confirm in the vault scanner.
pub include_agent_dir: bool,
/// Directory names/paths to skip while scanning (defaults: `.git`,
/// `.obsidian`, `.trash`, `.agent/cache`).
pub ignore_dirs: Vec<String>,
/// Target chunk size in tokens; `Config::validate` requires it to be non-zero.
pub chunk_target_tokens: usize,
/// Overlap between chunks in tokens; `Config::validate` requires it to be
/// strictly smaller than `chunk_target_tokens`.
pub chunk_overlap_tokens: usize,
}
/// `[embeddings]` table: which embedding backend to talk to.
///
/// The defaults are `ollama` / `nomic-embed-text` / `http://localhost:11434`.
/// None of these values are validated by `Config::validate`.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct EmbeddingsConfig {
/// Backend identifier (free-form string; default `"ollama"`).
pub backend: String,
/// Model name passed to the backend (default `"nomic-embed-text"`).
pub model: String,
/// Base URL of the backend (default `"http://localhost:11434"`).
pub url: String,
}
/// `[search]` table: one weight per ranking signal.
///
/// The built-in defaults (0.55, 0.25, 0.10, 0.05, 0.05) add up to 1.0.
/// NOTE(review): how the weights are combined is not visible in this file —
/// presumably a weighted sum; confirm against the search implementation.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SearchConfig {
pub semantic_weight: f32,
pub keyword_weight: f32,
pub recency_weight: f32,
pub link_weight: f32,
pub tag_weight: f32,
}
/// `[writes]` table: what Glassmind is allowed to write, and where.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct WritesConfig {
/// Write policy. `Config::validate` accepts exactly `"off"`, `"agent-only"`,
/// `"propose"` and `"allow"`.
pub mode: String,
/// Agent workspace directory; `Config::create_agent_dirs` joins it onto
/// `vault.path` before creating the sub-directories.
pub agent_dir: PathBuf,
}
/// `[server]` table: bind address for the planned HTTP API.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ServerConfig {
/// Host/interface to bind (default `"127.0.0.1"`).
pub host: String,
/// TCP port to bind (default 7331); `Config::validate` rejects 0.
pub port: u16,
}
impl Config {
pub fn load(path: Option<&Path>) -> Result<Self> {
let path = path
.map(Path::to_path_buf)
.unwrap_or_else(Self::default_path);
if !path.exists() {
return Ok(Self::default());
}
let raw = fs::read_to_string(&path)
.with_context(|| format!("failed to read config {}", path.display()))?;
toml::from_str(&raw).with_context(|| format!("invalid config {}", path.display()))
}
pub fn default_path() -> PathBuf {
PathBuf::from("glassmind.toml")
}
pub fn with_cli_vault(mut self, vault: Option<PathBuf>) -> Self {
if let Some(vault) = vault {
self.vault.path = vault;
}
self
}
pub fn validate(&self) -> Result<()> {
if self.vault.path.as_os_str().is_empty() {
bail!("vault.path must not be empty");
}
if self.index.chunk_target_tokens == 0 {
bail!("index.chunk_target_tokens must be greater than zero");
}
if self.index.chunk_overlap_tokens >= self.index.chunk_target_tokens {
bail!("index.chunk_overlap_tokens must be smaller than index.chunk_target_tokens");
}
if self.server.port == 0 {
bail!("server.port must be greater than zero");
}
match self.writes.mode.as_str() {
"off" | "agent-only" | "propose" | "allow" => {}
other => {
bail!("writes.mode must be one of off, agent-only, propose, allow; got {other}")
}
}
Ok(())
}
pub fn write_default_file(&self, force: bool) -> Result<()> {
let path = Self::default_path();
if path.exists() && !force {
return Err(anyhow!(
"{} already exists; pass --force to overwrite it",
path.display()
));
}
let raw = toml::to_string_pretty(self).context("failed to serialize default config")?;
fs::write(&path, raw).with_context(|| format!("failed to write {}", path.display()))
}
pub fn create_agent_dirs(&self) -> Result<()> {
let base = self.vault.path.join(&self.writes.agent_dir);
for dir in [
"memories",
"summaries",
"tasks",
"decisions",
"logs",
"cache",
] {
fs::create_dir_all(base.join(dir))
.with_context(|| format!("failed to create {}", base.join(dir).display()))?;
}
Ok(())
}
}
impl Default for Config {
    /// Built-in configuration used when no `glassmind.toml` is found; the
    /// values match the starter `glassmind.toml` shipped with the project.
    fn default() -> Self {
        let vault = VaultConfig {
            path: PathBuf::from("."),
        };

        let index = IndexConfig {
            include_agent_dir: true,
            ignore_dirs: [".git", ".obsidian", ".trash", ".agent/cache"]
                .iter()
                .copied()
                .map(String::from)
                .collect(),
            chunk_target_tokens: 500,
            chunk_overlap_tokens: 80,
        };

        let embeddings = EmbeddingsConfig {
            backend: String::from("ollama"),
            model: String::from("nomic-embed-text"),
            url: String::from("http://localhost:11434"),
        };

        let search = SearchConfig {
            semantic_weight: 0.55,
            keyword_weight: 0.25,
            recency_weight: 0.10,
            link_weight: 0.05,
            tag_weight: 0.05,
        };

        let writes = WritesConfig {
            mode: String::from("agent-only"),
            agent_dir: PathBuf::from(".agent"),
        };

        let server = ServerConfig {
            host: String::from("127.0.0.1"),
            port: 7331,
        };

        Self {
            vault,
            index,
            embeddings,
            search,
            writes,
            server,
        }
    }
}

23
src/logging.rs Normal file
View File

@ -0,0 +1,23 @@
use anyhow::{Result, anyhow};
use tracing_subscriber::{EnvFilter, fmt};
/// Installs the process-wide `tracing` subscriber used by the CLI.
///
/// The filter comes from the default environment variable read by
/// `EnvFilter::try_from_default_env` when that yields a usable filter;
/// otherwise it falls back to `glassmind=debug` (with `debug`) or
/// `glassmind=info`. In debug mode each event also carries its target, file
/// and line number.
///
/// Log lines are written to **stderr**. The subscriber's default writer is
/// stdout, which would interleave diagnostics with the command's real output
/// and corrupt machine-readable modes such as `--json` / `--output json`.
///
/// # Errors
///
/// Fails if a global subscriber has already been installed.
pub fn init(debug: bool) -> Result<()> {
    let default_directive = if debug {
        "glassmind=debug"
    } else {
        "glassmind=info"
    };
    let filter = EnvFilter::try_from_default_env()
        .unwrap_or_else(|_| EnvFilter::new(default_directive));

    fmt()
        .with_env_filter(filter)
        // Keep stdout reserved for command output (text or JSON).
        .with_writer(std::io::stderr)
        .with_target(debug)
        .with_file(debug)
        .with_line_number(debug)
        .compact()
        .try_init()
        .map_err(|err| anyhow!("failed to initialize logging: {err}"))
}

99
src/main.rs Normal file
View File

@ -0,0 +1,99 @@
mod cli;
mod config;
mod logging;
mod markdown;
mod vault;
use anyhow::Result;
use clap::Parser;
use tracing::{debug, info};
use crate::cli::{Cli, Commands, OutputFormat};
use crate::config::Config;
use crate::vault::VaultIndex;
/// Program entry point: parses the command line, sets up logging, loads and
/// validates the configuration, then runs the requested subcommand.
///
/// Every subcommand except `init` and `serve` rescans the vault before doing
/// its work. Any failure is propagated and ends the process with an error.
fn main() -> Result<()> {
    let cli = Cli::parse();
    logging::init(cli.debug)?;

    let config = Config::load(cli.config.as_deref())?.with_cli_vault(cli.vault);
    config.validate()?;
    debug!(?config, "loaded config");

    match cli.command {
        Commands::Init { force } => init_project(&config, force)?,

        // `index` and `stats` currently report the same scan summary.
        Commands::Index { json } | Commands::Stats { json } => {
            let index = VaultIndex::scan(&config)?;
            if json {
                println!("{}", serde_json::to_string_pretty(&index.summary())?);
            } else {
                println!("{}", index.summary());
            }
        }

        Commands::Search {
            query,
            limit,
            output,
        } => {
            let index = VaultIndex::scan(&config)?;
            let results = index.search(&query, limit);
            match output {
                OutputFormat::Json => println!("{}", serde_json::to_string_pretty(&results)?),
                OutputFormat::Text => {
                    if results.is_empty() {
                        println!("No matches.");
                    }
                    // Ranks are shown 1-based.
                    for (rank, hit) in (1usize..).zip(results.iter()) {
                        println!("{}. {}", rank, hit.note.path.display());
                        println!(" title: {}", hit.note.title);
                        if !hit.note.headings.is_empty() {
                            println!(" headings: {}", hit.note.headings.join(" > "));
                        }
                        println!(" score: {}", hit.score);
                    }
                }
            }
        }

        Commands::Context {
            query,
            limit,
            output,
        } => {
            let index = VaultIndex::scan(&config)?;
            let bundle = index.context_bundle(&query, limit);
            match output {
                OutputFormat::Json => println!("{}", serde_json::to_string_pretty(&bundle)?),
                OutputFormat::Text => println!("{}", bundle.to_markdown()),
            }
        }

        // Placeholder until the HTTP API milestone lands.
        Commands::Serve => {
            info!("serve command is reserved for the HTTP API milestone");
            println!(
                "HTTP API is not implemented yet. Planned bind: {}:{}",
                config.server.host, config.server.port
            );
        }
    }

    Ok(())
}
/// Handles `glassmind init`: writes the config file to the default path,
/// creates the agent workspace directories, and reports what was set up.
///
/// `force` allows an existing config file to be overwritten. Errors from
/// either step are returned unchanged.
fn init_project(config: &Config, force: bool) -> Result<()> {
    config.write_default_file(force)?;
    config.create_agent_dirs()?;

    let vault_root = config.vault.path.display();
    let config_file = Config::default_path();
    println!("Initialized Glassmind at {vault_root}");
    println!("Config: {}", config_file.display());
    Ok(())
}

254
src/markdown.rs Normal file
View File

@ -0,0 +1,254 @@
use regex::Regex;
use serde::Serialize;
/// Result of `parse_markdown`: a flat, line-oriented view of one note.
#[derive(Clone, Debug, Serialize)]
pub struct MarkdownDocument {
/// Text of every heading, in document order.
pub headings: Vec<String>,
/// All recognised blocks (headings, paragraphs, code blocks, list items),
/// in document order.
pub blocks: Vec<MarkdownBlock>,
/// Wikilinks extracted from the whole note by `extract_wikilinks`.
pub wikilinks: Vec<Wikilink>,
}
/// One block of a parsed note together with its source line span.
#[derive(Clone, Debug, Serialize)]
pub struct MarkdownBlock {
/// What kind of block this is.
pub kind: MarkdownBlockKind,
/// Block text. Paragraph lines are trimmed and joined with a single space;
/// code blocks keep their original lines (fence lines included) joined with
/// `\n`; list blocks hold one trimmed list-item line.
pub text: String,
/// 1-based line number of the first line of the block.
pub start_line: usize,
/// 1-based line number of the last line of the block (inclusive).
pub end_line: usize,
}
/// Kind of a `MarkdownBlock`.
///
/// Serialized in snake_case: `heading`, `paragraph`, `code_block`, `list`.
#[derive(Clone, Debug, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum MarkdownBlockKind {
/// A single heading line, as recognised by `parse_heading`.
Heading,
/// Consecutive non-blank lines that are not headings, list items or code.
Paragraph,
/// A fenced code block (``` or ~~~), including its fence lines.
CodeBlock,
/// A single list-item line; each item becomes its own block.
List,
}
/// A wikilink found in a note.
///
/// NOTE(review): values are filled in by `extract_wikilinks`, which is not
/// visible from here; the field notes below are assumptions to confirm.
#[derive(Clone, Debug, Serialize)]
pub struct Wikilink {
/// Presumably the path of the note containing the link (the `source_path`
/// handed to `parse_markdown`).
pub source: String,
/// Presumably the link target, i.e. the part before `|` in `[[target|alias]]`.
pub target: String,
/// Presumably the display alias after `|`, when one is given.
pub alias: Option<String>,
}
/// Splits a markdown note into a flat list of line-oriented blocks.
///
/// The scanner walks `content` line by line and classifies each line, in this
/// order of precedence:
///
/// 1. lines inside a fenced code block (everything up to the matching closing
///    fence, or the end of the note if the fence is never closed),
/// 2. a fence opener (three or more backticks or tildes),
/// 3. a heading (as recognised by `parse_heading`),
/// 4. a list item (as recognised by `is_list_item`) — one block per item,
/// 5. a blank line, which ends the current paragraph,
/// 6. anything else, which is appended to the current paragraph.
///
/// Fences follow the CommonMark rule: a block is closed only by a line made
/// of the *same* marker character, at least as long as the opener. That keeps
/// a `~~~` line inside a backtick block, or an inner three-backtick fence
/// inside a four-backtick block, from ending the block early.
///
/// Line numbers stored on blocks are 1-based and inclusive.
///
/// * `source_path` – forwarded to `extract_wikilinks`.
/// * `content` – full text of the note.
pub fn parse_markdown(source_path: &str, content: &str) -> MarkdownDocument {
    let mut headings = Vec::new();
    let mut blocks = Vec::new();
    let mut paragraph = Vec::new();
    let mut paragraph_start = 0;
    // Marker character and length of the fence that opened the current code block.
    let mut open_fence: Option<(char, usize)> = None;
    let mut code = Vec::new();
    let mut code_start = 0;
    let mut final_line = 0;

    for (idx, line) in content.lines().enumerate() {
        let line_no = idx + 1;
        final_line = line_no;
        let trimmed = line.trim();

        if let Some((marker, fence_len)) = open_fence {
            // Code lines are kept verbatim, fence lines included.
            code.push(line.to_string());
            if fence_closes(trimmed, marker, fence_len) {
                blocks.push(MarkdownBlock {
                    kind: MarkdownBlockKind::CodeBlock,
                    text: code.join("\n"),
                    start_line: code_start,
                    end_line: line_no,
                });
                code.clear();
                open_fence = None;
            }
            continue;
        }

        if let Some(fence) = fence_opener(trimmed) {
            flush_paragraph(
                &mut blocks,
                &mut paragraph,
                paragraph_start,
                line_no.saturating_sub(1),
            );
            open_fence = Some(fence);
            code_start = line_no;
            code.push(line.to_string());
            continue;
        }

        if let Some(heading) = parse_heading(trimmed) {
            flush_paragraph(
                &mut blocks,
                &mut paragraph,
                paragraph_start,
                line_no.saturating_sub(1),
            );
            headings.push(heading.clone());
            blocks.push(MarkdownBlock {
                kind: MarkdownBlockKind::Heading,
                text: heading,
                start_line: line_no,
                end_line: line_no,
            });
            continue;
        }

        if is_list_item(trimmed) {
            flush_paragraph(
                &mut blocks,
                &mut paragraph,
                paragraph_start,
                line_no.saturating_sub(1),
            );
            blocks.push(MarkdownBlock {
                kind: MarkdownBlockKind::List,
                text: trimmed.to_string(),
                start_line: line_no,
                end_line: line_no,
            });
            continue;
        }

        if trimmed.is_empty() {
            flush_paragraph(
                &mut blocks,
                &mut paragraph,
                paragraph_start,
                line_no.saturating_sub(1),
            );
            continue;
        }

        if paragraph.is_empty() {
            paragraph_start = line_no;
        }
        paragraph.push(trimmed.to_string());
    }

    // An unterminated fence runs to the end of the note.
    if open_fence.is_some() {
        blocks.push(MarkdownBlock {
            kind: MarkdownBlockKind::CodeBlock,
            text: code.join("\n"),
            start_line: code_start,
            end_line: final_line,
        });
    }
    flush_paragraph(&mut blocks, &mut paragraph, paragraph_start, final_line);

    MarkdownDocument {
        headings,
        blocks,
        wikilinks: extract_wikilinks(source_path, content),
    }
}

/// Returns the marker character and run length when `trimmed` opens a fenced
/// code block: at least three backticks or tildes at the start of the line.
/// A backtick fence whose info string contains another backtick is inline
/// code, not a fence.
fn fence_opener(trimmed: &str) -> Option<(char, usize)> {
    let marker = trimmed.chars().next().filter(|c| matches!(c, '`' | '~'))?;
    let run = trimmed.chars().take_while(|&c| c == marker).count();
    if run < 3 {
        return None;
    }
    // The marker is ASCII, so the char count doubles as a byte offset.
    if marker == '`' && trimmed[run..].contains('`') {
        return None;
    }
    Some((marker, run))
}

/// Reports whether `trimmed` closes a fence opened with `min_len` copies of
/// `marker`: it must consist solely of that marker and be at least as long.
fn fence_closes(trimmed: &str, marker: char, min_len: usize) -> bool {
    trimmed.len() >= min_len && trimmed.chars().all(|c| c == marker)
}
/// Emits the pending paragraph lines as a single `Paragraph` block.
///
/// Does nothing when `paragraph` is empty. Otherwise the buffered lines are
/// joined with single spaces, pushed onto `blocks` with the given 1-based
/// line span, and the buffer is cleared for reuse.
fn flush_paragraph(
    blocks: &mut Vec<MarkdownBlock>,
    paragraph: &mut Vec<String>,
    start_line: usize,
    end_line: usize,
) {
    if !paragraph.is_empty() {
        let text = paragraph.join(" ");
        paragraph.clear();
        blocks.push(MarkdownBlock {
            kind: MarkdownBlockKind::Paragraph,
            text,
            start_line,
            end_line,
        });
    }
}
/// Parses an ATX heading (`# Title` … `###### Title`) from a trimmed line.
///
/// Returns the heading text, or `None` when the line is not a heading:
/// it must start with one to six `#` characters followed by a space or tab.
///
/// An optional closing sequence is removed, so `## Title ##` yields `Title`.
/// Trailing hashes only count as a closing sequence when whitespace separates
/// them from preceding text, which keeps headings such as `# C#` intact.
fn parse_heading(trimmed: &str) -> Option<String> {
    let is_gap = |c: char| c == ' ' || c == '\t';

    // `#` is ASCII, so the number of hashes is also a valid byte offset.
    let hashes = trimmed.bytes().take_while(|b| *b == b'#').count();
    if !(1..=6).contains(&hashes) {
        return None;
    }

    let text = trimmed[hashes..].strip_prefix(is_gap)?.trim();

    let without_closing = text.trim_end_matches('#');
    let text = if without_closing.len() < text.len() && without_closing.ends_with(is_gap) {
        without_closing.trim_end()
    } else {
        text
    };

    Some(text.to_string())
}
/// Reports whether a trimmed line starts a list item.
///
/// Recognised markers are the bullets `-`, `*` and `+`, and ordered markers
/// made of one or more ASCII digits followed by `.` or `)`. In every case the
/// marker must be followed by a space.
fn is_list_item(trimmed: &str) -> bool {
    if ["- ", "* ", "+ "]
        .iter()
        .any(|bullet| trimmed.starts_with(bullet))
    {
        return true;
    }

    // ASCII digits are single bytes, so the count is a valid byte offset.
    let digits = trimmed.bytes().take_while(u8::is_ascii_digit).count();
    if digits == 0 {
        return false;
    }
    let after_number = &trimmed[digits..];
    after_number.starts_with(". ") || after_number.starts_with(") ")
}
pub fn extract_wikilinks(source_path: &str, content: &str) -> Vec<Wikilink> {
let link_re = Regex::new(r"\[\[([^\]\|]+?)(?:\|([^\]]+))?\]\]").expect("valid wikilink regex");
link_re
.captures_iter(content)
.filter_map(|capture| {
let target = capture.get(1)?.as_str().trim().to_string();
if target.is_empty() {
return None;
}
let alias = capture
.get(2)
.map(|m| m.as_str().trim().to_string())
.filter(|s| !s.is_empty());
Some(Wikilink {
source: source_path.to_string(),
target,
alias,
})
})
.collect()
}
#[cfg(test)]
mod tests {
    use super::{MarkdownBlockKind, extract_wikilinks, parse_markdown};

    /// Plain, aliased, nested-path and heading-anchored wikilinks are all found.
    #[test]
    fn extracts_obsidian_wikilink_forms() {
        let input = "[[note]] [[note|alias]] [[folder/note]] [[folder/note#Heading|Alias]]";
        let links = extract_wikilinks("source.md", input);

        let targets: Vec<&str> = links.iter().map(|link| link.target.as_str()).collect();
        assert_eq!(links.len(), 4);
        assert_eq!(
            targets,
            vec!["note", "note", "folder/note", "folder/note#Heading"]
        );

        assert_eq!(links[0].alias, None);
        assert_eq!(links[1].alias.as_deref(), Some("alias"));
        assert_eq!(links[3].alias.as_deref(), Some("Alias"));
    }

    /// An unterminated code fence must not prevent the other blocks from being
    /// recognised, and must itself still be reported as a code block.
    #[test]
    fn extracts_markdown_structure_from_malformed_input() {
        let input = "# Title\n\nParagraph text\n\n- item\n\n```rust\nfn main() {}\n";
        let document = parse_markdown("note.md", input);

        let has_kind = |wanted: fn(&MarkdownBlockKind) -> bool| {
            document.blocks.iter().any(|block| wanted(&block.kind))
        };

        assert_eq!(document.headings, vec!["Title"]);
        assert!(has_kind(|kind| matches!(
            kind,
            MarkdownBlockKind::Paragraph
        )));
        assert!(has_kind(|kind| matches!(kind, MarkdownBlockKind::List)));
        assert!(has_kind(|kind| matches!(
            kind,
            MarkdownBlockKind::CodeBlock
        )));
    }
}

312
src/vault.rs Normal file
View File

@ -0,0 +1,312 @@
use std::fmt;
use std::fs;
use std::path::{Path, PathBuf};
use std::time::UNIX_EPOCH;
use anyhow::{Context, Result};
use serde::Serialize;
use tracing::{debug, warn};
use walkdir::{DirEntry, WalkDir};
use crate::config::Config;
use crate::markdown::{MarkdownBlock, Wikilink, parse_markdown};
/// In-memory index of the markdown notes found under a vault directory,
/// produced by `VaultIndex::scan`.
#[derive(Clone, Debug, Serialize)]
pub struct VaultIndex {
/// Vault root: the canonicalized configured path, or the configured path
/// as-is when canonicalization fails.
pub vault_path: PathBuf,
/// Indexed notes, sorted by their path.
pub notes: Vec<NoteMetadata>,
/// Number of markdown files counted by the scan.
pub markdown_count: usize,
/// Directories (relative to the vault root) that the scan refused to enter.
pub skipped_dirs: Vec<PathBuf>,
}
/// Everything the index records about a single markdown note.
#[derive(Clone, Debug, Serialize)]
pub struct NoteMetadata {
/// Path relative to the vault root (the original path when it is not under
/// the vault root).
pub path: PathBuf,
/// Final path component; empty when it is missing or not valid UTF-8.
pub filename: String,
/// First heading of the note, falling back to the file stem and finally to
/// `"Untitled"`.
pub title: String,
/// Last modification time in whole seconds since the Unix epoch, when the
/// platform reports one that is not before the epoch.
pub modified_unix_secs: Option<u64>,
/// File length in bytes as reported by the filesystem metadata.
pub file_size: u64,
/// Heading texts in document order.
pub headings: Vec<String>,
/// Parsed markdown blocks in document order.
pub blocks: Vec<MarkdownBlock>,
/// Wikilinks found in the note.
pub wikilinks: Vec<Wikilink>,
}
/// Aggregate counts over a `VaultIndex`, built by `VaultIndex::summary`.
#[derive(Clone, Debug, Serialize)]
pub struct IndexSummary {
/// Vault root copied from the index.
pub vault_path: PathBuf,
/// Number of notes held by the index.
pub notes_indexed: usize,
/// Copy of the index's `markdown_count`.
pub markdown_files: usize,
/// Total number of headings across all notes.
pub headings: usize,
/// Total number of markdown blocks across all notes.
pub blocks: usize,
/// Total number of wikilinks across all notes.
pub wikilinks: usize,
/// Directories the scan skipped, copied from the index.
pub skipped_dirs: Vec<PathBuf>,
}
/// A note matched by `VaultIndex::search` together with its relevance score.
#[derive(Clone, Debug, Serialize)]
pub struct SearchResult {
/// Copy of the matching note's metadata.
pub note: NoteMetadata,
/// Number of query terms found in the note's path, title and block text;
/// always greater than zero for a returned result.
pub score: usize,
}
/// A query paired with the notes that matched it, built by
/// `VaultIndex::context_bundle`.
#[derive(Clone, Debug, Serialize)]
pub struct ContextBundle {
/// The query string exactly as supplied by the caller.
pub query: String,
/// Search results for the query, best match first.
pub sources: Vec<SearchResult>,
}
impl VaultIndex {
pub fn scan(config: &Config) -> Result<Self> {
let vault_path = config
.vault
.path
.canonicalize()
.unwrap_or_else(|_| config.vault.path.clone());
let mut notes = Vec::new();
let mut skipped_dirs = Vec::new();
let walker = WalkDir::new(&config.vault.path)
.follow_links(false)
.into_iter()
.filter_entry(|entry| {
should_enter(entry, &config.vault.path, config, &mut skipped_dirs)
});
for entry in walker {
let entry = match entry {
Ok(entry) => entry,
Err(err) => {
warn!("skipping unreadable path: {err}");
continue;
}
};
if !entry.file_type().is_file() || !is_markdown(entry.path()) {
continue;
}
let note = read_note(entry.path(), &config.vault.path)?;
debug!(
path = %note.path.display(),
title = %note.title,
size = note.file_size,
headings = note.headings.len(),
links = note.wikilinks.len(),
"indexed note metadata"
);
notes.push(note);
}
notes.sort_by(|a, b| a.path.cmp(&b.path));
let markdown_count = notes.len();
Ok(Self {
vault_path,
notes,
markdown_count,
skipped_dirs,
})
}
pub fn summary(&self) -> IndexSummary {
IndexSummary {
vault_path: self.vault_path.clone(),
notes_indexed: self.notes.len(),
markdown_files: self.markdown_count,
headings: self.notes.iter().map(|note| note.headings.len()).sum(),
blocks: self.notes.iter().map(|note| note.blocks.len()).sum(),
wikilinks: self.notes.iter().map(|note| note.wikilinks.len()).sum(),
skipped_dirs: self.skipped_dirs.clone(),
}
}
pub fn search(&self, query: &str, limit: usize) -> Vec<SearchResult> {
let terms = query_terms(query);
let mut results: Vec<_> = self
.notes
.iter()
.filter_map(|note| {
let haystack = format!(
"{} {} {}",
note.path.display(),
note.title,
note.blocks
.iter()
.map(|block| block.text.as_str())
.collect::<Vec<_>>()
.join(" ")
)
.to_lowercase();
let score = terms
.iter()
.filter(|term| haystack.contains(term.as_str()))
.count();
(score > 0).then(|| SearchResult {
note: note.clone(),
score,
})
})
.collect();
results.sort_by(|a, b| {
b.score
.cmp(&a.score)
.then_with(|| a.note.path.cmp(&b.note.path))
});
results.truncate(limit);
results
}
pub fn context_bundle(&self, query: &str, limit: usize) -> ContextBundle {
ContextBundle {
query: query.to_string(),
sources: self.search(query, limit),
}
}
}
impl ContextBundle {
    /// Renders the bundle as a markdown document.
    ///
    /// The output starts with a title and the query in backticks. When there
    /// are no sources a single "no matches" line follows; otherwise a numbered
    /// `## Sources` list gives each note's path, score and title, plus its
    /// headings (joined with ` > `) and wikilinks (joined with `, `) when the
    /// note has any.
    pub fn to_markdown(&self) -> String {
        let mut rendered = format!("# Glassmind Context\n\nQuery: `{}`\n\n", self.query);

        if self.sources.is_empty() {
            rendered.push_str("No matching markdown notes were found.\n");
            return rendered;
        }

        rendered.push_str("## Sources\n\n");
        for (position, hit) in (1usize..).zip(self.sources.iter()) {
            let note = &hit.note;

            rendered.push_str(&format!(
                "{}. `{}` - score {}\n",
                position,
                note.path.display(),
                hit.score
            ));
            rendered.push_str(&format!("   - title: {}\n", note.title));

            if !note.headings.is_empty() {
                let outline = note.headings.join(" > ");
                rendered.push_str(&format!("   - headings: {}\n", outline));
            }

            if !note.wikilinks.is_empty() {
                let mut labels = Vec::with_capacity(note.wikilinks.len());
                for link in &note.wikilinks {
                    labels.push(link.alias.as_ref().map_or_else(
                        || link.target.clone(),
                        |alias| format!("{} as {}", link.target, alias),
                    ));
                }
                let links = labels.join(", ");
                rendered.push_str(&format!("   - wikilinks: {links}\n"));
            }
        }

        rendered
    }
}
impl fmt::Display for IndexSummary {
    /// Writes the summary as one `label: value` line per field; the skipped
    /// directories are reported as a count only.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            vault_path,
            notes_indexed,
            markdown_files,
            headings,
            blocks,
            wikilinks,
            skipped_dirs,
        } = self;

        writeln!(f, "Vault: {}", vault_path.display())?;
        writeln!(f, "Notes indexed: {}", notes_indexed)?;
        writeln!(f, "Markdown files: {}", markdown_files)?;
        writeln!(f, "Headings parsed: {}", headings)?;
        writeln!(f, "Markdown blocks: {}", blocks)?;
        writeln!(f, "Wikilinks: {}", wikilinks)?;
        writeln!(f, "Skipped dirs: {}", skipped_dirs.len())
    }
}
/// Reads one markdown file and builds its [`NoteMetadata`].
///
/// The stored path is `path` relative to `vault_path` (or `path` unchanged
/// when it is not under `vault_path`). The same relative path, with
/// backslashes turned into `/`, is passed to `parse_markdown` as the source
/// of the note's wikilinks.
///
/// # Errors
///
/// Fails when the file cannot be read as UTF-8 text or its metadata cannot be
/// queried; the error message names the offending path.
fn read_note(path: &Path, vault_path: &Path) -> Result<NoteMetadata> {
    let content =
        fs::read_to_string(path).with_context(|| format!("failed to read {}", path.display()))?;
    let metadata =
        fs::metadata(path).with_context(|| format!("failed to stat {}", path.display()))?;

    let relative_path = match path.strip_prefix(vault_path) {
        Ok(inside_vault) => inside_vault.to_path_buf(),
        Err(_) => path.to_path_buf(),
    };
    let source_path = relative_path.to_string_lossy().replace('\\', "/");
    let parsed = parse_markdown(&source_path, &content);

    let filename = path
        .file_name()
        .and_then(|name| name.to_str())
        .map(str::to_owned)
        .unwrap_or_default();
    let title = extract_title(path, &parsed.headings);
    // Modification times that are unavailable or predate the epoch become `None`.
    let modified_unix_secs = match metadata.modified() {
        Ok(modified) => modified
            .duration_since(UNIX_EPOCH)
            .ok()
            .map(|since_epoch| since_epoch.as_secs()),
        Err(_) => None,
    };

    Ok(NoteMetadata {
        path: relative_path,
        filename,
        title,
        modified_unix_secs,
        file_size: metadata.len(),
        headings: parsed.headings,
        blocks: parsed.blocks,
        wikilinks: parsed.wikilinks,
    })
}
/// Chooses a display title for a note.
///
/// Preference order: the first entry of `headings`, then the file stem of
/// `path` (when it is valid UTF-8), then the literal `"Untitled"`.
fn extract_title(path: &Path, headings: &[String]) -> String {
    if let Some(first_heading) = headings.first() {
        return first_heading.clone();
    }
    match path.file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => stem.to_owned(),
        None => "Untitled".to_owned(),
    }
}
/// Decides whether the directory walk should descend into `entry`.
///
/// Non-directories and the vault root itself are always accepted. A directory
/// is rejected, and its vault-relative path appended to `skipped_dirs`, when:
///
/// * its relative path (with `/` separators) equals, or lies beneath, one of
///   `config.index.ignore_dirs`. Trailing slashes on an ignore entry are
///   tolerated, so `"drafts/"` behaves like `"drafts"`; or
/// * `config.index.include_agent_dir` is off and the first component of the
///   relative path equals `config.writes.agent_dir`.
fn should_enter(
    entry: &DirEntry,
    vault_path: &Path,
    config: &Config,
    skipped_dirs: &mut Vec<PathBuf>,
) -> bool {
    if !entry.file_type().is_dir() {
        return true;
    }
    let relative = entry
        .path()
        .strip_prefix(vault_path)
        .unwrap_or(entry.path());
    if relative.as_os_str().is_empty() {
        return true;
    }
    let normalized = relative.to_string_lossy().replace('\\', "/");

    let ignored = config.index.ignore_dirs.iter().any(|ignore| {
        let ignore = ignore.trim_end_matches('/');
        // Match on whole path components only: "foo" must not ignore "foobar".
        !ignore.is_empty()
            && normalized
                .strip_prefix(ignore)
                .is_some_and(|rest| rest.is_empty() || rest.starts_with('/'))
    });
    let agent_excluded = !config.index.include_agent_dir
        && normalized
            .split('/')
            .next()
            .is_some_and(|component| component == config.writes.agent_dir.to_string_lossy());

    if ignored || agent_excluded {
        skipped_dirs.push(relative.to_path_buf());
        false
    } else {
        true
    }
}
/// Reports whether `path` has the extension `md`, compared ASCII
/// case-insensitively. Paths without a UTF-8 extension are not markdown.
fn is_markdown(path: &Path) -> bool {
    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => ext.eq_ignore_ascii_case("md"),
        None => false,
    }
}
/// Splits a free-text query into lowercase search terms.
///
/// The query is split on whitespace; each piece has leading and trailing
/// non-alphanumeric characters stripped (interior punctuation is kept) and is
/// lowercased. Pieces that end up empty are dropped.
fn query_terms(query: &str) -> Vec<String> {
    let mut terms = Vec::new();
    for piece in query.split_whitespace() {
        let core = piece.trim_matches(|c: char| !c.is_alphanumeric());
        if !core.is_empty() {
            terms.push(core.to_lowercase());
        }
    }
    terms
}