From fc9b2efd0b9288499a8eac59d0acb6431305ab8d Mon Sep 17 00:00:00 2001 From: "K. Hodges" Date: Sun, 24 May 2026 02:18:08 -0700 Subject: [PATCH] Initialize Glassmind Rust CLI and markdown vault scanner --- .gitignore | 4 +- Cargo.lock | 595 ++++++++++++++++++++++ Cargo.toml | 16 + README.md | 230 ++++++++- docs/design.md | 1199 +++++++++++++++++++++++++++++++++++++++++++++ docs/dev/tasks.md | 986 +++++++++++++++++++++++++++++++++++++ docs/faq.md | 535 ++++++++++++++++++++ docs/huh.md | 470 ++++++++++++++++++ examples/.gitkeep | 1 + fixtures/.gitkeep | 1 + glassmind.toml | 33 ++ scripts/.gitkeep | 1 + src/cli.rs | 70 +++ src/config.rs | 174 +++++++ src/logging.rs | 23 + src/main.rs | 99 ++++ src/markdown.rs | 254 ++++++++++ src/vault.rs | 312 ++++++++++++ 18 files changed, 5000 insertions(+), 3 deletions(-) create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 docs/design.md create mode 100644 docs/dev/tasks.md create mode 100644 docs/faq.md create mode 100644 docs/huh.md create mode 100644 examples/.gitkeep create mode 100644 fixtures/.gitkeep create mode 100644 glassmind.toml create mode 100644 scripts/.gitkeep create mode 100644 src/cli.rs create mode 100644 src/config.rs create mode 100644 src/logging.rs create mode 100644 src/main.rs create mode 100644 src/markdown.rs create mode 100644 src/vault.rs diff --git a/.gitignore b/.gitignore index 0104787..36374c7 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ # will have compiled files and executables debug/ target/ +.agent/ +docs/codex/ # These are backup files generated by rustfmt **/*.rs.bk @@ -14,4 +16,4 @@ target/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
-#.idea/ \ No newline at end of file +#.idea/ diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..55598d6 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,595 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "clap" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "getopts" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "glassmind" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "pulldown-cmark", + "regex", + "serde", + "serde_json", + "toml", + "tracing", + "tracing-subscriber", + "walkdir", +] + +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + 
"regex-automata", +] + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pulldown-cmark" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9f068eba8e7071c5f9511831b44f32c740d5adf574e990f946ddb53db2f314e" +dependencies = [ + "bitflags", + "getopts", + "memchr", + "pulldown-cmark-escape", + "unicase", +] + +[[package]] +name = "pulldown-cmark-escape" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae" + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.150" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_spanned" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6662b5879511e06e8999a8a235d848113e942c9124f211511b16466ee2995f26" +dependencies = [ + "serde_core", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "toml" +version = "0.9.12+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf92845e79fc2e2def6a5d828f0801e29a2f8acc037becc5ab08595c7d5e9863" +dependencies = [ + "indexmap", + "serde_core", + "serde_spanned", + "toml_datetime", + "toml_parser", + "toml_writer", + "winnow 0.7.15", +] + +[[package]] +name = "toml_datetime" +version = "0.7.5+spec-1.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_parser" +version = "1.1.2+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" +dependencies = [ + "winnow 1.0.3", +] + +[[package]] +name = "toml_writer" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "756daf9b1013ebe47a8776667b466417e2d4c5679d441c26230efd9ef78692db" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + 
"sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicase" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" 
+dependencies = [ + "windows-link", +] + +[[package]] +name = "winnow" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" + +[[package]] +name = "winnow" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1" + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..06cb75a --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "glassmind" +version = "0.1.0" +edition = "2024" + +[dependencies] +anyhow = "1.0" +clap = { version = "4.5", features = ["derive"] } +pulldown-cmark = "0.13" +regex = "1.11" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +toml = "0.9" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] } +walkdir = "2.5" diff --git a/README.md b/README.md index cd0684d..f42989f 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,228 @@ -# glassMind -Local-first RAG and memory infrastructure for Obsidian vaults. +# Glassmind + +> Local-first semantic retrieval for Obsidian-like markdown knowledge bases and AI workflows. + +*This is in development; it doesn't run yet. Want to help? Get in contact!* + +Glassmind turns folders of markdown notes into searchable semantic memory for AI tools and humans. + +It works especially well with Obsidian vaults, but Obsidian is not required. + +It indexes markdown, understands links/tags/headings, performs hybrid semantic retrieval, and exposes context through a CLI, HTTP API, and MCP tools. + +Your notes stay local. +Your vault stays canonical. +The database is rebuildable. +No cloud required. + +--- + +## What is this? 
+ +Glassmind is **not**: + +* a chatbot +* an Obsidian plugin +* an autonomous agent +* a replacement for Obsidian +* a SaaS startup trying to ingest your second brain into a valuation event + +Glassmind is a **memory and retrieval layer**. + +Think: + +```text +Claude / Codex / Hermes / local model + ↓ + Glassmind + ↓ + Your Obsidian vault +``` + +The goal is simple: + +> “Given this task, what context from my vault actually matters?” + +--- + +# Features + +## Current / Planned + +* Markdown vault indexing +* Semantic search +* Hybrid retrieval + + * embeddings + * keyword search + * tags + * wikilinks + * recency +* Context bundle generation +* MCP integration +* HTTP API +* Local-first operation +* Rebuildable indexes +* Incremental indexing +* Agent-safe `.agent/` workspace +* Obsidian-compatible by default + +--- + +# Philosophy + +Glassmind treats your vault like memory, not files. + +```text +Obsidian markdown = source of truth +SQLite = rebuildable index/cache +Embeddings = semantic retrieval layer +``` + +Your notes remain human-readable markdown. + +Glassmind exists to make retrieval useful, fast, and agent-friendly without turning your vault into proprietary soup. + +--- + +# Example + +```bash +glassmind index + +glassmind search "local memory tool ideas" + +glassmind context "help me continue the Glassmind project" + +glassmind serve +``` + +--- + +# Why? + +Because existing “AI memory” systems tend to be one of: + +* cloud-first +* opaque +* startup-shaped +* agent-shaped +* overengineered +* weirdly hostile to user ownership + +Meanwhile, many of us are already using Obsidian as informal long-term memory. + +Glassmind formalizes that idea. + +--- + +# Documentation + +* [Design Document](docs/design.md) +* [FAQ](docs/faq.md) +* [HUH? 
(Beginner's ELI5 guide)](docs/huh.md) + +--- + +# Architecture + +```text +Obsidian Vault + ↓ +Indexer + ↓ +SQLite + Vector Search + ↓ +CLI / HTTP / MCP + ↓ +Agents and local models +``` + +--- + +# Tech Stack + +Planned v1 stack: + +```text +Rust +SQLite +sqlite-vec +Ollama embeddings +Axum +MCP +``` + +--- + +# Status + +Early development. + +Currently building: + +* vault indexer +* chunking +* semantic retrieval +* context generation + +--- + +# Security / Privacy + +Glassmind is designed to run locally. + +By default: + +* binds to localhost +* keeps notes local +* avoids modifying user notes +* stores indexes separately +* treats markdown as canonical + +No telemetry is planned. + +No cloud dependency is required. + +No “AI-enhanced knowledge monetization platform” nonsense. + +No enshittification ever. I stake my professional reputation on it. + +--- + +# Name + +Why “Glassmind”? + +Because it’s supposed to feel like peering through semantic glass into your own thoughts. + +Also because `brainworm` felt a little aggressive for a tool people may actually deploy at work. + + +--- + +# Contributing + +Eventually. + +Right now the project is still in the “rapid architectural mutation” phase. + +If you want to throw me a PR or two, I'll give you one (1) really good compliment. + +--- + +# Legal + +Glassmind is an independent project and is not affiliated with or endorsed by [Obsidian](https://obsidian.md). + +--- + +# I am a recruiter + +Hi. + +You may also enjoy: + +* [LinkedIn / khodges42](https://linkedin.com/in/khodges42) + + diff --git a/docs/design.md b/docs/design.md new file mode 100644 index 0000000..fd814e3 --- /dev/null +++ b/docs/design.md @@ -0,0 +1,1199 @@ + +# Glassmind Design Doc + +## One-line Summary + +Glassmind is a local-first RAG and memory API for Obsidian vaults. 
It indexes markdown notes, performs semantic and metadata-aware search, and exposes useful context to external agents such as Claude, Codex, Hermes, Nightshift, or local models. + +## Non-goals + +Glassmind is not: + +- an autonomous agent +- a replacement for Obsidian +- a cloud memory service +- a chatbot +- a startup-brain-theft machine +- a magic AI filesystem + +Glassmind is the memory layer. + +Agents call Glassmind when they need context. + +--- + +# 1. Core Philosophy + +## Source of Truth + +Obsidian markdown files are canonical. + +The database is rebuildable. + +```text +Obsidian vault = truth +SQLite DB = index/cache +Vector index = semantic search cache +.agent/ = Glassmind-owned workspace +``` + +If the database is deleted, Glassmind should be able to rebuild it from the vault. + +## Ownership Boundary + +User notes belong to the user. + +Glassmind may freely write to: + +```text +.agent/ +``` + +Editing normal vault files should be optional and policy-controlled. + +--- + +# 2. High-Level Architecture + +```text +Obsidian Vault + ↓ +Glassmind Indexer + ↓ +SQLite Metadata Store + ↓ +Vector Search Layer + ↓ +CLI / HTTP API / MCP Server + ↓ +Claude / Codex / Hermes / Nightshift / Local Models +``` + +## Components + +### Indexer + +Walks the Obsidian vault and reads markdown files. + +It extracts: + +- file path + +- title + +- headings + +- sections + +- tags + +- wikilinks + +- frontmatter + +- modified time + +- content hash + +- chunks for retrieval + + +### SQLite Store + +Stores structured metadata. + +Examples: + +```text +notes +chunks +tags +links +headings +frontmatter +index_runs +``` + +### Vector Store + +Stores embeddings for chunks. + +For v1: + +```text +sqlite-vec +``` + +Later options: + +```text +Qdrant +LanceDB +Chroma +``` + +### Embedding Backend + +Embeddings convert text into “meaning coordinates.” + +Example: + +```text +"local agent memory" +``` + +becomes a vector like: + +```text +[0.12, -0.44, 0.89, ...] 
+``` + +Glassmind compares vectors to find semantically related chunks. + +Recommended v1: + +```text +Ollama embeddings +``` + +Later: + +```text +fastembed-rs +``` + +### API Layer + +Glassmind exposes: + +- CLI commands + +- HTTP API + +- MCP tools + + +MCP means “Model Context Protocol.” It is a standard-ish way for AI tools to call external tools. + +--- + +# 3. Vault Layout + +Glassmind should work with any Obsidian structure. + +It may optionally create: + +```text +vault/ + .agent/ + memories/ + summaries/ + tasks/ + decisions/ + logs/ + cache/ +``` + +## `.agent/` Purpose + +This is the safe agent-owned area. + +It can contain: + +- generated project summaries + +- captured memories + +- task state + +- context snapshots + +- audit logs + +- retrieval reports + + +Normal user notes are indexed, but not modified by default. + +--- + +# 4. Core Concepts + +## RAG + +RAG means “Retrieval-Augmented Generation.” + +Instead of shoving the entire vault into an LLM prompt, Glassmind retrieves only relevant notes and chunks. + +```text +query → retrieve relevant context → give context to LLM +``` + +## Embeddings + +Embeddings are semantic fingerprints. + +They let Glassmind find notes by meaning, not just exact words. + +Useful for finding: + +```text +"that thing I wrote about local memory" +``` + +even if the note says: + +```text +"persistent semantic cache for agents" +``` + +## Chunk + +A chunk is a small section of a note. + +Usually: + +- heading section + +- paragraph group + +- checklist block + +- code block + +- fixed-size fallback slice + + +Glassmind searches chunks, not entire notes. + +## Hot / Warm / Cold Memory + +Glassmind does not forget by default. + +Instead, it ranks memory temperature. + +```text +Hot: + recent notes + active projects + pinned context + recently retrieved chunks + +Warm: + related notes + linked notes + older project notes + +Cold: + everything else +``` + +Hot memory is more likely to appear in context bundles. 
+ +## Context Bundle + +A context bundle is an LLM-ready packet. + +Example: + +```text +User query: + "help me continue Glassmind" + +Context bundle: + - top matching chunks + - related project notes + - recent decisions + - open tasks + - relevant links + - source paths +``` + +This is probably the most important Glassmind feature. + +--- + +# 5. API Design + +## CLI + +```bash +glassmind init +glassmind index +glassmind search "local memory tool" +glassmind context "help me continue designing Glassmind" +glassmind read "Projects/Glassmind.md" +glassmind serve +``` + +## HTTP API + +```http +POST /search +POST /context +GET /notes/{id} +POST /index +GET /health +``` + +## MCP Tools + +```text +glassmind_search +glassmind_context +glassmind_read +glassmind_hot_context +``` + +--- + +# 6. First-Class Commands + +## `search` + +Returns matching chunks. + +```bash +glassmind search "obsidian rag memory" +``` + +Output: + +```text +1. Projects/Glassmind.md#Architecture +2. Daily/2026-05-24.md#Local Agents +3. Software Projects/Memory Tool.md#Embeddings +``` + +## `context` + +Returns an LLM-ready context bundle. + +```bash +glassmind context "what was I thinking about local agent memory?" +``` + +This should include: + +- summarized answer + +- relevant chunks + +- source paths + +- confidence/ranking + +- suggested follow-up reads + + +## `index` + +Builds or updates the index. + +```bash +glassmind index +``` + +Should support incremental indexing. + +## `serve` + +Runs local API server. + +```bash +glassmind serve +``` + +Default: + +```text +localhost only +``` + +No public network exposure by default. + +--- + +# 7. Retrieval Strategy + +Do not rely on only one method. + +Glassmind should use hybrid scoring. + +```text +final_score = + semantic similarity ++ keyword match ++ tag match ++ path/project boost ++ wikilink proximity ++ recency boost ++ hot memory boost +``` + +## Why Hybrid Search? + +Embeddings are good, but not perfect. 
+ +Keyword search is good, but brittle. + +Tags are useful, but manually inconsistent. + +Graph links are meaningful, but incomplete. + +Hybrid search makes the system feel smarter. + +--- + +# 8. Data Model Draft + +## notes + +```text +id +path +title +mtime +content_hash +created_at +updated_at +``` + +## chunks + +```text +id +note_id +heading_path +content +chunk_type +start_line +end_line +token_estimate +content_hash +``` + +## tags + +```text +id +name +``` + +## note_tags + +```text +note_id +tag_id +``` + +## links + +```text +id +source_note_id +target +link_type +``` + +## embeddings + +```text +chunk_id +model +vector +created_at +``` + +## memory_events + +```text +id +event_type +source +content +created_at +``` + +--- + +# 9. Write Policy + +Glassmind should support write policies. + +## Recommended Defaults + +```toml +[writes] +agent_dir = true +user_notes = "propose" +``` + +Modes: + +```text +off: + no writes + +agent-only: + writes only to .agent/ + +propose: + produce diffs for user approval + +allow: + direct edits to user notes +``` + +For your personal setup, you might allow more. + +For a sane default, use: + +```text +agent-only + proposed diffs +``` + +--- + +# 10. Implementation Stages + +## Stage 0 — Skeleton + +Goal: project exists and runs. + +Tasks: + +- create Rust project + +- add config file support + +- add CLI parser + +- define vault path + +- add logging + +- add `glassmind init` + +- create `.agent/` directory + + +Suggested crates: + +```text +clap +serde +toml +tracing +anyhow +``` + +--- + +## Stage 1 — Vault Indexer + +Goal: read markdown vault and store metadata. 
+ +Tasks: + +- recursively walk vault + +- ignore `.obsidian/`, `.git/`, `.agent/cache/` + +- read `.md` files + +- calculate content hash + +- extract title + +- extract tags + +- extract wikilinks + +- extract headings + +- store note records in SQLite + + +Suggested crates: + +```text +walkdir +rusqlite +sha2 +pulldown-cmark +gray_matter +regex +``` + +Success test: + +```bash +glassmind index +glassmind stats +``` + +Shows: + +```text +Notes indexed: 1,247 +Chunks indexed: 4,912 +Tags indexed: 340 +Links indexed: 2,103 +``` + +--- + +## Stage 2 — Chunking + +Goal: split notes into useful retrieval units. + +Tasks: + +- split by markdown headings + +- preserve heading path + +- preserve line numbers + +- fall back to size-based chunks for long sections + +- store chunks in SQLite + + +Chunk types: + +```text +heading_section +paragraph +task_block +code_block +frontmatter +``` + +Success test: + +```bash +glassmind read-chunks "Projects/Glassmind.md" +``` + +--- + +## Stage 3 — Keyword Search + +Goal: useful search before embeddings. + +Tasks: + +- add SQLite FTS5 table + +- index chunk text + +- implement `glassmind search` + +- return path, heading, snippet, score + + +FTS means “full-text search.” + +It is SQLite’s built-in text search engine. + +Success test: + +```bash +glassmind search "obsidian memory" +``` + +--- + +## Stage 4 — Embeddings + +Goal: semantic search. + +Tasks: + +- add embedding backend trait + +- implement Ollama embedding backend + +- store vectors in sqlite-vec + +- embed chunks + +- embed queries + +- return nearest chunks + + +Backend trait: + +```rust +trait EmbeddingBackend { + fn embed(&self, text: &str) -> Result<Vec<f32>>; +} +``` + +Config: + +```toml +[embeddings] +backend = "ollama" +model = "nomic-embed-text" +``` + +Success test: + +```bash +glassmind search "thing about my local second brain" +``` + +Finds notes that do not literally say those words. + +--- + +## Stage 5 — Hybrid Ranking + +Goal: search feels good. 
+ +Tasks: + +- combine semantic score + +- combine keyword score + +- boost recent notes + +- boost matching tags + +- boost active project paths + +- boost wikilink neighbors + +- show score breakdown in debug mode + + +Example debug output: + +```text +score: 0.87 +semantic: 0.52 +keyword: 0.18 +recency: 0.07 +tag: 0.05 +link: 0.05 +``` + +--- + +## Stage 6 — Context Bundles + +Goal: make output useful for agents. + +Tasks: + +- implement `glassmind context` + +- deduplicate chunks from same note + +- group by note + +- summarize or trim context + +- include source paths + +- respect token budget + +- output markdown and JSON + + +Example: + +```bash +glassmind context "continue designing Glassmind" --budget 6000 +``` + +Output sections: + +```text +Relevant Notes +Recent Decisions +Open Questions +Suggested Context +Sources +``` + +--- + +## Stage 7 — HTTP Server + +Goal: let tools call Glassmind. + +Tasks: + +- add local HTTP server + +- implement `/search` + +- implement `/context` + +- implement `/notes/{id}` + +- implement `/health` + +- bind to localhost by default + + +Suggested crate: + +```text +axum +``` + +Success test: + +```bash +curl localhost:7331/health +``` + +--- + +## Stage 8 — MCP Server + +Goal: Claude/Codex/Hermes can call Glassmind. + +Tasks: + +- expose MCP tools + +- implement search tool + +- implement context tool + +- implement read tool + +- document setup + + +Tools: + +```text +glassmind_search +glassmind_context +glassmind_read +``` + +--- + +## Stage 9 — Agent-Owned Memory + +Goal: allow safe writes to `.agent/`. + +Tasks: + +- `capture-memory` + +- `capture-decision` + +- `capture-task` + +- append markdown files in `.agent/` + +- index `.agent/` files + +- keep audit log + + +Example: + +```bash +glassmind capture-decision \ + --project Glassmind \ + --text "Obsidian markdown is canonical; SQLite is rebuildable cache." +``` + +--- + +## Stage 10 — Polish / Real Use + +Goal: make it worth using daily. 
+ +Tasks: + +- config docs + +- better errors + +- incremental indexing + +- watch mode + +- pretty CLI output + +- JSON output + +- MCP examples + +- benchmark indexing + +- backup/rebuild story + +- test vault fixture + + +--- + +# 11. MVP Definition + +The MVP is successful when this works: + +```bash +glassmind index +glassmind search "what was I thinking about Obsidian RAG?" +glassmind context "help me continue the Glassmind project" +glassmind serve +``` + +And an external agent can call: + +```text +glassmind_context +``` + +to get useful context from the vault. + +--- + +# 12. Risks + +## Risk: Overengineering + +Mitigation: + +- CLI first + +- search first + +- no autonomous agent + +- no graph DB in v1 + + +## Risk: Bad Retrieval + +Mitigation: + +- hybrid search + +- score debugging + +- manual eval queries + +- source visibility + + +## Risk: Vault Corruption + +Mitigation: + +- user notes read-only by default + +- `.agent/` writes only + +- proposed diffs for user files + +- audit log + + +## Risk: Slow Indexing + +Mitigation: + +- content hashes + +- incremental updates + +- skip unchanged files + + +## Risk: AI Slop in Vault + +Mitigation: + +- agent output goes to `.agent/inbox/` + +- user approval before promoting content + +- clear generated-content markers + + +--- + +# 13. Opinionated v1 Tech Stack + +```text +Language: + Rust + +CLI: + clap + +HTTP: + axum + +Database: + SQLite + +SQL access: + rusqlite or sqlx + +Vector search: + sqlite-vec + +Markdown parsing: + pulldown-cmark + +Frontmatter: + gray_matter + +Embeddings: + Ollama first + +Logging: + tracing + +Config: + glassmind.toml +``` + +--- + +# 14. 
Example Config + +```toml +[vault] +path = "C:/Users/kass/Documents/ObsidianVault" + +[index] +include_agent_dir = true +ignore_dirs = [".git", ".obsidian", ".trash"] +chunk_target_tokens = 500 +chunk_overlap_tokens = 80 + +[embeddings] +backend = "ollama" +model = "nomic-embed-text" +url = "http://localhost:11434" + +[search] +semantic_weight = 0.55 +keyword_weight = 0.25 +recency_weight = 0.10 +link_weight = 0.05 +tag_weight = 0.05 + +[writes] +mode = "agent-only" +agent_dir = ".agent" + +[server] +host = "127.0.0.1" +port = 7331 +``` + +--- + +# 15. Glossary + +## Agent + +An AI-driven system that can use tools. + +Examples: + +```text +Claude Code +Codex +Hermes +Nightshift +Kiro +``` + +Glassmind is not the agent. + +Glassmind is the tool the agent calls. + +## RAG + +Retrieval-Augmented Generation. + +A system retrieves relevant context before the LLM answers. + +## Embedding + +A vector representation of text meaning. + +Used for semantic search. + +## Vector + +A list of numbers representing meaning. + +## Vector Search + +Finding vectors near another vector. + +This finds semantically similar text. + +## Chunk + +A smaller piece of a note used for retrieval. + +## FTS + +Full-text search. + +Keyword search built into SQLite. + +## MCP + +Model Context Protocol. + +A way for AI tools to call external tools. + +## Canonical Source + +The real source of truth. + +For Glassmind, this is the Obsidian markdown vault. + +## Cache + +Rebuildable derived data. + +The SQLite database is a cache/index, not the truth. + +## Hot Memory + +Recently or frequently useful context. + +## Cold Memory + +Old context that is still searchable but not automatically included. + +--- + +# 16. North Star + +Glassmind succeeds if an external agent can ask: + +```text +“What context from my Obsidian vault matters for this?” +``` + +And Glassmind returns something good enough that the agent feels like it actually remembers your projects. + +Not fake memory. + +Not chatbot vibes. 
+ +Actual local context retrieval over your own notes. \ No newline at end of file diff --git a/docs/dev/tasks.md b/docs/dev/tasks.md new file mode 100644 index 0000000..eb1c17c --- /dev/null +++ b/docs/dev/tasks.md @@ -0,0 +1,986 @@ +```md id="v2l7nq" +# tasks.md + +# Glassmind Tasks + +## Project Rules + +- Prefer small, shippable tasks. +- Every stage should leave the project runnable. +- Avoid premature abstraction. +- Favor inspectability over magic. +- Small application philosophy +- Markdown files are canonical. +- Database state must be rebuildable. +- Local-first is a hard requirement. +- No cloud dependency in core architecture. +- No enshittification. + +--- + +# Phase 1 — Project Skeleton & Foundations + +## [x] GM-001 — Initialize Rust workspace + +### Goals +- Create Rust project +- Verify build pipeline +- Establish workspace structure + +### Tasks +- Run `cargo init` +- Create `/src` + +- Create `/examples` +- Create `/fixtures` +- Create `/scripts` +- Create initial `.gitignore` +- Add GPL +- Verify clean build + +### Acceptance Criteria +- `cargo build` succeeds +- Repo structure exists +- Project compiles on clean machine + +--- + +## [x] GM-002 — Add core dependencies + +### Goals +Install foundational crates. + +### Tasks +Add: +- `clap` +- `serde` +- `serde_json` +- `toml` +- `tracing` +- `tracing-subscriber` +- `anyhow` + +### Acceptance Criteria +- Project builds +- Logging works +- Config parsing stub exists + +--- + +## [x] GM-003 — Implement CLI skeleton + +### Goals +Create top-level CLI interface. + +### Tasks +Add commands: +- `init` +- `index` +- `search` +- `context` +- `serve` +- `stats` + +### Acceptance Criteria +- `glassmind --help` works +- Subcommands render correctly +- Unknown commands fail cleanly + +--- + +## [x] GM-004 — Create config loader + +### Goals +Load user config from disk. 
+ +### Tasks +- Define `glassmind.toml` +- Create config structs +- Implement config parsing +- Add defaults +- Add validation +- Add config path resolution + +### Acceptance Criteria +- Config loads successfully +- Missing config generates defaults +- Invalid config errors clearly + +--- + +## [x] GM-005 — Implement logging setup + +### Goals +Establish consistent logging. + +### Tasks +- Configure tracing subscriber +- Add log levels +- Add debug mode +- Add structured logs +- Add startup logging + +### Acceptance Criteria +- Logs visible in CLI +- Debug mode works +- Errors produce stack traces + +--- + +# Phase 2 — Vault Discovery + +## [x] GM-006 — Implement vault walker + +### Goals +Recursively discover markdown files. + +### Tasks +- Add `walkdir` +- Walk configured vault path +- Detect `.md` files +- Skip ignored directories +- Support nested folders +- Add file count metrics + +### Acceptance Criteria +- Vault scan succeeds +- Ignores work correctly +- Correct markdown count displayed + +--- + +## [x] GM-007 — Implement ignore handling + +### Goals +Allow configurable ignore patterns. + +### Tasks +Ignore: +- `.git` +- `.obsidian` +- `.trash` +- `.agent/cache` + +Add configurable ignores. + +### Acceptance Criteria +- Ignored folders skipped +- Configurable ignores work +- No accidental recursion + +--- + +## [x] GM-008 — Add note metadata extraction + +### Goals +Extract basic note metadata. + +### Tasks +Extract: +- path +- filename +- title +- modified timestamp +- file size + +### Acceptance Criteria +- Metadata visible in debug output +- Data stored internally + +--- + +## [x] GM-009 — Add markdown parsing + +### Goals +Parse markdown structure. 
+ +### Tasks +Add: +- heading extraction +- paragraph extraction +- code block detection +- list detection + +Suggested crate: +- `pulldown-cmark` + +### Acceptance Criteria +- Headings parsed correctly +- Parser handles malformed markdown gracefully + +--- + +## [x] GM-010 — Extract wikilinks + +### Goals +Detect Obsidian-style links. + +### Tasks +Support: +- `[[note]]` +- `[[note|alias]]` +- `[[folder/note]]` + +Store: +- source +- target +- alias + +### Acceptance Criteria +- Links parsed correctly +- Links stored in memory + +--- + +## [ ] GM-011 — Extract tags + +### Goals +Parse tags from notes. + +### Tasks +Support: +- inline tags +- frontmatter tags + +Normalize: +- lowercase +- trim whitespace + +### Acceptance Criteria +- Tags extracted consistently +- Duplicate tags removed + +--- + +# Phase 3 — Database Layer + +## [ ] GM-012 — Add SQLite integration + +### Goals +Create local metadata database. + +### Tasks +- Add SQLite crate +- Create DB initialization +- Create migrations +- Create schema bootstrap + +### Acceptance Criteria +- DB initializes automatically +- Schema created successfully + +--- + +## [ ] GM-013 — Create notes table + +### Goals +Store note metadata. + +### Tasks +Create schema for: +- notes +- paths +- timestamps +- hashes + +### Acceptance Criteria +- Notes persist correctly +- Duplicate handling works + +--- + +## [ ] GM-014 — Create chunks table + +### Goals +Store retrieval chunks. + +### Tasks +Store: +- note ID +- chunk content +- heading path +- line numbers +- token estimates + +### Acceptance Criteria +- Chunks persist correctly +- Relationships resolve correctly + +--- + +## [ ] GM-015 — Add content hashing + +### Goals +Detect changed notes efficiently. 
+ +### Tasks +- Add SHA256 hashing +- Hash note content +- Compare hashes on reindex +- Skip unchanged files + +### Acceptance Criteria +- Incremental indexing works +- Unchanged files skipped + +--- + +# Phase 4 — Chunking + +## [ ] GM-016 — Implement heading-based chunking + +### Goals +Split notes into useful retrieval units. + +### Tasks +- Split by heading +- Preserve heading hierarchy +- Preserve ordering +- Preserve note references + +### Acceptance Criteria +- Chunks remain readable +- Context boundaries make sense + +--- + +## [ ] GM-017 — Add fallback chunk splitting + +### Goals +Handle giant sections safely. + +### Tasks +- Add max chunk size +- Add overlap windows +- Preserve sentence boundaries if possible + +### Acceptance Criteria +- Large files chunk correctly +- No giant retrieval blobs + +--- + +## [ ] GM-018 — Estimate token counts + +### Goals +Prepare for LLM context budgeting. + +### Tasks +- Add rough token estimator +- Store token counts +- Expose in debug mode + +### Acceptance Criteria +- Estimates reasonably accurate +- Context budgeting possible + +--- + +# Phase 5 — Search + +## [ ] GM-019 — Implement SQLite FTS search + +### Goals +Add keyword search. + +### Tasks +- Enable FTS5 +- Create search index +- Implement search query +- Add snippet extraction +- Add ranking + +### Acceptance Criteria +- Search returns relevant results +- Results ranked correctly + +--- + +## [ ] GM-020 — Implement basic CLI search command + +### Goals +Expose usable search interface. + +### Tasks +- Add search formatting +- Show paths +- Show headings +- Show snippets +- Add JSON output option + +### Acceptance Criteria +- `glassmind search` usable daily +- Results readable +- JSON output valid + +--- + +```md id="5m9zsw" +## Embeddings + +### [ ] GM-021 — Create embedding backend trait + +#### Goals +Abstract embedding providers behind a common interface. 
+ +#### Tasks +- Create `EmbeddingBackend` trait +- Define embedding request/response types +- Add async support if needed +- Add error handling +- Add provider config support + +#### Acceptance Criteria +- Multiple backends can implement trait +- Search pipeline independent from provider implementation + +--- + +### [ ] GM-022 — Implement Ollama embedding backend + +#### Goals +Generate embeddings locally using Ollama. + +#### Tasks +- Add Ollama HTTP client +- Implement embedding requests +- Add configurable embedding model +- Add retry handling +- Add timeout handling + +#### Acceptance Criteria +- Query embeddings generated successfully +- Chunk embeddings generated successfully +- Backend configurable through TOML + +--- + +### [ ] GM-023 — Add embedding generation pipeline + +#### Goals +Generate embeddings during indexing. + +#### Tasks +- Embed chunks during index phase +- Skip unchanged embeddings +- Batch embedding requests +- Add embedding queue abstraction +- Add progress reporting + +#### Acceptance Criteria +- Vault indexing produces embeddings +- Reindex skips unchanged chunks + +--- + +### [ ] GM-024 — Integrate sqlite-vec + +#### Goals +Store and search vectors locally. + +#### Tasks +- Add sqlite-vec dependency +- Create vector schema +- Store chunk vectors +- Add nearest-neighbor search +- Validate vector dimensions + +#### Acceptance Criteria +- Embeddings persist correctly +- Similarity search returns results + +--- + +### [ ] GM-025 — Implement semantic search + +#### Goals +Search by meaning instead of keywords. + +#### Tasks +- Embed query text +- Retrieve nearest vectors +- Rank results by similarity +- Return chunk metadata +- Add configurable result limits + +#### Acceptance Criteria +- Semantically related notes retrieved +- Search quality noticeably useful + +--- + +## Hybrid Retrieval + +### [ ] GM-026 — Create retrieval scoring model + +#### Goals +Combine multiple ranking systems. 
+ +#### Tasks +Add weighted scoring for: +- semantic similarity +- keyword relevance +- recency +- tags +- wikilinks +- path/project affinity + +#### Acceptance Criteria +- Final ranking combines all scoring sources +- Weights configurable + +--- + +### [ ] GM-027 — Add recency boosting + +#### Goals +Favor recently active notes. + +#### Tasks +- Define recency decay function +- Add configurable recency weights +- Support pinned notes +- Add debug scoring output + +#### Acceptance Criteria +- Recent notes boosted appropriately +- Old notes still retrievable + +--- + +### [ ] GM-028 — Add wikilink graph weighting + +#### Goals +Use note relationships during retrieval. + +#### Tasks +- Calculate link adjacency +- Boost linked neighbors +- Support bidirectional relationships +- Add graph traversal depth limit + +#### Acceptance Criteria +- Related linked notes boosted +- Retrieval continuity improved + +--- + +### [ ] GM-029 — Add retrieval debug mode + +#### Goals +Make ranking explainable. + +#### Tasks +Display: +- semantic score +- keyword score +- recency score +- tag score +- link score +- final score + +#### Acceptance Criteria +- Users can inspect ranking behavior +- Retrieval tuning becomes practical + +--- + +## Context Bundles + +### [ ] GM-030 — Create context bundle builder + +#### Goals +Generate LLM-ready retrieval payloads. + +#### Tasks +- Define context bundle structure +- Deduplicate overlapping chunks +- Group by note +- Preserve ordering +- Add metadata blocks + +#### Acceptance Criteria +- Context bundles readable +- Context bundles useful for LLM prompts + +--- + +### [ ] GM-031 — Add token budgeting + +#### Goals +Prevent oversized context payloads. 
+ +#### Tasks +- Track token estimates +- Add configurable token budget +- Trim low-priority chunks +- Preserve high-score chunks first + +#### Acceptance Criteria +- Context stays within configured budget +- Retrieval quality remains useful + +--- + +### [ ] GM-032 — Add context summarization hooks + +#### Goals +Prepare for future summarization support. + +#### Tasks +- Define summarizer interface +- Add optional summarization stage +- Add summary metadata fields +- Support disabling summarization + +#### Acceptance Criteria +- Pipeline supports optional summarization +- Core retrieval still functions without summaries + +--- + +### [ ] GM-033 — Implement `glassmind context` + +#### Goals +Expose high-level retrieval workflow. + +#### Tasks +- Add CLI command +- Format markdown output +- Add JSON mode +- Include sources +- Include retrieval metadata + +#### Acceptance Criteria +- Command usable directly by humans +- Output usable by agents + +--- + +## HTTP API + +### [ ] GM-034 — Add Axum server skeleton + +#### Goals +Expose Glassmind over HTTP. + +#### Tasks +- Add Axum dependency +- Create server bootstrap +- Add config support +- Add graceful shutdown +- Bind localhost by default + +#### Acceptance Criteria +- Server starts successfully +- Local requests succeed + +--- + +### [ ] GM-035 — Implement `/search` endpoint + +#### Goals +Expose search over HTTP. + +#### Tasks +- Define request schema +- Define response schema +- Add pagination +- Add JSON serialization +- Add validation + +#### Acceptance Criteria +- Endpoint returns valid search results +- Errors handled cleanly + +--- + +### [ ] GM-036 — Implement `/context` endpoint + +#### Goals +Expose context retrieval API. 
+ +#### Tasks +- Add context request schema +- Support token budget parameter +- Return structured context bundles +- Include source metadata + +#### Acceptance Criteria +- API returns usable context payloads +- Response structure documented + +--- + +### [ ] GM-037 — Implement `/notes/{id}` endpoint + +#### Goals +Allow direct note retrieval. + +#### Tasks +- Fetch note metadata +- Fetch chunk data +- Return markdown content +- Add error handling + +#### Acceptance Criteria +- Notes retrievable by ID +- Missing notes handled correctly + +--- + +### [ ] GM-038 — Add `/health` and `/stats` + +#### Goals +Support monitoring/debugging. + +#### Tasks +- Add health endpoint +- Add DB stats +- Add vault metrics +- Add embedding counts + +#### Acceptance Criteria +- Health checks usable +- Stats endpoint informative + +--- + +## MCP Support + +### [ ] GM-039 — Create MCP server skeleton + +#### Goals +Allow AI tools to call Glassmind directly. + +#### Tasks +- Add MCP transport support +- Define tool registry +- Implement request dispatch +- Add structured tool responses + +#### Acceptance Criteria +- MCP server starts successfully +- Tool calls function correctly + +--- + +### [ ] GM-040 — Implement `glassmind_search` MCP tool + +#### Goals +Expose search through MCP. + +#### Tasks +- Define tool schema +- Add search execution +- Return structured results +- Include source paths + +#### Acceptance Criteria +- MCP clients can search successfully + +--- + +### [ ] GM-041 — Implement `glassmind_context` MCP tool + +#### Goals +Expose context bundles through MCP. + +#### Tasks +- Add context generation +- Add token budgeting +- Return structured context payloads + +#### Acceptance Criteria +- MCP clients receive usable context bundles + +--- + +### [ ] GM-042 — Implement `glassmind_read` MCP tool + +#### Goals +Allow agents to inspect notes directly. 
+ +#### Tasks +- Fetch note content +- Support chunk-specific reads +- Add note metadata +- Add error handling + +#### Acceptance Criteria +- Agents can retrieve note contents reliably + +--- + +### [ ] GM-043 — Add MCP integration examples + +#### Goals +Document real-world integration. + +#### Tasks +- Add Claude Desktop example +- Add Codex example +- Add local agent example +- Add config examples + +#### Acceptance Criteria +- Users can integrate Glassmind without guesswork + +--- + +## Incremental Indexing + +### [ ] GM-044 — Add file change detection + +#### Goals +Avoid full vault reindexing. + +#### Tasks +- Compare content hashes +- Detect added files +- Detect deleted files +- Detect modified files + +#### Acceptance Criteria +- Incremental indexing functions correctly +- Unchanged notes skipped + +--- + +### [ ] GM-045 — Add filesystem watch mode + +#### Goals +Support live vault updates. + +#### Tasks +- Add filesystem watcher +- Debounce rapid changes +- Trigger partial reindex +- Add watch logging + +#### Acceptance Criteria +- File edits reflected automatically +- No runaway indexing loops + +--- + +### [ ] GM-046 — Add partial embedding regeneration + +#### Goals +Avoid recomputing unchanged vectors. + +#### Tasks +- Detect changed chunks +- Recompute only dirty embeddings +- Preserve existing vectors +- Handle deleted chunks + +#### Acceptance Criteria +- Reindex significantly faster after small edits + +--- + +## Agent Workspace + +### [ ] GM-047 — Create `.agent/` workspace structure + +#### Goals +Establish safe agent-owned storage. + +#### Tasks +Create: +- `.agent/memories` +- `.agent/tasks` +- `.agent/summaries` +- `.agent/logs` +- `.agent/cache` + +#### Acceptance Criteria +- Workspace generated automatically +- Structure documented + +--- + +### [ ] GM-048 — Add memory capture commands + +#### Goals +Allow structured memory persistence. 
+ +#### Tasks +Add: +- `capture-memory` +- `capture-task` +- `capture-decision` + +Store entries as markdown. + +#### Acceptance Criteria +- Commands append correctly +- Entries index correctly + +--- + +### [ ] GM-049 — Index `.agent/` content + +#### Goals +Allow generated memory retrieval. + +#### Tasks +- Include `.agent/` in indexing pipeline +- Tag generated content +- Preserve provenance metadata + +#### Acceptance Criteria +- Agent-generated notes searchable +- Provenance visible + +--- + +### [ ] GM-050 — Add retrieval audit logging + +#### Goals +Track retrieval behavior for debugging. + +#### Tasks +Log: +- query +- retrieved chunks +- retrieval scores +- timestamp +- requesting client + +#### Acceptance Criteria +- Retrievals traceable +- Logs useful for tuning/debugging +``` +--- + +# What's Next + +## Retrieval Quality +- Evaluation datasets +- Ranking tuning +- Query debugging +- Explainable scoring + +## Performance +- Parallel indexing +- Cached embeddings +- Batch embedding generation +- Large vault optimization + +## Future Ideas +- Git history awareness +- Temporal retrieval +- Canvas parsing +- Code-aware chunking +- Multi-vault support +- Graph exploration +- Retrieval visualization +- Vault analytics +- Semantic diffing +- “What changed?” context reports +- Local reranking models +- Session continuity memory +- Agent-safe write proposals +``` diff --git a/docs/faq.md b/docs/faq.md new file mode 100644 index 0000000..a602cfe --- /dev/null +++ b/docs/faq.md @@ -0,0 +1,535 @@ +````md +# FAQ.md + +# Frequently Asked Questions + +## What is Glassmind? + +Glassmind is a local-first semantic retrieval and memory system for markdown knowledge bases. 
+ +It indexes markdown files, builds semantic and structural search indexes, and exposes retrieval APIs for: +- AI assistants +- local models +- agents +- MCP clients +- automation tooling +- humans using the CLI directly + +Glassmind is designed to work especially well with Obsidian vaults, but only requires a directory of markdown files. + +--- + +# What problem is Glassmind solving? + +Modern LLMs are powerful but stateless. + +They: +- lose context +- forget projects +- cannot inherently understand your local files +- have limited prompt windows +- hallucinate when context is missing + +Meanwhile many people already maintain: +- engineering documentation +- project journals +- research notes +- worldbuilding +- task tracking +- personal knowledge systems + +inside markdown repositories. + +Glassmind bridges those worlds. + +It provides: +- retrieval +- semantic search +- context construction +- memory indexing + +over existing markdown workflows. + +--- + +# Is Glassmind an AI agent? + +No. + +Glassmind is retrieval infrastructure. + +It does not: +- autonomously execute tasks +- reason independently +- act as a chatbot +- replace orchestration frameworks + +It is closer to: +- a search engine +- a semantic index +- a memory API +- a retrieval layer + +Agents and AI tools call Glassmind to retrieve relevant context. + +--- + +# Is Glassmind tied to Obsidian? + +No. + +Glassmind is markdown-native. + +It works with: +- Obsidian vaults +- plain markdown directories +- docs repositories +- PKM systems +- engineering notebooks +- wiki-style folder structures + +Obsidian is simply a particularly good fit because: +- it is local-first +- it uses markdown +- it has strong linking semantics +- it is widely adopted + +Glassmind treats markdown files as canonical regardless of editor. + +--- + +# Why markdown? 
+ +Because markdown is: +- portable +- durable +- inspectable +- editor-agnostic +- version-control friendly +- human-readable + +Glassmind intentionally avoids proprietary storage formats for primary knowledge. + +The markdown files remain the source of truth. + +Everything else is rebuildable. + +--- + +# What is the source of truth? + +The markdown files. + +Glassmind builds: +- indexes +- caches +- embeddings +- retrieval metadata + +on top of them. + +The database is disposable and rebuildable. + +If Glassmind disappears, the notes still work. + +--- + +# What database does Glassmind use? + +Planned v1: + +```text +SQLite +sqlite-vec +``` + +SQLite stores: +- note metadata +- chunk metadata +- tags +- links +- retrieval state +- indexes + +sqlite-vec stores: +- semantic vectors ("embeddings") + +The database is local and rebuildable. + +--- + +# Why SQLite? + +Because SQLite is: +- local-first +- fast enough +- battle-tested +- portable +- operationally simple + +Glassmind intentionally avoids requiring: +- external database servers +- cloud infrastructure +- distributed systems +- operational overhead + +for normal usage. + +--- + +# What are embeddings? + +Embeddings are vector representations of semantic meaning. + +A chunk of text is transformed into a vector like: + +```text +[0.12, -0.44, 0.89, ...] +``` + +Vectors with similar meaning are located near each other mathematically. + +This enables semantic search. + +Example: + +```text +"persistent semantic cache" +``` + +can match: + +```text +"local memory system" +``` + +even if the wording differs. + +--- + +# Does Glassmind require online APIs? + +No. + +Glassmind is designed for local operation. + +Planned local embedding options: +- Ollama +- fastembed-rs +- llama.cpp-compatible backends + +Cloud embeddings may eventually be optional, but local-first is the default philosophy. + +--- + +# What is hybrid retrieval? + +Glassmind does not rely solely on embeddings. 
+ +Retrieval combines: +- semantic similarity +- keyword matching +- tags +- wikilinks +- recency +- project/path weighting +- hot memory boosting + +This generally performs better than pure vector search. + +--- + +# What is a chunk? + +A chunk is a retrieval unit. + +Instead of embedding entire files, Glassmind splits documents into smaller pieces. + +Usually: +- heading sections +- paragraphs +- task blocks +- code blocks +- fixed-size fallback windows + +Chunking improves: +- retrieval quality +- precision +- context density +- token efficiency + +--- + +# What is a context bundle? + +A context bundle is an LLM-ready retrieval result. + +Instead of returning raw search matches only, Glassmind can assemble: +- relevant chunks +- related notes +- recent project activity +- linked concepts +- source references + +into a structured payload optimized for AI consumption. + +Example: + +```text +"Help me continue the Glassmind architecture work" +``` + +might retrieve: +- recent architecture notes +- TODOs +- design decisions +- linked experiments +- related discussions + +within a configurable token budget. + +--- + +# What is MCP? + +MCP stands for: + +```text +Model Context Protocol +``` + +It is a protocol used by AI tools to interact with external systems and tools. + +Glassmind plans to expose MCP-compatible retrieval tools such as: + +```text +glassmind_search +glassmind_context +glassmind_read +``` + +This allows tools like Claude Code or other agent systems to retrieve vault context directly. + +--- + +# How is Glassmind different from traditional RAG systems? 
+ +Many RAG systems are: +- cloud-first +- opaque +- tightly coupled to vector databases +- detached from user workflows +- document-ingestion pipelines rather than knowledge systems + +Glassmind is designed around: +- local-first operation +- markdown-native workflows +- inspectability +- rebuildability +- human-readable source material +- AI + human co-usage + +Glassmind assumes the markdown corpus is already meaningful. + +It focuses on retrieval quality and continuity. + +--- + +# How is Glassmind different from vector databases? + +Vector databases store embeddings and perform nearest-neighbor search. + +Glassmind is: +- retrieval orchestration +- indexing +- chunking +- metadata extraction +- semantic ranking +- context assembly +- markdown-aware infrastructure + +Glassmind may use vector storage internally, but it is not merely a vector DB wrapper. + +--- + +# Will Glassmind modify my notes? + +By default: +- no direct user note modification + +Glassmind may optionally write to: + +```text +.agent/ +``` + +for: +- summaries +- logs +- generated memory +- task state +- context artifacts + +Future configurable modes may support: +- proposed diffs +- explicit approvals +- direct modification + +but user ownership and safety are priorities. + +--- + +# Why not just use grep or ripgrep? + +Keyword search is extremely useful and Glassmind still supports it. + +But semantic retrieval solves problems like: + +```text +"I know I wrote about this concept but I forgot the terminology." +``` + +Glassmind combines: +- keyword retrieval +- semantic retrieval +- structural metadata +- recency +- graph relationships + +rather than replacing traditional search entirely. + +--- + +# Why not use a graph database? + +Maybe eventually. + +But for v1: +- simplicity +- rebuildability +- portability +- operational sanity + +matter more. 
+ +SQLite plus semantic indexing is likely sufficient for: +- personal vaults +- power-user vaults +- local AI workflows + +Graph semantics can still exist logically without introducing a distributed graph infrastructure problem on day one. + +--- + +# Is Glassmind intended for teams? + +Not initially. + +The primary target is: +- individuals +- researchers +- engineers +- writers +- local AI workflows +- personal knowledge systems + +Future multi-user support is possible but not the immediate focus. + +--- + +# What does “local-first” actually mean here? + +The intended default behavior is: + +- local storage +- localhost-only networking +- optional offline operation +- local embeddings +- markdown canonical storage +- rebuildable indexes +- no required cloud dependency +- no telemetry + +Glassmind should remain usable: +- disconnected +- self-hosted +- archived +- years into the future + +--- + +# What does “hot memory” mean? + +Glassmind conceptually separates retrieval into: +- hot memory +- warm memory +- cold memory + +Hot memory includes: +- recent notes +- active projects +- pinned information +- recently retrieved context + +Cold memory still exists, but is less likely to be automatically surfaced. + +This helps context selection remain relevant without deleting historical information. + +--- + +# What are the long-term goals? + +Long-term goals include: +- strong retrieval quality +- excellent local AI workflows +- durable markdown-native memory infrastructure +- robust MCP integration +- context continuity across sessions +- transparent retrieval behavior +- inspectable ranking systems +- ergonomic semantic search + +Not: +- replacing human thought +- building autonomous AGI office workers +- trapping users inside proprietary ecosystems + +--- + +# Why is the project opinionated? + +Because retrieval quality and long-term maintainability depend heavily on architecture choices. 
+ +Glassmind intentionally prefers: +- explicit systems +- rebuildable state +- inspectability +- portability +- user ownership +- operational simplicity + +over: +- hidden magic +- giant opaque pipelines +- cloud dependence +- maximal abstraction + +--- + +# Why the name “Glassmind”? + +The original idea was: + +```text +semantic transparency into your own thoughts +``` + +The system is supposed to feel like: +- peering through glass +- inspecting memory +- traversing thought structures + +rather than interacting with a black box. + +Also it sounded less alarming than some of the other candidate names. +```` diff --git a/docs/huh.md b/docs/huh.md new file mode 100644 index 0000000..a9d3ad2 --- /dev/null +++ b/docs/huh.md @@ -0,0 +1,470 @@ + +# Okay, what *is* this thing? + +Glassmind is a tool that helps AI systems search and understand your notes without uploading your brain to somebody else’s servers. + +If you use: +- Obsidian +- markdown notes +- personal knowledge management tools +- giant folders full of half-finished thoughts +- daily notes +- project logs +- research docs +- creative writing +- engineering notes + +…Glassmind is designed to make those notes actually usable by AI tools. + +--- + +# The Short Version + +Glassmind turns your Obsidian vault into something AI can search intelligently. + +Not just: + +```text +find exact words +``` + +but: + +```text +find ideas related to what I mean +``` + +without requiring: +- cloud services +- subscriptions +- proprietary formats +- uploading your notes to random startups + +--- + +# Explain It Like I’m Normal + +Imagine you have: +- thousands of notes +- years of project ideas +- meeting notes +- technical docs +- TODOs +- journal entries +- random fragments of thoughts + +You vaguely remember writing something useful six months ago. 
+ +You search: + +```text +"local memory system" +``` + +But the note was actually called: + +```text +"persistent semantic context cache" +``` + +Normal search often fails there. + +Glassmind is designed to make that search work anyway. + +--- + +# How? + +Glassmind builds an index of your notes. + +Think of it like: +- a library catalog +- a search engine +- a map of your vault +- a memory assistant + +It reads your markdown files and stores: +- note titles +- headings +- tags +- links +- sections +- semantic fingerprints ("embeddings") + +Then when you search, it tries to find notes related by: +- meaning +- keywords +- tags +- recency +- links between notes +- project relationships + +--- + +# What Are “Embeddings”? + +This is the scary AI word everyone uses without explaining. + +An embedding is basically: + +```text +a mathematical fingerprint of meaning +``` + +Glassmind converts chunks of text into vectors (lists of numbers) that represent semantic similarity. + +Meaning: + +```text +"local memory tool" +``` + +can match: + +```text +"persistent semantic cache" +``` + +even though the words are different. + +This is what makes modern semantic search possible. + +--- + +# Is This Another AI? + +Not really. + +Glassmind is infrastructure. + +It does not: +- roleplay +- think +- chat +- plan your life +- replace your notes + +It retrieves context. + +Think: + +```text +AI assistant ← Glassmind ← Your notes +``` + +Glassmind is the memory layer. + +--- + +# Why Not Just Use ChatGPT Directly? + +You can. + +But large language models have bad long-term memory. + +They: +- lose context +- forget projects +- hallucinate +- cannot automatically understand your vault structure +- do not inherently “know your notes” + +Glassmind helps solve that by retrieving useful context automatically. + +--- + +# Why Obsidian? 
+ +Because Obsidian is already: +- local-first +- markdown-based +- widely used +- human-readable +- flexible +- not tied to a proprietary database + +Glassmind treats your Obsidian vault as the canonical source of truth. + +Your notes remain: +- plain files +- portable +- editable without Glassmind +- future-proof + +If Glassmind disappeared tomorrow, your notes would still work. + +That is intentional. + +--- + +# What Does “Local-First” Mean? + +It means: +- your notes stay on your machine +- you control the files +- the system works offline +- cloud services are optional +- the software is designed around ownership + +Glassmind is intentionally designed to avoid: +- vendor lock-in +- telemetry creep +- cloud dependency +- “AI platform” nonsense +- enshittification + +--- + +# What Does Glassmind Actually Do? + +## Indexing + +Glassmind scans your vault and builds a searchable index. + +--- + +## Semantic Search + +Find related ideas, not just exact words. + +--- + +## Context Bundles + +This is one of the big goals. + +Instead of dumping entire folders into an AI prompt, Glassmind tries to gather: + +```text +the notes that actually matter +``` + +for the current task. + +Example: + +```text +"Help me continue my game engine project" +``` + +Glassmind might return: +- recent engine notes +- TODOs +- architecture docs +- related experiments +- previous decisions +- linked concepts + +This gives AI tools much better context. + +--- + +# What Is RAG? + +RAG means: + +```text +Retrieval-Augmented Generation +``` + +Which is an extremely annoying phrase for a simple idea: + +```text +Find useful information before asking the AI to answer. +``` + +Without RAG: + +```text +AI guesses from training data +``` + +With RAG: + +```text +AI uses your actual notes/documents +``` + +Glassmind is a RAG system for Obsidian vaults. + +--- + +# Is This Replacing Obsidian? + +No. 
+ +Obsidian remains: +- the note editor +- the vault UI +- the writing environment +- the graph view +- the human-facing tool + +Glassmind is: +- indexing +- retrieval +- semantic search +- memory infrastructure +- agent tooling + +--- + +# Is This Safe? + +The project is designed around: +- local-first storage +- rebuildable indexes +- markdown as source of truth +- minimal hidden state + +By default, Glassmind should avoid modifying user notes directly. + +Instead it may use: + +```text +.agent/ +``` + +for: +- generated summaries +- memory captures +- task state +- logs +- temporary outputs + +The idea is: +- your notes belong to you +- generated content is separated +- the system stays understandable + +--- + +# Who Is This For? + +Probably: +- software engineers +- researchers +- writers +- worldbuilders +- Obsidian users +- AI workflow nerds +- people building local AI setups +- people tired of cloud everything + +--- + +# What Is This NOT For? + +Probably not: +- enterprise surveillance software +- replacing databases +- fully autonomous AGI agent swarms +- “AI employees” +- growth-hacking your notes + +At least not intentionally. + +--- + +# Why Does This Exist? + +Because many people already use Obsidian as: +- memory +- project state +- idea storage +- engineering documentation +- thinking infrastructure + +But AI systems are still surprisingly bad at interacting with that information cleanly. + +Glassmind exists to bridge that gap without taking ownership away from the user. + +--- + +# Philosophy + +Glassmind is opinionated about a few things. + +## Your Notes Should Stay Yours + +Markdown files are the canonical source of truth. 
+ +--- + +## Local-First Matters + +Software should still function when: +- offline +- self-hosted +- unsupported +- five years old + +--- + +## AI Should Augment Retrieval, Not Replace Thought + +Glassmind is designed to help: +- surface context +- reduce friction +- improve continuity + +Not automate human meaning out of existence. + +--- + +## Avoid Hidden Magic + +The system should be: +- inspectable +- debuggable +- rebuildable +- understandable + +If the index breaks, rebuild it. + +If the retrieval is bad, improve scoring. + +If the AI hallucinates, expose sources. + +--- + +# The Dream + +The dream is not: + +```text +"AI writes your life for you" +``` + +The dream is: + +```text +"AI can finally understand your existing context well enough to be genuinely useful" +``` + +That’s a very different goal. + +--- + +# Final Summary + +Glassmind is: + +```text +local semantic memory infrastructure for Obsidian vaults +``` + +It helps AI systems retrieve useful context from your notes while keeping: +- ownership +- portability +- transparency +- local control + +intact. + +Or, less formally: + +```text +It lets the robot read your notes without handing your brain to a startup. +``` + diff --git a/examples/.gitkeep b/examples/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/.gitkeep @@ -0,0 +1 @@ + diff --git a/fixtures/.gitkeep b/fixtures/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/fixtures/.gitkeep @@ -0,0 +1 @@ + diff --git a/glassmind.toml b/glassmind.toml new file mode 100644 index 0000000..fc1ca40 --- /dev/null +++ b/glassmind.toml @@ -0,0 +1,33 @@ +[vault] +path = "." 
+ +[index] +include_agent_dir = true +ignore_dirs = [ + ".git", + ".obsidian", + ".trash", + ".agent/cache", +] +chunk_target_tokens = 500 +chunk_overlap_tokens = 80 + +[embeddings] +backend = "ollama" +model = "nomic-embed-text" +url = "http://localhost:11434" + +[search] +semantic_weight = 0.55 +keyword_weight = 0.25 +recency_weight = 0.1 +link_weight = 0.05 +tag_weight = 0.05 + +[writes] +mode = "agent-only" +agent_dir = ".agent" + +[server] +host = "127.0.0.1" +port = 7331 diff --git a/scripts/.gitkeep b/scripts/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/scripts/.gitkeep @@ -0,0 +1 @@ + diff --git a/src/cli.rs b/src/cli.rs new file mode 100644 index 0000000..63dc879 --- /dev/null +++ b/src/cli.rs @@ -0,0 +1,70 @@ +use std::path::PathBuf; + +use clap::{Parser, Subcommand, ValueEnum}; + +#[derive(Debug, Parser)] +#[command(name = "glassmind")] +#[command(about = "Local-first retrieval over markdown vaults")] +#[command(version)] +pub struct Cli { + /// Path to glassmind.toml. + #[arg(long, global = true)] + pub config: Option, + + /// Override the vault path from config. + #[arg(long, global = true)] + pub vault: Option, + + /// Enable debug logging. + #[arg(long, global = true)] + pub debug: bool, + + #[command(subcommand)] + pub command: Commands, +} + +#[derive(Debug, Subcommand)] +pub enum Commands { + /// Create a starter config and the agent-owned workspace. + Init { + /// Overwrite an existing glassmind.toml. + #[arg(long)] + force: bool, + }, + /// Scan the configured vault and report discovered markdown notes. + Index { + /// Emit JSON instead of text. + #[arg(long)] + json: bool, + }, + /// Search the current markdown vault with lightweight local matching. + Search { + query: String, + #[arg(short, long, default_value_t = 10)] + limit: usize, + #[arg(long, value_enum, default_value_t = OutputFormat::Text)] + output: OutputFormat, + }, + /// Build a human-readable context bundle from matching notes. 
+ Context { + query: String, + #[arg(short, long, default_value_t = 5)] + limit: usize, + #[arg(long, value_enum, default_value_t = OutputFormat::Text)] + output: OutputFormat, + }, + /// Start the future localhost HTTP API. + Serve, + /// Show vault scan metrics. + Stats { + /// Emit JSON instead of text. + #[arg(long)] + json: bool, + }, +} + +#[derive(Clone, Debug, ValueEnum)] +pub enum OutputFormat { + Text, + Json, +} diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..ec8d3e3 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,174 @@ +use std::fs; +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result, anyhow, bail}; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct Config { + pub vault: VaultConfig, + pub index: IndexConfig, + pub embeddings: EmbeddingsConfig, + pub search: SearchConfig, + pub writes: WritesConfig, + pub server: ServerConfig, +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct VaultConfig { + pub path: PathBuf, +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct IndexConfig { + pub include_agent_dir: bool, + pub ignore_dirs: Vec, + pub chunk_target_tokens: usize, + pub chunk_overlap_tokens: usize, +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct EmbeddingsConfig { + pub backend: String, + pub model: String, + pub url: String, +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct SearchConfig { + pub semantic_weight: f32, + pub keyword_weight: f32, + pub recency_weight: f32, + pub link_weight: f32, + pub tag_weight: f32, +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct WritesConfig { + pub mode: String, + pub agent_dir: PathBuf, +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct ServerConfig { + pub host: String, + pub port: u16, +} + +impl Config { + pub fn load(path: Option<&Path>) -> Result { + let path = path + .map(Path::to_path_buf) + 
.unwrap_or_else(Self::default_path); + if !path.exists() { + return Ok(Self::default()); + } + + let raw = fs::read_to_string(&path) + .with_context(|| format!("failed to read config {}", path.display()))?; + toml::from_str(&raw).with_context(|| format!("invalid config {}", path.display())) + } + + pub fn default_path() -> PathBuf { + PathBuf::from("glassmind.toml") + } + + pub fn with_cli_vault(mut self, vault: Option) -> Self { + if let Some(vault) = vault { + self.vault.path = vault; + } + self + } + + pub fn validate(&self) -> Result<()> { + if self.vault.path.as_os_str().is_empty() { + bail!("vault.path must not be empty"); + } + if self.index.chunk_target_tokens == 0 { + bail!("index.chunk_target_tokens must be greater than zero"); + } + if self.index.chunk_overlap_tokens >= self.index.chunk_target_tokens { + bail!("index.chunk_overlap_tokens must be smaller than index.chunk_target_tokens"); + } + if self.server.port == 0 { + bail!("server.port must be greater than zero"); + } + match self.writes.mode.as_str() { + "off" | "agent-only" | "propose" | "allow" => {} + other => { + bail!("writes.mode must be one of off, agent-only, propose, allow; got {other}") + } + } + Ok(()) + } + + pub fn write_default_file(&self, force: bool) -> Result<()> { + let path = Self::default_path(); + if path.exists() && !force { + return Err(anyhow!( + "{} already exists; pass --force to overwrite it", + path.display() + )); + } + + let raw = toml::to_string_pretty(self).context("failed to serialize default config")?; + fs::write(&path, raw).with_context(|| format!("failed to write {}", path.display())) + } + + pub fn create_agent_dirs(&self) -> Result<()> { + let base = self.vault.path.join(&self.writes.agent_dir); + for dir in [ + "memories", + "summaries", + "tasks", + "decisions", + "logs", + "cache", + ] { + fs::create_dir_all(base.join(dir)) + .with_context(|| format!("failed to create {}", base.join(dir).display()))?; + } + Ok(()) + } +} + +impl Default for Config { + fn 
default() -> Self { + Self { + vault: VaultConfig { + path: PathBuf::from("."), + }, + index: IndexConfig { + include_agent_dir: true, + ignore_dirs: vec![ + ".git".to_string(), + ".obsidian".to_string(), + ".trash".to_string(), + ".agent/cache".to_string(), + ], + chunk_target_tokens: 500, + chunk_overlap_tokens: 80, + }, + embeddings: EmbeddingsConfig { + backend: "ollama".to_string(), + model: "nomic-embed-text".to_string(), + url: "http://localhost:11434".to_string(), + }, + search: SearchConfig { + semantic_weight: 0.55, + keyword_weight: 0.25, + recency_weight: 0.10, + link_weight: 0.05, + tag_weight: 0.05, + }, + writes: WritesConfig { + mode: "agent-only".to_string(), + agent_dir: PathBuf::from(".agent"), + }, + server: ServerConfig { + host: "127.0.0.1".to_string(), + port: 7331, + }, + } + } +} diff --git a/src/logging.rs b/src/logging.rs new file mode 100644 index 0000000..e53265e --- /dev/null +++ b/src/logging.rs @@ -0,0 +1,23 @@ +use anyhow::{Result, anyhow}; +use tracing_subscriber::{EnvFilter, fmt}; + +pub fn init(debug: bool) -> Result<()> { + let default_level = if debug { + "glassmind=debug" + } else { + "glassmind=info" + }; + let filter = + EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(default_level)); + + fmt() + .with_env_filter(filter) + .with_target(debug) + .with_file(debug) + .with_line_number(debug) + .compact() + .try_init() + .map_err(|err| anyhow!("failed to initialize logging: {err}"))?; + + Ok(()) +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..e31a895 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,99 @@ +mod cli; +mod config; +mod logging; +mod markdown; +mod vault; + +use anyhow::Result; +use clap::Parser; +use tracing::{debug, info}; + +use crate::cli::{Cli, Commands, OutputFormat}; +use crate::config::Config; +use crate::vault::VaultIndex; + +fn main() -> Result<()> { + let cli = Cli::parse(); + logging::init(cli.debug)?; + + let config = 
Config::load(cli.config.as_deref())?.with_cli_vault(cli.vault); + config.validate()?; + + debug!(?config, "loaded config"); + + match cli.command { + Commands::Init { force } => init_project(&config, force), + Commands::Index { json } => { + let index = VaultIndex::scan(&config)?; + if json { + println!("{}", serde_json::to_string_pretty(&index.summary())?); + } else { + println!("{}", index.summary()); + } + Ok(()) + } + Commands::Stats { json } => { + let index = VaultIndex::scan(&config)?; + if json { + println!("{}", serde_json::to_string_pretty(&index.summary())?); + } else { + println!("{}", index.summary()); + } + Ok(()) + } + Commands::Search { + query, + limit, + output, + } => { + let index = VaultIndex::scan(&config)?; + let results = index.search(&query, limit); + match output { + OutputFormat::Text => { + if results.is_empty() { + println!("No matches."); + } + for (position, result) in results.iter().enumerate() { + println!("{}. {}", position + 1, result.note.path.display()); + println!(" title: {}", result.note.title); + if !result.note.headings.is_empty() { + println!(" headings: {}", result.note.headings.join(" > ")); + } + println!(" score: {}", result.score); + } + } + OutputFormat::Json => println!("{}", serde_json::to_string_pretty(&results)?), + } + Ok(()) + } + Commands::Context { + query, + limit, + output, + } => { + let index = VaultIndex::scan(&config)?; + let bundle = index.context_bundle(&query, limit); + match output { + OutputFormat::Text => println!("{}", bundle.to_markdown()), + OutputFormat::Json => println!("{}", serde_json::to_string_pretty(&bundle)?), + } + Ok(()) + } + Commands::Serve => { + info!("serve command is reserved for the HTTP API milestone"); + println!( + "HTTP API is not implemented yet. 
Planned bind: {}:{}", + config.server.host, config.server.port + ); + Ok(()) + } + } +} + +fn init_project(config: &Config, force: bool) -> Result<()> { + config.write_default_file(force)?; + config.create_agent_dirs()?; + println!("Initialized Glassmind at {}", config.vault.path.display()); + println!("Config: {}", Config::default_path().display()); + Ok(()) +} diff --git a/src/markdown.rs b/src/markdown.rs new file mode 100644 index 0000000..9e0cd9f --- /dev/null +++ b/src/markdown.rs @@ -0,0 +1,254 @@ +use regex::Regex; +use serde::Serialize; + +#[derive(Clone, Debug, Serialize)] +pub struct MarkdownDocument { + pub headings: Vec, + pub blocks: Vec, + pub wikilinks: Vec, +} + +#[derive(Clone, Debug, Serialize)] +pub struct MarkdownBlock { + pub kind: MarkdownBlockKind, + pub text: String, + pub start_line: usize, + pub end_line: usize, +} + +#[derive(Clone, Debug, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum MarkdownBlockKind { + Heading, + Paragraph, + CodeBlock, + List, +} + +#[derive(Clone, Debug, Serialize)] +pub struct Wikilink { + pub source: String, + pub target: String, + pub alias: Option, +} + +pub fn parse_markdown(source_path: &str, content: &str) -> MarkdownDocument { + let _ = pulldown_cmark::Parser::new_ext(content, pulldown_cmark::Options::all()).count(); + + let mut headings = Vec::new(); + let mut blocks = Vec::new(); + let mut paragraph = Vec::new(); + let mut paragraph_start = 0; + let mut in_code = false; + let mut code = Vec::new(); + let mut code_start = 0; + + for (idx, line) in content.lines().enumerate() { + let line_no = idx + 1; + let trimmed = line.trim(); + + if trimmed.starts_with("```") || trimmed.starts_with("~~~") { + if in_code { + code.push(line.to_string()); + blocks.push(MarkdownBlock { + kind: MarkdownBlockKind::CodeBlock, + text: code.join("\n"), + start_line: code_start, + end_line: line_no, + }); + code.clear(); + in_code = false; + } else { + flush_paragraph( + &mut blocks, + &mut paragraph, + 
paragraph_start, + line_no.saturating_sub(1), + ); + in_code = true; + code_start = line_no; + code.push(line.to_string()); + } + continue; + } + + if in_code { + code.push(line.to_string()); + continue; + } + + if let Some(heading) = parse_heading(trimmed) { + flush_paragraph( + &mut blocks, + &mut paragraph, + paragraph_start, + line_no.saturating_sub(1), + ); + headings.push(heading.clone()); + blocks.push(MarkdownBlock { + kind: MarkdownBlockKind::Heading, + text: heading, + start_line: line_no, + end_line: line_no, + }); + continue; + } + + if is_list_item(trimmed) { + flush_paragraph( + &mut blocks, + &mut paragraph, + paragraph_start, + line_no.saturating_sub(1), + ); + blocks.push(MarkdownBlock { + kind: MarkdownBlockKind::List, + text: trimmed.to_string(), + start_line: line_no, + end_line: line_no, + }); + continue; + } + + if trimmed.is_empty() { + flush_paragraph( + &mut blocks, + &mut paragraph, + paragraph_start, + line_no.saturating_sub(1), + ); + continue; + } + + if paragraph.is_empty() { + paragraph_start = line_no; + } + paragraph.push(trimmed.to_string()); + } + + let final_line = content.lines().count(); + if in_code { + blocks.push(MarkdownBlock { + kind: MarkdownBlockKind::CodeBlock, + text: code.join("\n"), + start_line: code_start, + end_line: final_line, + }); + } + flush_paragraph(&mut blocks, &mut paragraph, paragraph_start, final_line); + + MarkdownDocument { + headings, + blocks, + wikilinks: extract_wikilinks(source_path, content), + } +} + +fn flush_paragraph( + blocks: &mut Vec, + paragraph: &mut Vec, + start_line: usize, + end_line: usize, +) { + if paragraph.is_empty() { + return; + } + + blocks.push(MarkdownBlock { + kind: MarkdownBlockKind::Paragraph, + text: paragraph.join(" "), + start_line, + end_line, + }); + paragraph.clear(); +} + +fn parse_heading(trimmed: &str) -> Option { + let hashes = trimmed.chars().take_while(|c| *c == '#').count(); + if (1..=6).contains(&hashes) && trimmed.chars().nth(hashes) == Some(' ') { + 
Some(trimmed[hashes + 1..].trim().to_string()) + } else { + None + } +} + +fn is_list_item(trimmed: &str) -> bool { + trimmed.starts_with("- ") + || trimmed.starts_with("* ") + || trimmed.starts_with("+ ") + || trimmed.split_once(". ").is_some_and(|(prefix, _)| { + !prefix.is_empty() && prefix.chars().all(|c| c.is_ascii_digit()) + }) +} + +pub fn extract_wikilinks(source_path: &str, content: &str) -> Vec { + let link_re = Regex::new(r"\[\[([^\]\|]+?)(?:\|([^\]]+))?\]\]").expect("valid wikilink regex"); + link_re + .captures_iter(content) + .filter_map(|capture| { + let target = capture.get(1)?.as_str().trim().to_string(); + if target.is_empty() { + return None; + } + let alias = capture + .get(2) + .map(|m| m.as_str().trim().to_string()) + .filter(|s| !s.is_empty()); + Some(Wikilink { + source: source_path.to_string(), + target, + alias, + }) + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::{MarkdownBlockKind, extract_wikilinks, parse_markdown}; + + #[test] + fn extracts_obsidian_wikilink_forms() { + let links = extract_wikilinks( + "source.md", + "[[note]] [[note|alias]] [[folder/note]] [[folder/note#Heading|Alias]]", + ); + + assert_eq!(links.len(), 4); + assert_eq!(links[0].target, "note"); + assert_eq!(links[0].alias, None); + assert_eq!(links[1].target, "note"); + assert_eq!(links[1].alias.as_deref(), Some("alias")); + assert_eq!(links[2].target, "folder/note"); + assert_eq!(links[3].target, "folder/note#Heading"); + assert_eq!(links[3].alias.as_deref(), Some("Alias")); + } + + #[test] + fn extracts_markdown_structure_from_malformed_input() { + let document = parse_markdown( + "note.md", + "# Title\n\nParagraph text\n\n- item\n\n```rust\nfn main() {}\n", + ); + + assert_eq!(document.headings, vec!["Title"]); + assert!( + document + .blocks + .iter() + .any(|block| matches!(block.kind, MarkdownBlockKind::Paragraph)) + ); + assert!( + document + .blocks + .iter() + .any(|block| matches!(block.kind, MarkdownBlockKind::List)) + ); + assert!( + 
document + .blocks + .iter() + .any(|block| matches!(block.kind, MarkdownBlockKind::CodeBlock)) + ); + } +} diff --git a/src/vault.rs b/src/vault.rs new file mode 100644 index 0000000..e1f5cf8 --- /dev/null +++ b/src/vault.rs @@ -0,0 +1,312 @@ +use std::fmt; +use std::fs; +use std::path::{Path, PathBuf}; +use std::time::UNIX_EPOCH; + +use anyhow::{Context, Result}; +use serde::Serialize; +use tracing::{debug, warn}; +use walkdir::{DirEntry, WalkDir}; + +use crate::config::Config; +use crate::markdown::{MarkdownBlock, Wikilink, parse_markdown}; + +#[derive(Clone, Debug, Serialize)] +pub struct VaultIndex { + pub vault_path: PathBuf, + pub notes: Vec, + pub markdown_count: usize, + pub skipped_dirs: Vec, +} + +#[derive(Clone, Debug, Serialize)] +pub struct NoteMetadata { + pub path: PathBuf, + pub filename: String, + pub title: String, + pub modified_unix_secs: Option, + pub file_size: u64, + pub headings: Vec, + pub blocks: Vec, + pub wikilinks: Vec, +} + +#[derive(Clone, Debug, Serialize)] +pub struct IndexSummary { + pub vault_path: PathBuf, + pub notes_indexed: usize, + pub markdown_files: usize, + pub headings: usize, + pub blocks: usize, + pub wikilinks: usize, + pub skipped_dirs: Vec, +} + +#[derive(Clone, Debug, Serialize)] +pub struct SearchResult { + pub note: NoteMetadata, + pub score: usize, +} + +#[derive(Clone, Debug, Serialize)] +pub struct ContextBundle { + pub query: String, + pub sources: Vec, +} + +impl VaultIndex { + pub fn scan(config: &Config) -> Result { + let vault_path = config + .vault + .path + .canonicalize() + .unwrap_or_else(|_| config.vault.path.clone()); + let mut notes = Vec::new(); + let mut skipped_dirs = Vec::new(); + + let walker = WalkDir::new(&config.vault.path) + .follow_links(false) + .into_iter() + .filter_entry(|entry| { + should_enter(entry, &config.vault.path, config, &mut skipped_dirs) + }); + + for entry in walker { + let entry = match entry { + Ok(entry) => entry, + Err(err) => { + warn!("skipping unreadable path: 
{err}"); + continue; + } + }; + + if !entry.file_type().is_file() || !is_markdown(entry.path()) { + continue; + } + + let note = read_note(entry.path(), &config.vault.path)?; + debug!( + path = %note.path.display(), + title = %note.title, + size = note.file_size, + headings = note.headings.len(), + links = note.wikilinks.len(), + "indexed note metadata" + ); + notes.push(note); + } + + notes.sort_by(|a, b| a.path.cmp(&b.path)); + let markdown_count = notes.len(); + + Ok(Self { + vault_path, + notes, + markdown_count, + skipped_dirs, + }) + } + + pub fn summary(&self) -> IndexSummary { + IndexSummary { + vault_path: self.vault_path.clone(), + notes_indexed: self.notes.len(), + markdown_files: self.markdown_count, + headings: self.notes.iter().map(|note| note.headings.len()).sum(), + blocks: self.notes.iter().map(|note| note.blocks.len()).sum(), + wikilinks: self.notes.iter().map(|note| note.wikilinks.len()).sum(), + skipped_dirs: self.skipped_dirs.clone(), + } + } + + pub fn search(&self, query: &str, limit: usize) -> Vec { + let terms = query_terms(query); + let mut results: Vec<_> = self + .notes + .iter() + .filter_map(|note| { + let haystack = format!( + "{} {} {}", + note.path.display(), + note.title, + note.blocks + .iter() + .map(|block| block.text.as_str()) + .collect::>() + .join(" ") + ) + .to_lowercase(); + let score = terms + .iter() + .filter(|term| haystack.contains(term.as_str())) + .count(); + (score > 0).then(|| SearchResult { + note: note.clone(), + score, + }) + }) + .collect(); + + results.sort_by(|a, b| { + b.score + .cmp(&a.score) + .then_with(|| a.note.path.cmp(&b.note.path)) + }); + results.truncate(limit); + results + } + + pub fn context_bundle(&self, query: &str, limit: usize) -> ContextBundle { + ContextBundle { + query: query.to_string(), + sources: self.search(query, limit), + } + } +} + +impl ContextBundle { + pub fn to_markdown(&self) -> String { + let mut out = format!("# Glassmind Context\n\nQuery: `{}`\n\n", self.query); + if 
self.sources.is_empty() { + out.push_str("No matching markdown notes were found.\n"); + return out; + } + + out.push_str("## Sources\n\n"); + for (idx, result) in self.sources.iter().enumerate() { + out.push_str(&format!( + "{}. `{}` - score {}\n", + idx + 1, + result.note.path.display(), + result.score + )); + out.push_str(&format!(" - title: {}\n", result.note.title)); + if !result.note.headings.is_empty() { + out.push_str(&format!( + " - headings: {}\n", + result.note.headings.join(" > ") + )); + } + if !result.note.wikilinks.is_empty() { + let links = result + .note + .wikilinks + .iter() + .map(|link| match &link.alias { + Some(alias) => format!("{} as {}", link.target, alias), + None => link.target.clone(), + }) + .collect::>() + .join(", "); + out.push_str(&format!(" - wikilinks: {links}\n")); + } + } + out + } +} + +impl fmt::Display for IndexSummary { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "Vault: {}", self.vault_path.display())?; + writeln!(f, "Notes indexed: {}", self.notes_indexed)?; + writeln!(f, "Markdown files: {}", self.markdown_files)?; + writeln!(f, "Headings parsed: {}", self.headings)?; + writeln!(f, "Markdown blocks: {}", self.blocks)?; + writeln!(f, "Wikilinks: {}", self.wikilinks)?; + writeln!(f, "Skipped dirs: {}", self.skipped_dirs.len()) + } +} + +fn read_note(path: &Path, vault_path: &Path) -> Result { + let content = + fs::read_to_string(path).with_context(|| format!("failed to read {}", path.display()))?; + let metadata = + fs::metadata(path).with_context(|| format!("failed to stat {}", path.display()))?; + let relative_path = path.strip_prefix(vault_path).unwrap_or(path).to_path_buf(); + let source_path = relative_path.to_string_lossy().replace('\\', "/"); + let parsed = parse_markdown(&source_path, &content); + + Ok(NoteMetadata { + path: relative_path, + filename: path + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or_default() + .to_string(), + title: extract_title(path, 
&parsed.headings), + modified_unix_secs: metadata + .modified() + .ok() + .and_then(|modified| modified.duration_since(UNIX_EPOCH).ok()) + .map(|duration| duration.as_secs()), + file_size: metadata.len(), + headings: parsed.headings, + blocks: parsed.blocks, + wikilinks: parsed.wikilinks, + }) +} + +fn extract_title(path: &Path, headings: &[String]) -> String { + headings.first().cloned().unwrap_or_else(|| { + path.file_stem() + .and_then(|stem| stem.to_str()) + .unwrap_or("Untitled") + .to_string() + }) +} + +fn should_enter( + entry: &DirEntry, + vault_path: &Path, + config: &Config, + skipped_dirs: &mut Vec, +) -> bool { + if !entry.file_type().is_dir() { + return true; + } + + let relative = entry + .path() + .strip_prefix(vault_path) + .unwrap_or(entry.path()); + if relative.as_os_str().is_empty() { + return true; + } + + let normalized = relative.to_string_lossy().replace('\\', "/"); + let ignored = config + .index + .ignore_dirs + .iter() + .any(|ignore| normalized == *ignore || normalized.starts_with(&format!("{ignore}/"))); + + let agent_excluded = !config.index.include_agent_dir + && normalized + .split('/') + .next() + .is_some_and(|component| component == config.writes.agent_dir.to_string_lossy()); + + if ignored || agent_excluded { + skipped_dirs.push(relative.to_path_buf()); + false + } else { + true + } +} + +fn is_markdown(path: &Path) -> bool { + path.extension() + .and_then(|extension| extension.to_str()) + .is_some_and(|extension| extension.eq_ignore_ascii_case("md")) +} + +fn query_terms(query: &str) -> Vec { + query + .split_whitespace() + .map(|term| { + term.trim_matches(|c: char| !c.is_alphanumeric()) + .to_lowercase() + }) + .filter(|term| !term.is_empty()) + .collect() +}