mirror of
https://github.com/khodges42/glassMind.git
synced 2026-06-14 18:18:36 +00:00
next chunk of the boring but important indexing layer
This commit is contained in:
parent
fc9b2efd0b
commit
9fb82b5324
176
Cargo.lock
generated
176
Cargo.lock
generated
|
|
@ -73,6 +73,25 @@ version = "2.11.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
|
checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "block-buffer"
|
||||||
|
version = "0.10.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
|
||||||
|
dependencies = [
|
||||||
|
"generic-array",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cc"
|
||||||
|
version = "1.2.62"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98"
|
||||||
|
dependencies = [
|
||||||
|
"find-msvc-tools",
|
||||||
|
"shlex",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cfg-if"
|
name = "cfg-if"
|
||||||
version = "1.0.4"
|
version = "1.0.4"
|
||||||
|
|
@ -125,12 +144,75 @@ version = "1.0.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
|
checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cpufeatures"
|
||||||
|
version = "0.2.17"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crypto-common"
|
||||||
|
version = "0.1.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
|
||||||
|
dependencies = [
|
||||||
|
"generic-array",
|
||||||
|
"typenum",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "digest"
|
||||||
|
version = "0.10.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
|
||||||
|
dependencies = [
|
||||||
|
"block-buffer",
|
||||||
|
"crypto-common",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "equivalent"
|
name = "equivalent"
|
||||||
version = "1.0.2"
|
version = "1.0.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
|
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fallible-iterator"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fallible-streaming-iterator"
|
||||||
|
version = "0.1.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "find-msvc-tools"
|
||||||
|
version = "0.1.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "foldhash"
|
||||||
|
version = "0.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "generic-array"
|
||||||
|
version = "0.14.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
|
||||||
|
dependencies = [
|
||||||
|
"typenum",
|
||||||
|
"version_check",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "getopts"
|
name = "getopts"
|
||||||
version = "0.2.24"
|
version = "0.2.24"
|
||||||
|
|
@ -148,20 +230,40 @@ dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"pulldown-cmark",
|
"pulldown-cmark",
|
||||||
"regex",
|
"regex",
|
||||||
|
"rusqlite",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
"sha2",
|
||||||
"toml",
|
"toml",
|
||||||
"tracing",
|
"tracing",
|
||||||
"tracing-subscriber",
|
"tracing-subscriber",
|
||||||
"walkdir",
|
"walkdir",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hashbrown"
|
||||||
|
version = "0.15.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
|
||||||
|
dependencies = [
|
||||||
|
"foldhash",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hashbrown"
|
name = "hashbrown"
|
||||||
version = "0.17.1"
|
version = "0.17.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
|
checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hashlink"
|
||||||
|
version = "0.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
|
||||||
|
dependencies = [
|
||||||
|
"hashbrown 0.15.5",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heck"
|
name = "heck"
|
||||||
version = "0.5.0"
|
version = "0.5.0"
|
||||||
|
|
@ -175,7 +277,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
|
checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"equivalent",
|
"equivalent",
|
||||||
"hashbrown",
|
"hashbrown 0.17.1",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -196,6 +298,23 @@ version = "1.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libc"
|
||||||
|
version = "0.2.186"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libsqlite3-sys"
|
||||||
|
version = "0.35.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "133c182a6a2c87864fe97778797e46c7e999672690dc9fa3ee8e241aa4a9c13f"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"pkg-config",
|
||||||
|
"vcpkg",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "log"
|
name = "log"
|
||||||
version = "0.4.29"
|
version = "0.4.29"
|
||||||
|
|
@ -244,6 +363,12 @@ version = "0.2.17"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
|
checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pkg-config"
|
||||||
|
version = "0.3.33"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro2"
|
name = "proc-macro2"
|
||||||
version = "1.0.106"
|
version = "1.0.106"
|
||||||
|
|
@ -310,6 +435,20 @@ version = "0.8.10"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
|
checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rusqlite"
|
||||||
|
version = "0.37.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "165ca6e57b20e1351573e3729b958bc62f0e48025386970b6e4d29e7a7e71f3f"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"fallible-iterator",
|
||||||
|
"fallible-streaming-iterator",
|
||||||
|
"hashlink",
|
||||||
|
"libsqlite3-sys",
|
||||||
|
"smallvec",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "same-file"
|
name = "same-file"
|
||||||
version = "1.0.6"
|
version = "1.0.6"
|
||||||
|
|
@ -371,6 +510,17 @@ dependencies = [
|
||||||
"serde_core",
|
"serde_core",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "sha2"
|
||||||
|
version = "0.10.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"cpufeatures",
|
||||||
|
"digest",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sharded-slab"
|
name = "sharded-slab"
|
||||||
version = "0.1.7"
|
version = "0.1.7"
|
||||||
|
|
@ -380,6 +530,12 @@ dependencies = [
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "shlex"
|
||||||
|
version = "1.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "smallvec"
|
name = "smallvec"
|
||||||
version = "1.15.1"
|
version = "1.15.1"
|
||||||
|
|
@ -512,6 +668,12 @@ dependencies = [
|
||||||
"tracing-log",
|
"tracing-log",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "typenum"
|
||||||
|
version = "1.20.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicase"
|
name = "unicase"
|
||||||
version = "2.9.0"
|
version = "2.9.0"
|
||||||
|
|
@ -542,6 +704,18 @@ version = "0.1.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
|
checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "vcpkg"
|
||||||
|
version = "0.2.15"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "version_check"
|
||||||
|
version = "0.9.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "walkdir"
|
name = "walkdir"
|
||||||
version = "2.5.0"
|
version = "2.5.0"
|
||||||
|
|
|
||||||
|
|
@ -8,8 +8,10 @@ anyhow = "1.0"
|
||||||
clap = { version = "4.5", features = ["derive"] }
|
clap = { version = "4.5", features = ["derive"] }
|
||||||
pulldown-cmark = "0.13"
|
pulldown-cmark = "0.13"
|
||||||
regex = "1.11"
|
regex = "1.11"
|
||||||
|
rusqlite = { version = "0.37", features = ["bundled"] }
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
|
sha2 = "0.10"
|
||||||
toml = "0.9"
|
toml = "0.9"
|
||||||
tracing = "0.1"
|
tracing = "0.1"
|
||||||
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
|
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
|
||||||
|
|
|
||||||
|
|
@ -232,7 +232,7 @@ Store:
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## [ ] GM-011 — Extract tags
|
## [x] GM-011 — Extract tags
|
||||||
|
|
||||||
### Goals
|
### Goals
|
||||||
Parse tags from notes.
|
Parse tags from notes.
|
||||||
|
|
@ -254,7 +254,7 @@ Normalize:
|
||||||
|
|
||||||
# Phase 3 — Database Layer
|
# Phase 3 — Database Layer
|
||||||
|
|
||||||
## [ ] GM-012 — Add SQLite integration
|
## [x] GM-012 — Add SQLite integration
|
||||||
|
|
||||||
### Goals
|
### Goals
|
||||||
Create local metadata database.
|
Create local metadata database.
|
||||||
|
|
@ -271,7 +271,7 @@ Create local metadata database.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## [ ] GM-013 — Create notes table
|
## [x] GM-013 — Create notes table
|
||||||
|
|
||||||
### Goals
|
### Goals
|
||||||
Store note metadata.
|
Store note metadata.
|
||||||
|
|
@ -289,7 +289,7 @@ Create schema for:
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## [ ] GM-014 — Create chunks table
|
## [x] GM-014 — Create chunks table
|
||||||
|
|
||||||
### Goals
|
### Goals
|
||||||
Store retrieval chunks.
|
Store retrieval chunks.
|
||||||
|
|
@ -308,7 +308,7 @@ Store:
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## [ ] GM-015 — Add content hashing
|
## [x] GM-015 — Add content hashing
|
||||||
|
|
||||||
### Goals
|
### Goals
|
||||||
Detect changed notes efficiently.
|
Detect changed notes efficiently.
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,9 @@
|
||||||
[vault]
|
[vault]
|
||||||
path = "."
|
path = "."
|
||||||
|
|
||||||
|
[database]
|
||||||
|
path = ".agent/cache/glassmind.sqlite3"
|
||||||
|
|
||||||
[index]
|
[index]
|
||||||
include_agent_dir = true
|
include_agent_dir = true
|
||||||
ignore_dirs = [
|
ignore_dirs = [
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ use serde::{Deserialize, Serialize};
|
||||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||||
pub struct Config {
|
pub struct Config {
|
||||||
pub vault: VaultConfig,
|
pub vault: VaultConfig,
|
||||||
|
pub database: DatabaseConfig,
|
||||||
pub index: IndexConfig,
|
pub index: IndexConfig,
|
||||||
pub embeddings: EmbeddingsConfig,
|
pub embeddings: EmbeddingsConfig,
|
||||||
pub search: SearchConfig,
|
pub search: SearchConfig,
|
||||||
|
|
@ -19,6 +20,11 @@ pub struct VaultConfig {
|
||||||
pub path: PathBuf,
|
pub path: PathBuf,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||||
|
pub struct DatabaseConfig {
|
||||||
|
pub path: PathBuf,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||||
pub struct IndexConfig {
|
pub struct IndexConfig {
|
||||||
pub include_agent_dir: bool,
|
pub include_agent_dir: bool,
|
||||||
|
|
@ -93,6 +99,9 @@ impl Config {
|
||||||
if self.server.port == 0 {
|
if self.server.port == 0 {
|
||||||
bail!("server.port must be greater than zero");
|
bail!("server.port must be greater than zero");
|
||||||
}
|
}
|
||||||
|
if self.database.path.as_os_str().is_empty() {
|
||||||
|
bail!("database.path must not be empty");
|
||||||
|
}
|
||||||
match self.writes.mode.as_str() {
|
match self.writes.mode.as_str() {
|
||||||
"off" | "agent-only" | "propose" | "allow" => {}
|
"off" | "agent-only" | "propose" | "allow" => {}
|
||||||
other => {
|
other => {
|
||||||
|
|
@ -138,6 +147,9 @@ impl Default for Config {
|
||||||
vault: VaultConfig {
|
vault: VaultConfig {
|
||||||
path: PathBuf::from("."),
|
path: PathBuf::from("."),
|
||||||
},
|
},
|
||||||
|
database: DatabaseConfig {
|
||||||
|
path: PathBuf::from(".agent/cache/glassmind.sqlite3"),
|
||||||
|
},
|
||||||
index: IndexConfig {
|
index: IndexConfig {
|
||||||
include_agent_dir: true,
|
include_agent_dir: true,
|
||||||
ignore_dirs: vec![
|
ignore_dirs: vec![
|
||||||
|
|
|
||||||
277
src/db.rs
Normal file
277
src/db.rs
Normal file
|
|
@ -0,0 +1,277 @@
|
||||||
|
use std::fs;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
use anyhow::{Context, Result};
|
||||||
|
use rusqlite::{Connection, OptionalExtension, params};
|
||||||
|
use sha2::{Digest, Sha256};
|
||||||
|
use tracing::debug;
|
||||||
|
|
||||||
|
use crate::markdown::MarkdownBlockKind;
|
||||||
|
use crate::vault::{IndexWriteSummary, NoteMetadata, VaultIndex};
|
||||||
|
|
||||||
|
pub struct IndexStore {
|
||||||
|
conn: Connection,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl IndexStore {
|
||||||
|
pub fn open(path: &Path) -> Result<Self> {
|
||||||
|
if let Some(parent) = path.parent() {
|
||||||
|
fs::create_dir_all(parent)
|
||||||
|
.with_context(|| format!("failed to create db dir {}", parent.display()))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let conn = Connection::open(path)
|
||||||
|
.with_context(|| format!("failed to open sqlite db {}", path.display()))?;
|
||||||
|
let store = Self { conn };
|
||||||
|
store.bootstrap()?;
|
||||||
|
Ok(store)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn write_index(&mut self, index: &VaultIndex) -> Result<IndexWriteSummary> {
|
||||||
|
let tx = self.conn.transaction()?;
|
||||||
|
let mut summary = IndexWriteSummary::default();
|
||||||
|
|
||||||
|
// This is a rebuildable cache, so changed notes get their child rows replaced in place.
|
||||||
|
for note in &index.notes {
|
||||||
|
summary.notes_seen += 1;
|
||||||
|
let existing_hash = existing_note_hash(&tx, ¬e.path)?;
|
||||||
|
if existing_hash.as_deref() == Some(note.content_hash.as_str()) {
|
||||||
|
summary.unchanged_notes += 1;
|
||||||
|
debug!(path = %note.path.display(), "skipping unchanged note");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
summary.changed_notes += 1;
|
||||||
|
let note_id = upsert_note(&tx, note)?;
|
||||||
|
clear_note_children(&tx, note_id)?;
|
||||||
|
insert_chunks(&tx, note_id, note, &mut summary)?;
|
||||||
|
insert_tags(&tx, note_id, note, &mut summary)?;
|
||||||
|
insert_links(&tx, note_id, note, &mut summary)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
tx.commit()?;
|
||||||
|
Ok(summary)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bootstrap(&self) -> Result<()> {
|
||||||
|
self.conn.execute_batch(
|
||||||
|
r#"
|
||||||
|
PRAGMA foreign_keys = ON;
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS migrations (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
name TEXT NOT NULL UNIQUE,
|
||||||
|
applied_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS notes (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
path TEXT NOT NULL UNIQUE,
|
||||||
|
filename TEXT NOT NULL,
|
||||||
|
title TEXT NOT NULL,
|
||||||
|
modified_unix_secs INTEGER,
|
||||||
|
file_size INTEGER NOT NULL,
|
||||||
|
content_hash TEXT NOT NULL,
|
||||||
|
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS chunks (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
note_id INTEGER NOT NULL,
|
||||||
|
chunk_index INTEGER NOT NULL,
|
||||||
|
heading_path TEXT NOT NULL,
|
||||||
|
content TEXT NOT NULL,
|
||||||
|
chunk_type TEXT NOT NULL,
|
||||||
|
start_line INTEGER NOT NULL,
|
||||||
|
end_line INTEGER NOT NULL,
|
||||||
|
token_estimate INTEGER NOT NULL,
|
||||||
|
content_hash TEXT NOT NULL,
|
||||||
|
FOREIGN KEY(note_id) REFERENCES notes(id) ON DELETE CASCADE,
|
||||||
|
UNIQUE(note_id, chunk_index)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS tags (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
name TEXT NOT NULL UNIQUE
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS note_tags (
|
||||||
|
note_id INTEGER NOT NULL,
|
||||||
|
tag_id INTEGER NOT NULL,
|
||||||
|
FOREIGN KEY(note_id) REFERENCES notes(id) ON DELETE CASCADE,
|
||||||
|
FOREIGN KEY(tag_id) REFERENCES tags(id) ON DELETE CASCADE,
|
||||||
|
PRIMARY KEY(note_id, tag_id)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS links (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
source_note_id INTEGER NOT NULL,
|
||||||
|
target TEXT NOT NULL,
|
||||||
|
alias TEXT,
|
||||||
|
link_type TEXT NOT NULL DEFAULT 'wikilink',
|
||||||
|
FOREIGN KEY(source_note_id) REFERENCES notes(id) ON DELETE CASCADE
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT OR IGNORE INTO migrations (id, name) VALUES (1, 'initial_metadata_index');
|
||||||
|
"#,
|
||||||
|
)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn existing_note_hash(conn: &Connection, path: &Path) -> Result<Option<String>> {
|
||||||
|
conn.query_row(
|
||||||
|
"SELECT content_hash FROM notes WHERE path = ?1",
|
||||||
|
[path_to_db(path)],
|
||||||
|
|row| row.get(0),
|
||||||
|
)
|
||||||
|
.optional()
|
||||||
|
.context("failed to read existing note hash")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn upsert_note(conn: &Connection, note: &NoteMetadata) -> Result<i64> {
|
||||||
|
conn.execute(
|
||||||
|
r#"
|
||||||
|
INSERT INTO notes (
|
||||||
|
path,
|
||||||
|
filename,
|
||||||
|
title,
|
||||||
|
modified_unix_secs,
|
||||||
|
file_size,
|
||||||
|
content_hash,
|
||||||
|
updated_at
|
||||||
|
)
|
||||||
|
VALUES (?1, ?2, ?3, ?4, ?5, ?6, CURRENT_TIMESTAMP)
|
||||||
|
ON CONFLICT(path) DO UPDATE SET
|
||||||
|
filename = excluded.filename,
|
||||||
|
title = excluded.title,
|
||||||
|
modified_unix_secs = excluded.modified_unix_secs,
|
||||||
|
file_size = excluded.file_size,
|
||||||
|
content_hash = excluded.content_hash,
|
||||||
|
updated_at = CURRENT_TIMESTAMP
|
||||||
|
"#,
|
||||||
|
params![
|
||||||
|
path_to_db(¬e.path),
|
||||||
|
note.filename,
|
||||||
|
note.title,
|
||||||
|
note.modified_unix_secs,
|
||||||
|
note.file_size,
|
||||||
|
note.content_hash,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
|
||||||
|
conn.query_row(
|
||||||
|
"SELECT id FROM notes WHERE path = ?1",
|
||||||
|
[path_to_db(¬e.path)],
|
||||||
|
|row| row.get(0),
|
||||||
|
)
|
||||||
|
.context("failed to read upserted note id")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Deletes all chunk, tag-association and link rows that belong to `note_id`.
///
/// Rows in `tags` themselves are not removed, only the `note_tags` associations.
fn clear_note_children(conn: &Connection, note_id: i64) -> Result<()> {
    conn.execute("DELETE FROM chunks WHERE note_id = ?1", [note_id])?;
    conn.execute("DELETE FROM note_tags WHERE note_id = ?1", [note_id])?;
    conn.execute("DELETE FROM links WHERE source_note_id = ?1", [note_id])?;
    Ok(())
}
|
||||||
|
|
||||||
|
fn insert_chunks(
|
||||||
|
conn: &Connection,
|
||||||
|
note_id: i64,
|
||||||
|
note: &NoteMetadata,
|
||||||
|
summary: &mut IndexWriteSummary,
|
||||||
|
) -> Result<()> {
|
||||||
|
for (idx, block) in note.blocks.iter().enumerate() {
|
||||||
|
if block.text.trim().is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
conn.execute(
|
||||||
|
r#"
|
||||||
|
INSERT INTO chunks (
|
||||||
|
note_id,
|
||||||
|
chunk_index,
|
||||||
|
heading_path,
|
||||||
|
content,
|
||||||
|
chunk_type,
|
||||||
|
start_line,
|
||||||
|
end_line,
|
||||||
|
token_estimate,
|
||||||
|
content_hash
|
||||||
|
)
|
||||||
|
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)
|
||||||
|
"#,
|
||||||
|
params![
|
||||||
|
note_id,
|
||||||
|
idx as i64,
|
||||||
|
block.heading_path.join(" > "),
|
||||||
|
block.text,
|
||||||
|
chunk_type(&block.kind),
|
||||||
|
block.start_line as i64,
|
||||||
|
block.end_line as i64,
|
||||||
|
estimate_tokens(&block.text) as i64,
|
||||||
|
sha256_hex(&block.text),
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
summary.chunks_written += 1;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn insert_tags(
|
||||||
|
conn: &Connection,
|
||||||
|
note_id: i64,
|
||||||
|
note: &NoteMetadata,
|
||||||
|
summary: &mut IndexWriteSummary,
|
||||||
|
) -> Result<()> {
|
||||||
|
for tag in ¬e.tags {
|
||||||
|
conn.execute("INSERT OR IGNORE INTO tags (name) VALUES (?1)", [tag])?;
|
||||||
|
let tag_id: i64 = conn.query_row("SELECT id FROM tags WHERE name = ?1", [tag], |row| {
|
||||||
|
row.get(0)
|
||||||
|
})?;
|
||||||
|
conn.execute(
|
||||||
|
"INSERT OR IGNORE INTO note_tags (note_id, tag_id) VALUES (?1, ?2)",
|
||||||
|
params![note_id, tag_id],
|
||||||
|
)?;
|
||||||
|
summary.tags_seen += 1;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn insert_links(
|
||||||
|
conn: &Connection,
|
||||||
|
note_id: i64,
|
||||||
|
note: &NoteMetadata,
|
||||||
|
summary: &mut IndexWriteSummary,
|
||||||
|
) -> Result<()> {
|
||||||
|
for link in ¬e.wikilinks {
|
||||||
|
conn.execute(
|
||||||
|
"INSERT INTO links (source_note_id, target, alias, link_type) VALUES (?1, ?2, ?3, 'wikilink')",
|
||||||
|
params![note_id, link.target, link.alias],
|
||||||
|
)?;
|
||||||
|
summary.links_written += 1;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn sha256_hex(content: &str) -> String {
|
||||||
|
format!("{:x}", Sha256::digest(content.as_bytes()))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Rough token count for `content`: the number of whitespace-separated words,
/// never less than 1 (so empty or blank input still reports one token).
fn estimate_tokens(content: &str) -> usize {
    content.split_whitespace().count().max(1)
}
|
||||||
|
|
||||||
|
fn chunk_type(kind: &MarkdownBlockKind) -> &'static str {
|
||||||
|
match kind {
|
||||||
|
MarkdownBlockKind::Heading => "heading",
|
||||||
|
MarkdownBlockKind::Paragraph => "paragraph",
|
||||||
|
MarkdownBlockKind::CodeBlock => "code_block",
|
||||||
|
MarkdownBlockKind::List => "list",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts `path` to the string form used as the `notes.path` key.
///
/// Non-UTF-8 sequences are replaced lossily, and every backslash is rewritten
/// to a forward slash so the stored key always uses `/` as its separator.
fn path_to_db(path: &Path) -> String {
    PathBuf::from(path).to_string_lossy().replace('\\', "/")
}
|
||||||
12
src/main.rs
12
src/main.rs
|
|
@ -1,5 +1,6 @@
|
||||||
mod cli;
|
mod cli;
|
||||||
mod config;
|
mod config;
|
||||||
|
mod db;
|
||||||
mod logging;
|
mod logging;
|
||||||
mod markdown;
|
mod markdown;
|
||||||
mod vault;
|
mod vault;
|
||||||
|
|
@ -10,6 +11,7 @@ use tracing::{debug, info};
|
||||||
|
|
||||||
use crate::cli::{Cli, Commands, OutputFormat};
|
use crate::cli::{Cli, Commands, OutputFormat};
|
||||||
use crate::config::Config;
|
use crate::config::Config;
|
||||||
|
use crate::db::IndexStore;
|
||||||
use crate::vault::VaultIndex;
|
use crate::vault::VaultIndex;
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
|
|
@ -25,10 +27,16 @@ fn main() -> Result<()> {
|
||||||
Commands::Init { force } => init_project(&config, force),
|
Commands::Init { force } => init_project(&config, force),
|
||||||
Commands::Index { json } => {
|
Commands::Index { json } => {
|
||||||
let index = VaultIndex::scan(&config)?;
|
let index = VaultIndex::scan(&config)?;
|
||||||
|
config.create_agent_dirs()?;
|
||||||
|
// Indexing writes the rebuildable cache, while search can still scan live markdown.
|
||||||
|
let db_path = config.vault.path.join(&config.database.path);
|
||||||
|
let mut store = IndexStore::open(&db_path)?;
|
||||||
|
let writes = store.write_index(&index)?;
|
||||||
|
let summary = index.summary_with_writes(writes);
|
||||||
if json {
|
if json {
|
||||||
println!("{}", serde_json::to_string_pretty(&index.summary())?);
|
println!("{}", serde_json::to_string_pretty(&summary)?);
|
||||||
} else {
|
} else {
|
||||||
println!("{}", index.summary());
|
println!("{summary}");
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
125
src/markdown.rs
125
src/markdown.rs
|
|
@ -1,3 +1,5 @@
|
||||||
|
use std::collections::BTreeSet;
|
||||||
|
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
||||||
|
|
@ -6,6 +8,7 @@ pub struct MarkdownDocument {
|
||||||
pub headings: Vec<String>,
|
pub headings: Vec<String>,
|
||||||
pub blocks: Vec<MarkdownBlock>,
|
pub blocks: Vec<MarkdownBlock>,
|
||||||
pub wikilinks: Vec<Wikilink>,
|
pub wikilinks: Vec<Wikilink>,
|
||||||
|
pub tags: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Serialize)]
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
|
@ -14,6 +17,7 @@ pub struct MarkdownBlock {
|
||||||
pub text: String,
|
pub text: String,
|
||||||
pub start_line: usize,
|
pub start_line: usize,
|
||||||
pub end_line: usize,
|
pub end_line: usize,
|
||||||
|
pub heading_path: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Serialize)]
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
|
@ -42,11 +46,13 @@ pub fn parse_markdown(source_path: &str, content: &str) -> MarkdownDocument {
|
||||||
let mut in_code = false;
|
let mut in_code = false;
|
||||||
let mut code = Vec::new();
|
let mut code = Vec::new();
|
||||||
let mut code_start = 0;
|
let mut code_start = 0;
|
||||||
|
let mut heading_stack: Vec<(usize, String)> = Vec::new();
|
||||||
|
|
||||||
for (idx, line) in content.lines().enumerate() {
|
for (idx, line) in content.lines().enumerate() {
|
||||||
let line_no = idx + 1;
|
let line_no = idx + 1;
|
||||||
let trimmed = line.trim();
|
let trimmed = line.trim();
|
||||||
|
|
||||||
|
// Code fences get kept whole so later chunks stay readable.
|
||||||
if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
|
if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
|
||||||
if in_code {
|
if in_code {
|
||||||
code.push(line.to_string());
|
code.push(line.to_string());
|
||||||
|
|
@ -55,6 +61,7 @@ pub fn parse_markdown(source_path: &str, content: &str) -> MarkdownDocument {
|
||||||
text: code.join("\n"),
|
text: code.join("\n"),
|
||||||
start_line: code_start,
|
start_line: code_start,
|
||||||
end_line: line_no,
|
end_line: line_no,
|
||||||
|
heading_path: current_heading_path(&heading_stack),
|
||||||
});
|
});
|
||||||
code.clear();
|
code.clear();
|
||||||
in_code = false;
|
in_code = false;
|
||||||
|
|
@ -64,6 +71,7 @@ pub fn parse_markdown(source_path: &str, content: &str) -> MarkdownDocument {
|
||||||
&mut paragraph,
|
&mut paragraph,
|
||||||
paragraph_start,
|
paragraph_start,
|
||||||
line_no.saturating_sub(1),
|
line_no.saturating_sub(1),
|
||||||
|
&heading_stack,
|
||||||
);
|
);
|
||||||
in_code = true;
|
in_code = true;
|
||||||
code_start = line_no;
|
code_start = line_no;
|
||||||
|
|
@ -77,19 +85,28 @@ pub fn parse_markdown(source_path: &str, content: &str) -> MarkdownDocument {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(heading) = parse_heading(trimmed) {
|
if let Some((level, heading)) = parse_heading(trimmed) {
|
||||||
flush_paragraph(
|
flush_paragraph(
|
||||||
&mut blocks,
|
&mut blocks,
|
||||||
&mut paragraph,
|
&mut paragraph,
|
||||||
paragraph_start,
|
paragraph_start,
|
||||||
line_no.saturating_sub(1),
|
line_no.saturating_sub(1),
|
||||||
|
&heading_stack,
|
||||||
);
|
);
|
||||||
|
while heading_stack
|
||||||
|
.last()
|
||||||
|
.is_some_and(|(last_level, _)| *last_level >= level)
|
||||||
|
{
|
||||||
|
heading_stack.pop();
|
||||||
|
}
|
||||||
|
heading_stack.push((level, heading.clone()));
|
||||||
headings.push(heading.clone());
|
headings.push(heading.clone());
|
||||||
blocks.push(MarkdownBlock {
|
blocks.push(MarkdownBlock {
|
||||||
kind: MarkdownBlockKind::Heading,
|
kind: MarkdownBlockKind::Heading,
|
||||||
text: heading,
|
text: heading,
|
||||||
start_line: line_no,
|
start_line: line_no,
|
||||||
end_line: line_no,
|
end_line: line_no,
|
||||||
|
heading_path: current_heading_path(&heading_stack),
|
||||||
});
|
});
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
@ -100,12 +117,14 @@ pub fn parse_markdown(source_path: &str, content: &str) -> MarkdownDocument {
|
||||||
&mut paragraph,
|
&mut paragraph,
|
||||||
paragraph_start,
|
paragraph_start,
|
||||||
line_no.saturating_sub(1),
|
line_no.saturating_sub(1),
|
||||||
|
&heading_stack,
|
||||||
);
|
);
|
||||||
blocks.push(MarkdownBlock {
|
blocks.push(MarkdownBlock {
|
||||||
kind: MarkdownBlockKind::List,
|
kind: MarkdownBlockKind::List,
|
||||||
text: trimmed.to_string(),
|
text: trimmed.to_string(),
|
||||||
start_line: line_no,
|
start_line: line_no,
|
||||||
end_line: line_no,
|
end_line: line_no,
|
||||||
|
heading_path: current_heading_path(&heading_stack),
|
||||||
});
|
});
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
@ -116,6 +135,7 @@ pub fn parse_markdown(source_path: &str, content: &str) -> MarkdownDocument {
|
||||||
&mut paragraph,
|
&mut paragraph,
|
||||||
paragraph_start,
|
paragraph_start,
|
||||||
line_no.saturating_sub(1),
|
line_no.saturating_sub(1),
|
||||||
|
&heading_stack,
|
||||||
);
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
@ -133,14 +153,22 @@ pub fn parse_markdown(source_path: &str, content: &str) -> MarkdownDocument {
|
||||||
text: code.join("\n"),
|
text: code.join("\n"),
|
||||||
start_line: code_start,
|
start_line: code_start,
|
||||||
end_line: final_line,
|
end_line: final_line,
|
||||||
|
heading_path: current_heading_path(&heading_stack),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
flush_paragraph(&mut blocks, &mut paragraph, paragraph_start, final_line);
|
flush_paragraph(
|
||||||
|
&mut blocks,
|
||||||
|
&mut paragraph,
|
||||||
|
paragraph_start,
|
||||||
|
final_line,
|
||||||
|
&heading_stack,
|
||||||
|
);
|
||||||
|
|
||||||
MarkdownDocument {
|
MarkdownDocument {
|
||||||
headings,
|
headings,
|
||||||
blocks,
|
blocks,
|
||||||
wikilinks: extract_wikilinks(source_path, content),
|
wikilinks: extract_wikilinks(source_path, content),
|
||||||
|
tags: extract_tags(content),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -149,6 +177,7 @@ fn flush_paragraph(
|
||||||
paragraph: &mut Vec<String>,
|
paragraph: &mut Vec<String>,
|
||||||
start_line: usize,
|
start_line: usize,
|
||||||
end_line: usize,
|
end_line: usize,
|
||||||
|
heading_stack: &[(usize, String)],
|
||||||
) {
|
) {
|
||||||
if paragraph.is_empty() {
|
if paragraph.is_empty() {
|
||||||
return;
|
return;
|
||||||
|
|
@ -159,19 +188,27 @@ fn flush_paragraph(
|
||||||
text: paragraph.join(" "),
|
text: paragraph.join(" "),
|
||||||
start_line,
|
start_line,
|
||||||
end_line,
|
end_line,
|
||||||
|
heading_path: current_heading_path(heading_stack),
|
||||||
});
|
});
|
||||||
paragraph.clear();
|
paragraph.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_heading(trimmed: &str) -> Option<String> {
|
fn parse_heading(trimmed: &str) -> Option<(usize, String)> {
|
||||||
let hashes = trimmed.chars().take_while(|c| *c == '#').count();
|
let hashes = trimmed.chars().take_while(|c| *c == '#').count();
|
||||||
if (1..=6).contains(&hashes) && trimmed.chars().nth(hashes) == Some(' ') {
|
if (1..=6).contains(&hashes) && trimmed.chars().nth(hashes) == Some(' ') {
|
||||||
Some(trimmed[hashes + 1..].trim().to_string())
|
Some((hashes, trimmed[hashes + 1..].trim().to_string()))
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the titles held in `heading_stack`, in stack order.
///
/// Each entry is a `(level, title)` pair; the level is discarded and the title
/// is cloned, so the result owns its strings and the stack is left untouched.
fn current_heading_path(heading_stack: &[(usize, String)]) -> Vec<String> {
    let mut path = Vec::with_capacity(heading_stack.len());
    for (_, title) in heading_stack {
        path.push(title.clone());
    }
    path
}
|
||||||
|
|
||||||
fn is_list_item(trimmed: &str) -> bool {
|
fn is_list_item(trimmed: &str) -> bool {
|
||||||
trimmed.starts_with("- ")
|
trimmed.starts_with("- ")
|
||||||
|| trimmed.starts_with("* ")
|
|| trimmed.starts_with("* ")
|
||||||
|
|
@ -203,9 +240,78 @@ pub fn extract_wikilinks(source_path: &str, content: &str) -> Vec<Wikilink> {
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn extract_tags(content: &str) -> Vec<String> {
|
||||||
|
let mut tags = BTreeSet::new();
|
||||||
|
// Frontmatter and inline tags meet here, then we normalize once.
|
||||||
|
for tag in extract_frontmatter_tags(content)
|
||||||
|
.into_iter()
|
||||||
|
.chain(extract_inline_tags(content))
|
||||||
|
{
|
||||||
|
let normalized = normalize_tag(&tag);
|
||||||
|
if !normalized.is_empty() {
|
||||||
|
tags.insert(normalized);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tags.into_iter().collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_frontmatter_tags(content: &str) -> Vec<String> {
|
||||||
|
let mut tags = Vec::new();
|
||||||
|
let mut lines = content.lines();
|
||||||
|
if lines.next() != Some("---") {
|
||||||
|
return tags;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut in_tags_list = false;
|
||||||
|
for line in lines {
|
||||||
|
let trimmed = line.trim();
|
||||||
|
if trimmed == "---" {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(value) = trimmed.strip_prefix("tags:") {
|
||||||
|
in_tags_list = true;
|
||||||
|
tags.extend(split_tag_values(value));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if in_tags_list && trimmed.starts_with('-') {
|
||||||
|
tags.push(trimmed.trim_start_matches('-').trim().to_string());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if !trimmed.is_empty() && !trimmed.starts_with('#') {
|
||||||
|
in_tags_list = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tags
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_inline_tags(content: &str) -> Vec<String> {
|
||||||
|
let tag_re = Regex::new(r"(?m)(^|[\s(\[{])#([A-Za-z0-9_/-]+)").expect("valid tag regex");
|
||||||
|
tag_re
|
||||||
|
.captures_iter(content)
|
||||||
|
.filter_map(|capture| capture.get(2).map(|tag| tag.as_str().to_string()))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Splits the value part of a `tags:` line into individual tag strings.
///
/// Leading `[` and trailing `]` are removed, the remainder is split on commas,
/// and each piece is trimmed of whitespace and then of surrounding double and
/// single quotes. Pieces that end up empty are dropped.
fn split_tag_values(value: &str) -> Vec<String> {
    let inner = value
        .trim()
        .trim_start_matches('[')
        .trim_end_matches(']');

    let mut values = Vec::new();
    for piece in inner.split(',') {
        let cleaned = piece.trim().trim_matches('"').trim_matches('\'');
        if !cleaned.is_empty() {
            values.push(cleaned.to_string());
        }
    }
    values
}
|
||||||
|
|
||||||
|
/// Produces the canonical form of a tag: surrounding whitespace removed, any
/// leading `#` characters stripped, whitespace trimmed again, then lowercased.
fn normalize_tag(tag: &str) -> String {
    let without_marker = tag.trim().trim_start_matches('#');
    without_marker.trim().to_lowercase()
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::{MarkdownBlockKind, extract_wikilinks, parse_markdown};
|
use super::{MarkdownBlockKind, extract_tags, extract_wikilinks, parse_markdown};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn extracts_obsidian_wikilink_forms() {
|
fn extracts_obsidian_wikilink_forms() {
|
||||||
|
|
@ -251,4 +357,13 @@ mod tests {
|
||||||
.any(|block| matches!(block.kind, MarkdownBlockKind::CodeBlock))
|
.any(|block| matches!(block.kind, MarkdownBlockKind::CodeBlock))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn extracts_and_normalizes_tags() {
    // "Rust" appears both in frontmatter and inline and must collapse to one
    // lowercase entry; the heading line must not contribute a tag.
    let note =
        "---\ntags: [Rust, glassmind]\n---\nBody #Rust #local-first\n# Heading is not a tag\n";
    let expected = vec!["glassmind", "local-first", "rust"];

    assert_eq!(extract_tags(note), expected);
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
36
src/vault.rs
36
src/vault.rs
|
|
@ -9,6 +9,7 @@ use tracing::{debug, warn};
|
||||||
use walkdir::{DirEntry, WalkDir};
|
use walkdir::{DirEntry, WalkDir};
|
||||||
|
|
||||||
use crate::config::Config;
|
use crate::config::Config;
|
||||||
|
use crate::db::sha256_hex;
|
||||||
use crate::markdown::{MarkdownBlock, Wikilink, parse_markdown};
|
use crate::markdown::{MarkdownBlock, Wikilink, parse_markdown};
|
||||||
|
|
||||||
#[derive(Clone, Debug, Serialize)]
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
|
@ -26,9 +27,11 @@ pub struct NoteMetadata {
|
||||||
pub title: String,
|
pub title: String,
|
||||||
pub modified_unix_secs: Option<u64>,
|
pub modified_unix_secs: Option<u64>,
|
||||||
pub file_size: u64,
|
pub file_size: u64,
|
||||||
|
pub content_hash: String,
|
||||||
pub headings: Vec<String>,
|
pub headings: Vec<String>,
|
||||||
pub blocks: Vec<MarkdownBlock>,
|
pub blocks: Vec<MarkdownBlock>,
|
||||||
pub wikilinks: Vec<Wikilink>,
|
pub wikilinks: Vec<Wikilink>,
|
||||||
|
pub tags: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Serialize)]
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
|
@ -39,7 +42,19 @@ pub struct IndexSummary {
|
||||||
pub headings: usize,
|
pub headings: usize,
|
||||||
pub blocks: usize,
|
pub blocks: usize,
|
||||||
pub wikilinks: usize,
|
pub wikilinks: usize,
|
||||||
|
pub tags: usize,
|
||||||
pub skipped_dirs: Vec<PathBuf>,
|
pub skipped_dirs: Vec<PathBuf>,
|
||||||
|
pub writes: Option<IndexWriteSummary>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Default, Serialize)]
|
||||||
|
pub struct IndexWriteSummary {
|
||||||
|
pub notes_seen: usize,
|
||||||
|
pub changed_notes: usize,
|
||||||
|
pub unchanged_notes: usize,
|
||||||
|
pub chunks_written: usize,
|
||||||
|
pub tags_seen: usize,
|
||||||
|
pub links_written: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Serialize)]
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
|
@ -115,7 +130,16 @@ impl VaultIndex {
|
||||||
headings: self.notes.iter().map(|note| note.headings.len()).sum(),
|
headings: self.notes.iter().map(|note| note.headings.len()).sum(),
|
||||||
blocks: self.notes.iter().map(|note| note.blocks.len()).sum(),
|
blocks: self.notes.iter().map(|note| note.blocks.len()).sum(),
|
||||||
wikilinks: self.notes.iter().map(|note| note.wikilinks.len()).sum(),
|
wikilinks: self.notes.iter().map(|note| note.wikilinks.len()).sum(),
|
||||||
|
tags: self.notes.iter().map(|note| note.tags.len()).sum(),
|
||||||
skipped_dirs: self.skipped_dirs.clone(),
|
skipped_dirs: self.skipped_dirs.clone(),
|
||||||
|
writes: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn summary_with_writes(&self, writes: IndexWriteSummary) -> IndexSummary {
|
||||||
|
IndexSummary {
|
||||||
|
writes: Some(writes),
|
||||||
|
..self.summary()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -213,7 +237,14 @@ impl fmt::Display for IndexSummary {
|
||||||
writeln!(f, "Headings parsed: {}", self.headings)?;
|
writeln!(f, "Headings parsed: {}", self.headings)?;
|
||||||
writeln!(f, "Markdown blocks: {}", self.blocks)?;
|
writeln!(f, "Markdown blocks: {}", self.blocks)?;
|
||||||
writeln!(f, "Wikilinks: {}", self.wikilinks)?;
|
writeln!(f, "Wikilinks: {}", self.wikilinks)?;
|
||||||
writeln!(f, "Skipped dirs: {}", self.skipped_dirs.len())
|
writeln!(f, "Tags: {}", self.tags)?;
|
||||||
|
writeln!(f, "Skipped dirs: {}", self.skipped_dirs.len())?;
|
||||||
|
if let Some(writes) = &self.writes {
|
||||||
|
writeln!(f, "Changed notes: {}", writes.changed_notes)?;
|
||||||
|
writeln!(f, "Unchanged notes skipped: {}", writes.unchanged_notes)?;
|
||||||
|
writeln!(f, "Chunks written: {}", writes.chunks_written)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -225,6 +256,7 @@ fn read_note(path: &Path, vault_path: &Path) -> Result<NoteMetadata> {
|
||||||
let relative_path = path.strip_prefix(vault_path).unwrap_or(path).to_path_buf();
|
let relative_path = path.strip_prefix(vault_path).unwrap_or(path).to_path_buf();
|
||||||
let source_path = relative_path.to_string_lossy().replace('\\', "/");
|
let source_path = relative_path.to_string_lossy().replace('\\', "/");
|
||||||
let parsed = parse_markdown(&source_path, &content);
|
let parsed = parse_markdown(&source_path, &content);
|
||||||
|
let content_hash = sha256_hex(&content);
|
||||||
|
|
||||||
Ok(NoteMetadata {
|
Ok(NoteMetadata {
|
||||||
path: relative_path,
|
path: relative_path,
|
||||||
|
|
@ -240,9 +272,11 @@ fn read_note(path: &Path, vault_path: &Path) -> Result<NoteMetadata> {
|
||||||
.and_then(|modified| modified.duration_since(UNIX_EPOCH).ok())
|
.and_then(|modified| modified.duration_since(UNIX_EPOCH).ok())
|
||||||
.map(|duration| duration.as_secs()),
|
.map(|duration| duration.as_secs()),
|
||||||
file_size: metadata.len(),
|
file_size: metadata.len(),
|
||||||
|
content_hash,
|
||||||
headings: parsed.headings,
|
headings: parsed.headings,
|
||||||
blocks: parsed.blocks,
|
blocks: parsed.blocks,
|
||||||
wikilinks: parsed.wikilinks,
|
wikilinks: parsed.wikilinks,
|
||||||
|
tags: parsed.tags,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user