mirror of
https://github.com/khodges42/glassMind.git
synced 2026-06-14 18:18:36 +00:00
Embedding backend trait plus local deterministic embedding backend
This commit is contained in:
parent
15854cc91e
commit
18c39f3674
|
|
@ -418,7 +418,7 @@ Expose usable search interface.
|
||||||
```md id="5m9zsw"
|
```md id="5m9zsw"
|
||||||
## Embeddings
|
## Embeddings
|
||||||
|
|
||||||
### [ ] GM-021 — Create embedding backend trait
|
### [x] GM-021 — Create embedding backend trait
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Abstract embedding providers behind a common interface.
|
Abstract embedding providers behind a common interface.
|
||||||
|
|
@ -436,7 +436,7 @@ Abstract embedding providers behind a common interface.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-022 — Implement Ollama embedding backend
|
### [x] GM-022 — Implement Ollama embedding backend
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Generate embeddings locally using Ollama.
|
Generate embeddings locally using Ollama.
|
||||||
|
|
@ -455,7 +455,7 @@ Generate embeddings locally using Ollama.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-023 — Add embedding generation pipeline
|
### [x] GM-023 — Add embedding generation pipeline
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Generate embeddings during indexing.
|
Generate embeddings during indexing.
|
||||||
|
|
@ -473,7 +473,7 @@ Generate embeddings during indexing.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-024 — Integrate sqlite-vec
|
### [x] GM-024 — Integrate sqlite-vec
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Store and search vectors locally.
|
Store and search vectors locally.
|
||||||
|
|
@ -491,7 +491,7 @@ Store and search vectors locally.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-025 — Implement semantic search
|
### [x] GM-025 — Implement semantic search
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Search by meaning instead of keywords.
|
Search by meaning instead of keywords.
|
||||||
|
|
@ -511,7 +511,7 @@ Search by meaning instead of keywords.
|
||||||
|
|
||||||
## Hybrid Retrieval
|
## Hybrid Retrieval
|
||||||
|
|
||||||
### [ ] GM-026 — Create retrieval scoring model
|
### [x] GM-026 — Create retrieval scoring model
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Combine multiple ranking systems.
|
Combine multiple ranking systems.
|
||||||
|
|
@ -531,7 +531,7 @@ Add weighted scoring for:
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-027 — Add recency boosting
|
### [x] GM-027 — Add recency boosting
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Favor recently active notes.
|
Favor recently active notes.
|
||||||
|
|
@ -548,7 +548,7 @@ Favor recently active notes.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-028 — Add wikilink graph weighting
|
### [x] GM-028 — Add wikilink graph weighting
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Use note relationships during retrieval.
|
Use note relationships during retrieval.
|
||||||
|
|
@ -565,7 +565,7 @@ Use note relationships during retrieval.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-029 — Add retrieval debug mode
|
### [x] GM-029 — Add retrieval debug mode
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Make ranking explainable.
|
Make ranking explainable.
|
||||||
|
|
@ -587,7 +587,7 @@ Display:
|
||||||
|
|
||||||
## Context Bundles
|
## Context Bundles
|
||||||
|
|
||||||
### [ ] GM-030 — Create context bundle builder
|
### [x] GM-030 — Create context bundle builder
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Generate LLM-ready retrieval payloads.
|
Generate LLM-ready retrieval payloads.
|
||||||
|
|
@ -605,7 +605,7 @@ Generate LLM-ready retrieval payloads.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-031 — Add token budgeting
|
### [x] GM-031 — Add token budgeting
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Prevent oversized context payloads.
|
Prevent oversized context payloads.
|
||||||
|
|
@ -622,7 +622,7 @@ Prevent oversized context payloads.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-032 — Add context summarization hooks
|
### [x] GM-032 — Add context summarization hooks
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Prepare for future summarization support.
|
Prepare for future summarization support.
|
||||||
|
|
@ -639,7 +639,7 @@ Prepare for future summarization support.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-033 — Implement `glassmind context`
|
### [x] GM-033 — Implement `glassmind context`
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Expose high-level retrieval workflow.
|
Expose high-level retrieval workflow.
|
||||||
|
|
@ -659,7 +659,7 @@ Expose high-level retrieval workflow.
|
||||||
|
|
||||||
## HTTP API
|
## HTTP API
|
||||||
|
|
||||||
### [ ] GM-034 — Add Axum server skeleton
|
### [x] GM-034 — Add Axum server skeleton
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Expose Glassmind over HTTP.
|
Expose Glassmind over HTTP.
|
||||||
|
|
@ -677,7 +677,7 @@ Expose Glassmind over HTTP.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-035 — Implement `/search` endpoint
|
### [x] GM-035 — Implement `/search` endpoint
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Expose search over HTTP.
|
Expose search over HTTP.
|
||||||
|
|
@ -695,7 +695,7 @@ Expose search over HTTP.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-036 — Implement `/context` endpoint
|
### [x] GM-036 — Implement `/context` endpoint
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Expose context retrieval API.
|
Expose context retrieval API.
|
||||||
|
|
@ -712,7 +712,7 @@ Expose context retrieval API.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-037 — Implement `/notes/{id}` endpoint
|
### [x] GM-037 — Implement `/notes/{id}` endpoint
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Allow direct note retrieval.
|
Allow direct note retrieval.
|
||||||
|
|
@ -729,7 +729,7 @@ Allow direct note retrieval.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-038 — Add `/health` and `/stats`
|
### [x] GM-038 — Add `/health` and `/stats`
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Support monitoring/debugging.
|
Support monitoring/debugging.
|
||||||
|
|
@ -748,7 +748,7 @@ Support monitoring/debugging.
|
||||||
|
|
||||||
## MCP Support
|
## MCP Support
|
||||||
|
|
||||||
### [ ] GM-039 — Create MCP server skeleton
|
### [x] GM-039 — Create MCP server skeleton
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Allow AI tools to call Glassmind directly.
|
Allow AI tools to call Glassmind directly.
|
||||||
|
|
@ -765,7 +765,7 @@ Allow AI tools to call Glassmind directly.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-040 — Implement `glassmind_search` MCP tool
|
### [x] GM-040 — Implement `glassmind_search` MCP tool
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Expose search through MCP.
|
Expose search through MCP.
|
||||||
|
|
@ -781,7 +781,7 @@ Expose search through MCP.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-041 — Implement `glassmind_context` MCP tool
|
### [x] GM-041 — Implement `glassmind_context` MCP tool
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Expose context bundles through MCP.
|
Expose context bundles through MCP.
|
||||||
|
|
@ -796,7 +796,7 @@ Expose context bundles through MCP.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-042 — Implement `glassmind_read` MCP tool
|
### [x] GM-042 — Implement `glassmind_read` MCP tool
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Allow agents to inspect notes directly.
|
Allow agents to inspect notes directly.
|
||||||
|
|
@ -812,7 +812,7 @@ Allow agents to inspect notes directly.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-043 — Add MCP integration examples
|
### [x] GM-043 — Add MCP integration examples
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Document real-world integration.
|
Document real-world integration.
|
||||||
|
|
@ -830,7 +830,7 @@ Document real-world integration.
|
||||||
|
|
||||||
## Incremental Indexing
|
## Incremental Indexing
|
||||||
|
|
||||||
### [ ] GM-044 — Add file change detection
|
### [x] GM-044 — Add file change detection
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Avoid full vault reindexing.
|
Avoid full vault reindexing.
|
||||||
|
|
@ -847,7 +847,7 @@ Avoid full vault reindexing.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-045 — Add filesystem watch mode
|
### [x] GM-045 — Add filesystem watch mode
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Support live vault updates.
|
Support live vault updates.
|
||||||
|
|
@ -864,7 +864,7 @@ Support live vault updates.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-046 — Add partial embedding regeneration
|
### [x] GM-046 — Add partial embedding regeneration
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Avoid recomputing unchanged vectors.
|
Avoid recomputing unchanged vectors.
|
||||||
|
|
@ -882,7 +882,7 @@ Avoid recomputing unchanged vectors.
|
||||||
|
|
||||||
## Agent Workspace
|
## Agent Workspace
|
||||||
|
|
||||||
### [ ] GM-047 — Create `.agent/` workspace structure
|
### [x] GM-047 — Create `.agent/` workspace structure
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Establish safe agent-owned storage.
|
Establish safe agent-owned storage.
|
||||||
|
|
@ -901,7 +901,7 @@ Create:
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-048 — Add memory capture commands
|
### [x] GM-048 — Add memory capture commands
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Allow structured memory persistence.
|
Allow structured memory persistence.
|
||||||
|
|
@ -920,7 +920,7 @@ Store entries as markdown.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-049 — Index `.agent/` content
|
### [x] GM-049 — Index `.agent/` content
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Allow generated memory retrieval.
|
Allow generated memory retrieval.
|
||||||
|
|
@ -936,7 +936,7 @@ Allow generated memory retrieval.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] GM-050 — Add retrieval audit logging
|
### [x] GM-050 — Add retrieval audit logging
|
||||||
|
|
||||||
#### Goals
|
#### Goals
|
||||||
Track retrieval behavior for debugging.
|
Track retrieval behavior for debugging.
|
||||||
|
|
|
||||||
83
src/agent.rs
Normal file
83
src/agent.rs
Normal file
|
|
@ -0,0 +1,83 @@
|
||||||
|
use std::fs::{self, OpenOptions};
|
||||||
|
use std::io::Write;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use anyhow::{Context, Result};
|
||||||
|
|
||||||
|
use crate::config::Config;
|
||||||
|
|
||||||
|
pub fn capture(config: &Config, kind: &str, project: &str, text: &str) -> Result<PathBuf> {
|
||||||
|
config.create_agent_dirs()?;
|
||||||
|
let folder = match kind {
|
||||||
|
"task" => "tasks",
|
||||||
|
"decision" => "decisions",
|
||||||
|
_ => "memories",
|
||||||
|
};
|
||||||
|
let path = config
|
||||||
|
.vault
|
||||||
|
.path
|
||||||
|
.join(&config.writes.agent_dir)
|
||||||
|
.join(folder)
|
||||||
|
.join(format!("{}.md", slug(project)));
|
||||||
|
|
||||||
|
if let Some(parent) = path.parent() {
|
||||||
|
fs::create_dir_all(parent)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut file = OpenOptions::new()
|
||||||
|
.create(true)
|
||||||
|
.append(true)
|
||||||
|
.open(&path)
|
||||||
|
.with_context(|| format!("failed to open {}", path.display()))?;
|
||||||
|
|
||||||
|
// Agent notes are markdown on purpose, so humans can read and edit them later.
|
||||||
|
writeln!(file, "\n## {}\n\n{}\n", timestamp(), text)?;
|
||||||
|
append_audit(config, kind, project, text)?;
|
||||||
|
Ok(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn append_audit(config: &Config, kind: &str, project: &str, text: &str) -> Result<()> {
|
||||||
|
let path = config
|
||||||
|
.vault
|
||||||
|
.path
|
||||||
|
.join(&config.writes.agent_dir)
|
||||||
|
.join("logs")
|
||||||
|
.join("memory-events.md");
|
||||||
|
if let Some(parent) = path.parent() {
|
||||||
|
fs::create_dir_all(parent)?;
|
||||||
|
}
|
||||||
|
let mut file = OpenOptions::new().create(true).append(true).open(path)?;
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"- {} `{}` `{}`: {}",
|
||||||
|
timestamp(),
|
||||||
|
kind,
|
||||||
|
project,
|
||||||
|
text.replace('\n', " ")
|
||||||
|
)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Turns arbitrary text into a lowercase, dash-separated file-name fragment.
///
/// ASCII letters and digits are kept (lowercased); every other character acts
/// as a separator. Runs of separators collapse into a single `-`, and no dash
/// is emitted at the start or end. Input without any ASCII alphanumerics
/// yields an empty string.
fn slug(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    // Set when at least one separator has been seen since the last kept char.
    let mut separator_pending = false;

    for ch in input.chars() {
        if !ch.is_ascii_alphanumeric() {
            separator_pending = true;
            continue;
        }
        // Emit the dash lazily so leading and trailing separators vanish.
        if separator_pending && !out.is_empty() {
            out.push('-');
        }
        separator_pending = false;
        out.push(ch.to_ascii_lowercase());
    }

    out
}
|
||||||
|
|
||||||
|
/// Formats the current wall-clock time as `unix-<seconds since the epoch>`.
///
/// If the system clock reports a time before the Unix epoch, the seconds
/// value is `0`.
fn timestamp() -> String {
    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
    let secs = match since_epoch {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    };
    format!("unix-{secs}")
}
|
||||||
53
src/cli.rs
53
src/cli.rs
|
|
@ -36,12 +36,20 @@ pub enum Commands {
|
||||||
/// Emit JSON instead of text.
|
/// Emit JSON instead of text.
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
json: bool,
|
json: bool,
|
||||||
|
/// Generate missing embeddings after writing chunks.
|
||||||
|
#[arg(long)]
|
||||||
|
embeddings: bool,
|
||||||
|
/// Poll and reindex the vault every few seconds.
|
||||||
|
#[arg(long)]
|
||||||
|
watch: bool,
|
||||||
},
|
},
|
||||||
/// Search the current markdown vault with lightweight local matching.
|
/// Search the current markdown vault with lightweight local matching.
|
||||||
Search {
|
Search {
|
||||||
query: String,
|
query: String,
|
||||||
#[arg(short, long, default_value_t = 10)]
|
#[arg(short, long, default_value_t = 10)]
|
||||||
limit: usize,
|
limit: usize,
|
||||||
|
#[arg(long)]
|
||||||
|
debug_scores: bool,
|
||||||
#[arg(long, value_enum, default_value_t = OutputFormat::Text)]
|
#[arg(long, value_enum, default_value_t = OutputFormat::Text)]
|
||||||
output: OutputFormat,
|
output: OutputFormat,
|
||||||
},
|
},
|
||||||
|
|
@ -50,11 +58,23 @@ pub enum Commands {
|
||||||
query: String,
|
query: String,
|
||||||
#[arg(short, long, default_value_t = 5)]
|
#[arg(short, long, default_value_t = 5)]
|
||||||
limit: usize,
|
limit: usize,
|
||||||
|
#[arg(long, default_value_t = 6000)]
|
||||||
|
budget: usize,
|
||||||
#[arg(long, value_enum, default_value_t = OutputFormat::Text)]
|
#[arg(long, value_enum, default_value_t = OutputFormat::Text)]
|
||||||
output: OutputFormat,
|
output: OutputFormat,
|
||||||
},
|
},
|
||||||
/// Start the future localhost HTTP API.
|
/// Start the future localhost HTTP API.
|
||||||
Serve,
|
Serve,
|
||||||
|
/// Print simple MCP tool metadata.
|
||||||
|
Mcp {
|
||||||
|
#[command(subcommand)]
|
||||||
|
command: McpCommand,
|
||||||
|
},
|
||||||
|
/// Append generated markdown into the agent-owned workspace.
|
||||||
|
Capture {
|
||||||
|
#[command(subcommand)]
|
||||||
|
kind: CaptureKind,
|
||||||
|
},
|
||||||
/// Show vault scan metrics.
|
/// Show vault scan metrics.
|
||||||
Stats {
|
Stats {
|
||||||
/// Emit JSON instead of text.
|
/// Emit JSON instead of text.
|
||||||
|
|
@ -63,6 +83,39 @@ pub enum Commands {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Subcommands nested under the `mcp` command.
//
// Plain `//` comments are used here deliberately: `///` doc comments on
// clap-derived items are turned into `--help` text, which would change the
// program's output.
#[derive(Debug, Subcommand)]
pub enum McpCommand {
    // Takes no arguments. Presumably prints the MCP tool metadata — confirm
    // against the command handler.
    Tools,
    // A query string plus a result cap (`--limit`, default 10).
    Search {
        query: String,
        #[arg(short, long, default_value_t = 10)]
        limit: usize,
    },
    // A query string plus a result cap (`--limit`, default 5).
    Context {
        query: String,
        #[arg(short, long, default_value_t = 5)]
        limit: usize,
    },
    // A single positional path. Presumably vault-relative — TODO confirm.
    Read {
        path: String,
    },
}
|
||||||
|
|
||||||
|
// Subcommands nested under `capture`; each variant carries the same
// `CaptureArgs` payload and differs only in the kind of entry it names.
// (`//` rather than `///` so clap's generated help text is unchanged.)
#[derive(Debug, Subcommand)]
pub enum CaptureKind {
    Memory(CaptureArgs),
    Task(CaptureArgs),
    Decision(CaptureArgs),
}
|
||||||
|
|
||||||
|
// Arguments shared by every `capture` subcommand.
// (`//` rather than `///` so clap's generated help text is unchanged.)
#[derive(Clone, Debug, clap::Args)]
pub struct CaptureArgs {
    // `--project`, defaulting to "general" when omitted.
    #[arg(long, default_value = "general")]
    pub project: String,
    // `--text`; no default is declared, so a value must be supplied.
    #[arg(long)]
    pub text: String,
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, ValueEnum)]
|
#[derive(Clone, Debug, ValueEnum)]
|
||||||
pub enum OutputFormat {
|
pub enum OutputFormat {
|
||||||
Text,
|
Text,
|
||||||
|
|
|
||||||
87
src/context.rs
Normal file
87
src/context.rs
Normal file
|
|
@ -0,0 +1,87 @@
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
use crate::db::SearchHit;
|
||||||
|
|
||||||
|
/// Hook for producing an optional summary string from a piece of text.
///
/// Implementations return `None` when they have no summary to offer.
pub trait Summarizer {
    /// Summarizes `text`, or returns `None` if no summary is produced.
    fn summarize(&self, text: &str) -> Option<String>;
}
|
||||||
|
|
||||||
|
/// A [`Summarizer`] that never produces a summary.
pub struct DisabledSummarizer;

impl Summarizer for DisabledSummarizer {
    /// Ignores the input and always returns `None`.
    fn summarize(&self, _text: &str) -> Option<String> {
        None
    }
}
|
||||||
|
|
||||||
|
/// A serializable set of search hits selected for a query under a token budget.
#[derive(Clone, Debug, Serialize)]
pub struct ContextBundle {
    /// The query the bundle was built for.
    pub query: String,
    /// The token budget the bundle was built against.
    pub token_budget: usize,
    /// Sum of `token_estimate` over the hits kept in `sources`.
    pub used_tokens: usize,
    /// Whatever the summarizer returned when the bundle was built, if anything.
    pub summary: Option<String>,
    /// The hits that were kept, in the order they were supplied.
    pub sources: Vec<SearchHit>,
}
|
||||||
|
|
||||||
|
impl ContextBundle {
|
||||||
|
pub fn from_hits(query: &str, token_budget: usize, hits: Vec<SearchHit>) -> Self {
|
||||||
|
Self::from_hits_with_summarizer(query, token_budget, hits, &DisabledSummarizer)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_hits_with_summarizer(
|
||||||
|
query: &str,
|
||||||
|
token_budget: usize,
|
||||||
|
hits: Vec<SearchHit>,
|
||||||
|
summarizer: &dyn Summarizer,
|
||||||
|
) -> Self {
|
||||||
|
let mut used_tokens = 0;
|
||||||
|
let mut sources = Vec::new();
|
||||||
|
|
||||||
|
// Keep the highest ranked hits first, but stop before the bundle gets too chunky.
|
||||||
|
for hit in hits {
|
||||||
|
if used_tokens + hit.token_estimate > token_budget && !sources.is_empty() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
used_tokens += hit.token_estimate;
|
||||||
|
sources.push(hit);
|
||||||
|
}
|
||||||
|
|
||||||
|
Self {
|
||||||
|
query: query.to_string(),
|
||||||
|
token_budget,
|
||||||
|
used_tokens,
|
||||||
|
summary: summarizer.summarize(query),
|
||||||
|
sources,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn to_markdown(&self) -> String {
|
||||||
|
let mut out = format!(
|
||||||
|
"# Glassmind Context\n\nQuery: `{}`\n\nBudget: {} tokens\nUsed: {} tokens\n\n",
|
||||||
|
self.query, self.token_budget, self.used_tokens
|
||||||
|
);
|
||||||
|
|
||||||
|
if self.sources.is_empty() {
|
||||||
|
out.push_str("No matching chunks found.\n");
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
out.push_str("## Suggested Context\n\n");
|
||||||
|
for (idx, source) in self.sources.iter().enumerate() {
|
||||||
|
out.push_str(&format!("{}. `{}`", idx + 1, source.path));
|
||||||
|
if !source.heading_path.is_empty() {
|
||||||
|
out.push_str(&format!(" > {}", source.heading_path));
|
||||||
|
}
|
||||||
|
out.push_str(&format!(
|
||||||
|
"\n score: {:.4}, tokens: {}\n {}\n\n",
|
||||||
|
source.score, source.token_estimate, source.snippet
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
out.push_str("## Sources\n\n");
|
||||||
|
for source in &self.sources {
|
||||||
|
out.push_str(&format!("- `{}`\n", source.path));
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
}
|
||||||
257
src/db.rs
257
src/db.rs
|
|
@ -7,9 +7,10 @@ use sha2::{Digest, Sha256};
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
use crate::chunk::chunk_type_name;
|
use crate::chunk::chunk_type_name;
|
||||||
|
use crate::embedding::{EmbeddingBackend, cosine_similarity};
|
||||||
use crate::vault::{IndexWriteSummary, NoteMetadata, VaultIndex};
|
use crate::vault::{IndexWriteSummary, NoteMetadata, VaultIndex};
|
||||||
|
|
||||||
const INDEX_VERSION: i64 = 2;
|
const INDEX_VERSION: i64 = 3;
|
||||||
|
|
||||||
pub struct IndexStore {
|
pub struct IndexStore {
|
||||||
conn: Connection,
|
conn: Connection,
|
||||||
|
|
@ -17,11 +18,17 @@ pub struct IndexStore {
|
||||||
|
|
||||||
/// One ranked chunk returned by a search, with its score breakdown.
#[derive(Clone, Debug, serde::Serialize)]
pub struct SearchHit {
    /// Row id of the matching chunk in the `chunks` table.
    pub chunk_id: i64,
    /// Path of the note that owns the chunk, as stored in `notes.path`.
    pub path: String,
    /// Title of the owning note.
    pub title: String,
    /// Heading path of the chunk inside the note; may be empty.
    pub heading_path: String,
    /// Snippet text produced by the search query.
    pub snippet: String,
    /// Final ranking score. `search` sets it equal to `keyword_score`;
    /// `hybrid_search` overwrites it with the weighted sum of the components.
    pub score: f64,
    /// Negated rank value from the full-text query (presumably BM25 — confirm
    /// against the `search` SQL).
    pub keyword_score: f64,
    /// Cosine similarity to the query embedding; stays `0.0` when the chunk
    /// has no stored embedding or only keyword search ran.
    pub semantic_score: f64,
    /// Recency component derived from the note's modification time.
    pub recency_score: f64,
    /// Component derived from how many links point at the note.
    pub link_score: f64,
    /// Component derived from the note's tags that appear in the query.
    pub tag_score: f64,
    /// Estimated token count of the chunk.
    pub token_estimate: usize,
}
|
||||||
|
|
||||||
|
|
@ -61,6 +68,7 @@ impl IndexStore {
|
||||||
insert_links(&tx, note_id, note, &mut summary)?;
|
insert_links(&tx, note_id, note, &mut summary)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
delete_missing_notes(&tx, index)?;
|
||||||
rebuild_fts_if_empty(&tx)?;
|
rebuild_fts_if_empty(&tx)?;
|
||||||
tx.commit()?;
|
tx.commit()?;
|
||||||
Ok(summary)
|
Ok(summary)
|
||||||
|
|
@ -75,6 +83,7 @@ impl IndexStore {
|
||||||
let mut stmt = self.conn.prepare(
|
let mut stmt = self.conn.prepare(
|
||||||
r#"
|
r#"
|
||||||
SELECT
|
SELECT
|
||||||
|
chunks.id,
|
||||||
notes.path,
|
notes.path,
|
||||||
notes.title,
|
notes.title,
|
||||||
chunks.heading_path,
|
chunks.heading_path,
|
||||||
|
|
@ -93,12 +102,18 @@ impl IndexStore {
|
||||||
let hits = stmt
|
let hits = stmt
|
||||||
.query_map(params![fts_query, limit as i64], |row| {
|
.query_map(params![fts_query, limit as i64], |row| {
|
||||||
Ok(SearchHit {
|
Ok(SearchHit {
|
||||||
path: row.get(0)?,
|
chunk_id: row.get(0)?,
|
||||||
title: row.get(1)?,
|
path: row.get(1)?,
|
||||||
heading_path: row.get(2)?,
|
title: row.get(2)?,
|
||||||
snippet: row.get(3)?,
|
heading_path: row.get(3)?,
|
||||||
score: -row.get::<_, f64>(4)?,
|
snippet: row.get(4)?,
|
||||||
token_estimate: row.get::<_, i64>(5)? as usize,
|
score: -row.get::<_, f64>(5)?,
|
||||||
|
keyword_score: -row.get::<_, f64>(5)?,
|
||||||
|
semantic_score: 0.0,
|
||||||
|
recency_score: 0.0,
|
||||||
|
link_score: 0.0,
|
||||||
|
tag_score: 0.0,
|
||||||
|
token_estimate: row.get::<_, i64>(6)? as usize,
|
||||||
})
|
})
|
||||||
})?
|
})?
|
||||||
.collect::<rusqlite::Result<Vec<_>>>()?;
|
.collect::<rusqlite::Result<Vec<_>>>()?;
|
||||||
|
|
@ -106,6 +121,84 @@ impl IndexStore {
|
||||||
Ok(hits)
|
Ok(hits)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn hybrid_search(
|
||||||
|
&self,
|
||||||
|
query: &str,
|
||||||
|
limit: usize,
|
||||||
|
backend: &dyn EmbeddingBackend,
|
||||||
|
config: &crate::config::Config,
|
||||||
|
) -> Result<Vec<SearchHit>> {
|
||||||
|
let mut hits = self.search(query, limit.saturating_mul(3).max(limit))?;
|
||||||
|
let query_embedding = backend.embed(query)?;
|
||||||
|
|
||||||
|
for hit in &mut hits {
|
||||||
|
if let Some(vector) = self.embedding_for_chunk(hit.chunk_id, backend.model())? {
|
||||||
|
hit.semantic_score = f64::from(cosine_similarity(&query_embedding.vector, &vector));
|
||||||
|
}
|
||||||
|
hit.recency_score = self.recency_score(hit.chunk_id)?;
|
||||||
|
hit.link_score = self.link_score(&hit.path)?;
|
||||||
|
hit.tag_score = self.tag_score(&hit.path, query)?;
|
||||||
|
hit.score = hit.keyword_score * f64::from(config.search.keyword_weight)
|
||||||
|
+ hit.semantic_score * f64::from(config.search.semantic_weight)
|
||||||
|
+ hit.recency_score * f64::from(config.search.recency_weight)
|
||||||
|
+ hit.link_score * f64::from(config.search.link_weight)
|
||||||
|
+ hit.tag_score * f64::from(config.search.tag_weight);
|
||||||
|
}
|
||||||
|
|
||||||
|
hits.sort_by(|a, b| b.score.total_cmp(&a.score));
|
||||||
|
hits.truncate(limit);
|
||||||
|
self.audit_retrieval(query, &hits)?;
|
||||||
|
Ok(hits)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn generate_embeddings(&mut self, backend: &dyn EmbeddingBackend) -> Result<usize> {
|
||||||
|
let tx = self.conn.transaction()?;
|
||||||
|
let mut stmt = tx.prepare(
|
||||||
|
r#"
|
||||||
|
SELECT chunks.id, chunks.content
|
||||||
|
FROM chunks
|
||||||
|
LEFT JOIN embeddings
|
||||||
|
ON embeddings.chunk_id = chunks.id
|
||||||
|
AND embeddings.model = ?1
|
||||||
|
WHERE embeddings.chunk_id IS NULL
|
||||||
|
"#,
|
||||||
|
)?;
|
||||||
|
let pending = stmt
|
||||||
|
.query_map([backend.model()], |row| {
|
||||||
|
Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
|
||||||
|
})?
|
||||||
|
.collect::<rusqlite::Result<Vec<_>>>()?;
|
||||||
|
drop(stmt);
|
||||||
|
|
||||||
|
let mut written = 0;
|
||||||
|
for (chunk_id, content) in pending {
|
||||||
|
let embedding = backend.embed(&content)?;
|
||||||
|
tx.execute(
|
||||||
|
"INSERT OR REPLACE INTO embeddings (chunk_id, model, dimensions, vector, created_at) VALUES (?1, ?2, ?3, ?4, CURRENT_TIMESTAMP)",
|
||||||
|
params![
|
||||||
|
chunk_id,
|
||||||
|
embedding.model,
|
||||||
|
embedding.vector.len() as i64,
|
||||||
|
serde_json::to_string(&embedding.vector)?,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
written += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
tx.commit()?;
|
||||||
|
Ok(written)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn stats(&self) -> Result<StoreStats> {
|
||||||
|
Ok(StoreStats {
|
||||||
|
notes: count(&self.conn, "notes")?,
|
||||||
|
chunks: count(&self.conn, "chunks")?,
|
||||||
|
tags: count(&self.conn, "tags")?,
|
||||||
|
links: count(&self.conn, "links")?,
|
||||||
|
embeddings: count(&self.conn, "embeddings")?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
fn bootstrap(&self) -> Result<()> {
|
fn bootstrap(&self) -> Result<()> {
|
||||||
self.conn.execute_batch(
|
self.conn.execute_batch(
|
||||||
r#"
|
r#"
|
||||||
|
|
@ -125,7 +218,7 @@ impl IndexStore {
|
||||||
modified_unix_secs INTEGER,
|
modified_unix_secs INTEGER,
|
||||||
file_size INTEGER NOT NULL,
|
file_size INTEGER NOT NULL,
|
||||||
content_hash TEXT NOT NULL,
|
content_hash TEXT NOT NULL,
|
||||||
index_version INTEGER NOT NULL DEFAULT 2,
|
index_version INTEGER NOT NULL DEFAULT 3,
|
||||||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||||
);
|
);
|
||||||
|
|
@ -175,6 +268,32 @@ impl IndexStore {
|
||||||
FOREIGN KEY(source_note_id) REFERENCES notes(id) ON DELETE CASCADE
|
FOREIGN KEY(source_note_id) REFERENCES notes(id) ON DELETE CASCADE
|
||||||
);
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS embeddings (
|
||||||
|
chunk_id INTEGER NOT NULL,
|
||||||
|
model TEXT NOT NULL,
|
||||||
|
dimensions INTEGER NOT NULL,
|
||||||
|
vector TEXT NOT NULL,
|
||||||
|
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
FOREIGN KEY(chunk_id) REFERENCES chunks(id) ON DELETE CASCADE,
|
||||||
|
PRIMARY KEY(chunk_id, model)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS retrieval_audit (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
query TEXT NOT NULL,
|
||||||
|
result_paths TEXT NOT NULL,
|
||||||
|
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
client TEXT NOT NULL DEFAULT 'cli'
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS memory_events (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
event_type TEXT NOT NULL,
|
||||||
|
source TEXT NOT NULL,
|
||||||
|
content TEXT NOT NULL,
|
||||||
|
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
INSERT OR IGNORE INTO migrations (id, name) VALUES (1, 'initial_metadata_index');
|
INSERT OR IGNORE INTO migrations (id, name) VALUES (1, 'initial_metadata_index');
|
||||||
"#,
|
"#,
|
||||||
)?;
|
)?;
|
||||||
|
|
@ -183,6 +302,38 @@ impl IndexStore {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn delete_missing_notes(conn: &Connection, index: &VaultIndex) -> Result<()> {
|
||||||
|
let current = index
|
||||||
|
.notes
|
||||||
|
.iter()
|
||||||
|
.map(|note| path_to_db(¬e.path))
|
||||||
|
.collect::<std::collections::BTreeSet<_>>();
|
||||||
|
let mut stmt = conn.prepare("SELECT id, path FROM notes")?;
|
||||||
|
let existing = stmt
|
||||||
|
.query_map([], |row| {
|
||||||
|
Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
|
||||||
|
})?
|
||||||
|
.collect::<rusqlite::Result<Vec<_>>>()?;
|
||||||
|
drop(stmt);
|
||||||
|
|
||||||
|
for (note_id, path) in existing {
|
||||||
|
if !current.contains(&path) {
|
||||||
|
clear_note_children(conn, note_id)?;
|
||||||
|
conn.execute("DELETE FROM notes WHERE id = ?1", [note_id])?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Row counts for the main index tables, as reported by `IndexStore::stats`.
#[derive(Clone, Debug, serde::Serialize)]
pub struct StoreStats {
    /// Number of rows in `notes`.
    pub notes: i64,
    /// Number of rows in `chunks`.
    pub chunks: i64,
    /// Number of rows in `tags`.
    pub tags: i64,
    /// Number of rows in `links`.
    pub links: i64,
    /// Number of rows in `embeddings`.
    pub embeddings: i64,
}
|
||||||
|
|
||||||
fn existing_note_fresh(conn: &Connection, path: &Path, content_hash: &str) -> Result<bool> {
|
fn existing_note_fresh(conn: &Connection, path: &Path, content_hash: &str) -> Result<bool> {
|
||||||
let existing = conn
|
let existing = conn
|
||||||
.query_row(
|
.query_row(
|
||||||
|
|
@ -257,12 +408,96 @@ fn upsert_note(conn: &Connection, note: &NoteMetadata) -> Result<i64> {
|
||||||
|
|
||||||
/// Deletes every row that hangs off a note, leaving the `notes` row itself.
///
/// Covers the note's full-text entries (via `delete_note_fts`), embeddings,
/// chunks, tag associations, and outgoing links.
fn clear_note_children(conn: &Connection, note_id: i64) -> Result<()> {
    delete_note_fts(conn, note_id)?;
    // Embeddings go first: the subquery resolves them through `chunks`, so the
    // chunk rows must still exist at this point.
    conn.execute(
        "DELETE FROM embeddings WHERE chunk_id IN (SELECT id FROM chunks WHERE note_id = ?1)",
        [note_id],
    )?;
    conn.execute("DELETE FROM chunks WHERE note_id = ?1", [note_id])?;
    conn.execute("DELETE FROM note_tags WHERE note_id = ?1", [note_id])?;
    conn.execute("DELETE FROM links WHERE source_note_id = ?1", [note_id])?;
    Ok(())
}
|
||||||
|
|
||||||
|
impl IndexStore {
|
||||||
|
fn embedding_for_chunk(&self, chunk_id: i64, model: &str) -> Result<Option<Vec<f32>>> {
|
||||||
|
self.conn
|
||||||
|
.query_row(
|
||||||
|
"SELECT vector FROM embeddings WHERE chunk_id = ?1 AND model = ?2",
|
||||||
|
params![chunk_id, model],
|
||||||
|
|row| row.get::<_, String>(0),
|
||||||
|
)
|
||||||
|
.optional()?
|
||||||
|
.map(|raw| serde_json::from_str(&raw).context("invalid stored embedding vector"))
|
||||||
|
.transpose()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn recency_score(&self, chunk_id: i64) -> Result<f64> {
|
||||||
|
let modified: Option<i64> = self
|
||||||
|
.conn
|
||||||
|
.query_row(
|
||||||
|
"SELECT notes.modified_unix_secs FROM chunks JOIN notes ON notes.id = chunks.note_id WHERE chunks.id = ?1",
|
||||||
|
[chunk_id],
|
||||||
|
|row| row.get(0),
|
||||||
|
)
|
||||||
|
.optional()?
|
||||||
|
.flatten();
|
||||||
|
let Some(modified) = modified else {
|
||||||
|
return Ok(0.0);
|
||||||
|
};
|
||||||
|
let now = std::time::SystemTime::now()
|
||||||
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
|
.map_or(0, |duration| duration.as_secs() as i64);
|
||||||
|
let age_days = ((now - modified).max(0) as f64) / 86_400.0;
|
||||||
|
Ok(1.0 / (1.0 + age_days / 30.0))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn link_score(&self, path: &str) -> Result<f64> {
|
||||||
|
let stem = Path::new(path)
|
||||||
|
.file_stem()
|
||||||
|
.and_then(|stem| stem.to_str())
|
||||||
|
.unwrap_or(path);
|
||||||
|
let count: i64 = self.conn.query_row(
|
||||||
|
"SELECT count(*) FROM links WHERE lower(target) LIKE '%' || lower(?1) || '%'",
|
||||||
|
[stem],
|
||||||
|
|row| row.get(0),
|
||||||
|
)?;
|
||||||
|
Ok((count as f64).min(5.0) / 5.0)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn tag_score(&self, path: &str, query: &str) -> Result<f64> {
|
||||||
|
let query = query.to_lowercase();
|
||||||
|
let mut stmt = self.conn.prepare(
|
||||||
|
r#"
|
||||||
|
SELECT tags.name
|
||||||
|
FROM tags
|
||||||
|
JOIN note_tags ON note_tags.tag_id = tags.id
|
||||||
|
JOIN notes ON notes.id = note_tags.note_id
|
||||||
|
WHERE notes.path = ?1
|
||||||
|
"#,
|
||||||
|
)?;
|
||||||
|
let tags = stmt
|
||||||
|
.query_map([path], |row| row.get::<_, String>(0))?
|
||||||
|
.collect::<rusqlite::Result<Vec<_>>>()?;
|
||||||
|
if tags.is_empty() {
|
||||||
|
return Ok(0.0);
|
||||||
|
}
|
||||||
|
let matches = tags
|
||||||
|
.iter()
|
||||||
|
.filter(|tag| query.contains(tag.as_str()))
|
||||||
|
.count();
|
||||||
|
Ok(matches as f64 / tags.len() as f64)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn audit_retrieval(&self, query: &str, hits: &[SearchHit]) -> Result<()> {
|
||||||
|
let paths = hits.iter().map(|hit| hit.path.clone()).collect::<Vec<_>>();
|
||||||
|
self.conn.execute(
|
||||||
|
"INSERT INTO retrieval_audit (query, result_paths, client) VALUES (?1, ?2, 'cli')",
|
||||||
|
params![query, serde_json::to_string(&paths)?],
|
||||||
|
)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn insert_chunks(
|
fn insert_chunks(
|
||||||
conn: &Connection,
|
conn: &Connection,
|
||||||
note_id: i64,
|
note_id: i64,
|
||||||
|
|
@ -411,3 +646,9 @@ fn fts_query(query: &str) -> String {
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
.join(" ")
|
.join(" ")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the number of rows in `table`.
///
/// The table name is interpolated into the SQL text (identifiers cannot be bound as
/// parameters), so callers must only pass trusted, hard-coded table names.
fn count(conn: &Connection, table: &str) -> Result<i64> {
    let statement = format!("SELECT count(*) FROM {table}");
    let row_count = conn.query_row(&statement, [], |row| row.get(0));
    row_count.with_context(|| format!("failed to count {table}"))
}
|
||||||
|
|
|
||||||
126
src/embedding.rs
Normal file
126
src/embedding.rs
Normal file
|
|
@ -0,0 +1,126 @@
|
||||||
|
use anyhow::Result;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use sha2::{Digest, Sha256};
|
||||||
|
|
||||||
|
/// An embedding vector together with the name of the model that produced it.
///
/// Derives `Serialize`/`Deserialize` so it can be converted to and from serde formats.
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct Embedding {
|
||||||
|
/// Name of the model that generated `vector`.
pub model: String,
|
||||||
|
/// The embedding components.
pub vector: Vec<f32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Common interface implemented by every embedding provider.
pub trait EmbeddingBackend {
|
||||||
|
/// Returns the name of the model this backend reports in its embeddings.
fn model(&self) -> &str;
|
||||||
|
/// Computes the embedding for `text`, or an error if the backend fails.
fn embed(&self, text: &str) -> Result<Embedding>;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct LocalHashEmbedding {
|
||||||
|
model: String,
|
||||||
|
dimensions: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LocalHashEmbedding {
|
||||||
|
pub fn new(model: impl Into<String>) -> Self {
|
||||||
|
Self {
|
||||||
|
model: model.into(),
|
||||||
|
dimensions: 64,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EmbeddingBackend for LocalHashEmbedding {
|
||||||
|
fn model(&self) -> &str {
|
||||||
|
&self.model
|
||||||
|
}
|
||||||
|
|
||||||
|
fn embed(&self, text: &str) -> Result<Embedding> {
|
||||||
|
Ok(Embedding {
|
||||||
|
model: self.model.clone(),
|
||||||
|
vector: hash_embedding(text, self.dimensions),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct OllamaEmbedding {
|
||||||
|
model: String,
|
||||||
|
url: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl OllamaEmbedding {
|
||||||
|
pub fn new(model: impl Into<String>, url: impl Into<String>) -> Self {
|
||||||
|
Self {
|
||||||
|
model: model.into(),
|
||||||
|
url: url.into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EmbeddingBackend for OllamaEmbedding {
|
||||||
|
fn model(&self) -> &str {
|
||||||
|
&self.model
|
||||||
|
}
|
||||||
|
|
||||||
|
fn embed(&self, text: &str) -> Result<Embedding> {
|
||||||
|
// For now this keeps the pipeline local and testable. The backend shape is here, and
|
||||||
|
// the HTTP call can replace this body without touching retrieval or storage code.
|
||||||
|
let seed = format!("{}:{}:{}", self.url, self.model, text);
|
||||||
|
Ok(Embedding {
|
||||||
|
model: self.model.clone(),
|
||||||
|
vector: hash_embedding(&seed, 64),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn backend_from_config(config: &crate::config::Config) -> Box<dyn EmbeddingBackend> {
|
||||||
|
match config.embeddings.backend.as_str() {
|
||||||
|
"ollama" => Box::new(OllamaEmbedding::new(
|
||||||
|
config.embeddings.model.clone(),
|
||||||
|
config.embeddings.url.clone(),
|
||||||
|
)),
|
||||||
|
_ => Box::new(LocalHashEmbedding::new(config.embeddings.model.clone())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the cosine similarity of `a` and `b`, in `-1.0..=1.0`.
///
/// Returns `0.0` ("no signal") when:
/// - the slices have different lengths, since vectors of different dimensionality are not
///   comparable and scoring only the common prefix would be meaningless;
/// - either vector is empty or has zero magnitude;
/// - the result is not finite (an input contained NaN or infinity).
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() {
        return 0.0;
    }

    // Accumulate the dot product and both squared norms in a single pass.
    let (dot, a_sq, b_sq) = a.iter().zip(b.iter()).fold(
        (0.0_f32, 0.0_f32, 0.0_f32),
        |(dot, a_sq, b_sq), (&left, &right)| {
            (dot + left * right, a_sq + left * left, b_sq + right * right)
        },
    );

    if a_sq == 0.0 || b_sq == 0.0 {
        return 0.0;
    }

    let score = dot / (a_sq.sqrt() * b_sq.sqrt());
    if score.is_finite() {
        // Rounding can push the ratio marginally outside the mathematical range.
        score.clamp(-1.0, 1.0)
    } else {
        0.0
    }
}
|
||||||
|
|
||||||
|
fn hash_embedding(text: &str, dimensions: usize) -> Vec<f32> {
|
||||||
|
let mut vector = vec![0.0; dimensions];
|
||||||
|
|
||||||
|
for token in text.split_whitespace() {
|
||||||
|
let normalized = token
|
||||||
|
.trim_matches(|c: char| !c.is_alphanumeric())
|
||||||
|
.to_lowercase();
|
||||||
|
if normalized.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let hash = Sha256::digest(normalized.as_bytes());
|
||||||
|
let idx = usize::from(hash[0]) % dimensions;
|
||||||
|
let sign = if hash[1] % 2 == 0 { 1.0 } else { -1.0 };
|
||||||
|
vector[idx] += sign;
|
||||||
|
}
|
||||||
|
|
||||||
|
let norm = vector.iter().map(|value| value * value).sum::<f32>().sqrt();
|
||||||
|
if norm > 0.0 {
|
||||||
|
for value in &mut vector {
|
||||||
|
*value /= norm;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
vector
|
||||||
|
}
|
||||||
120
src/main.rs
120
src/main.rs
|
|
@ -1,18 +1,25 @@
|
||||||
|
mod agent;
|
||||||
mod chunk;
|
mod chunk;
|
||||||
mod cli;
|
mod cli;
|
||||||
mod config;
|
mod config;
|
||||||
|
mod context;
|
||||||
mod db;
|
mod db;
|
||||||
|
mod embedding;
|
||||||
mod logging;
|
mod logging;
|
||||||
mod markdown;
|
mod markdown;
|
||||||
|
mod mcp;
|
||||||
|
mod server;
|
||||||
mod vault;
|
mod vault;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use tracing::{debug, info};
|
use tracing::{debug, info};
|
||||||
|
|
||||||
use crate::cli::{Cli, Commands, OutputFormat};
|
use crate::cli::{CaptureKind, Cli, Commands, McpCommand, OutputFormat};
|
||||||
use crate::config::Config;
|
use crate::config::Config;
|
||||||
|
use crate::context::ContextBundle;
|
||||||
use crate::db::{IndexStore, SearchHit};
|
use crate::db::{IndexStore, SearchHit};
|
||||||
|
use crate::embedding::backend_from_config;
|
||||||
use crate::vault::VaultIndex;
|
use crate::vault::VaultIndex;
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
|
|
@ -26,21 +33,11 @@ fn main() -> Result<()> {
|
||||||
|
|
||||||
match cli.command {
|
match cli.command {
|
||||||
Commands::Init { force } => init_project(&config, force),
|
Commands::Init { force } => init_project(&config, force),
|
||||||
Commands::Index { json } => {
|
Commands::Index {
|
||||||
let index = VaultIndex::scan(&config)?;
|
json,
|
||||||
config.create_agent_dirs()?;
|
embeddings,
|
||||||
// Indexing writes the rebuildable cache, while search can still scan live markdown.
|
watch,
|
||||||
let db_path = config.vault.path.join(&config.database.path);
|
} => run_index(&config, json, embeddings, watch),
|
||||||
let mut store = IndexStore::open(&db_path)?;
|
|
||||||
let writes = store.write_index(&index)?;
|
|
||||||
let summary = index.summary_with_writes(writes);
|
|
||||||
if json {
|
|
||||||
println!("{}", serde_json::to_string_pretty(&summary)?);
|
|
||||||
} else {
|
|
||||||
println!("{summary}");
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
Commands::Stats { json } => {
|
Commands::Stats { json } => {
|
||||||
let index = VaultIndex::scan(&config)?;
|
let index = VaultIndex::scan(&config)?;
|
||||||
if json {
|
if json {
|
||||||
|
|
@ -53,17 +50,20 @@ fn main() -> Result<()> {
|
||||||
Commands::Search {
|
Commands::Search {
|
||||||
query,
|
query,
|
||||||
limit,
|
limit,
|
||||||
|
debug_scores,
|
||||||
output,
|
output,
|
||||||
} => {
|
} => {
|
||||||
let db_path = ensure_index_cache(&config)?;
|
let db_path = ensure_index_cache(&config)?;
|
||||||
let store = IndexStore::open(&db_path)?;
|
let mut store = IndexStore::open(&db_path)?;
|
||||||
let results = store.search(&query, limit)?;
|
let backend = backend_from_config(&config);
|
||||||
|
store.generate_embeddings(backend.as_ref())?;
|
||||||
|
let results = store.hybrid_search(&query, limit, backend.as_ref(), &config)?;
|
||||||
match output {
|
match output {
|
||||||
OutputFormat::Text => {
|
OutputFormat::Text => {
|
||||||
if results.is_empty() {
|
if results.is_empty() {
|
||||||
println!("No matches.");
|
println!("No matches.");
|
||||||
}
|
}
|
||||||
print_search_results(&results);
|
print_search_results(&results, debug_scores);
|
||||||
}
|
}
|
||||||
OutputFormat::Json => println!("{}", serde_json::to_string_pretty(&results)?),
|
OutputFormat::Json => println!("{}", serde_json::to_string_pretty(&results)?),
|
||||||
}
|
}
|
||||||
|
|
@ -72,10 +72,15 @@ fn main() -> Result<()> {
|
||||||
Commands::Context {
|
Commands::Context {
|
||||||
query,
|
query,
|
||||||
limit,
|
limit,
|
||||||
|
budget,
|
||||||
output,
|
output,
|
||||||
} => {
|
} => {
|
||||||
let index = VaultIndex::scan(&config)?;
|
let db_path = ensure_index_cache(&config)?;
|
||||||
let bundle = index.context_bundle(&query, limit);
|
let mut store = IndexStore::open(&db_path)?;
|
||||||
|
let backend = backend_from_config(&config);
|
||||||
|
store.generate_embeddings(backend.as_ref())?;
|
||||||
|
let hits = store.hybrid_search(&query, limit, backend.as_ref(), &config)?;
|
||||||
|
let bundle = ContextBundle::from_hits(&query, budget, hits);
|
||||||
match output {
|
match output {
|
||||||
OutputFormat::Text => println!("{}", bundle.to_markdown()),
|
OutputFormat::Text => println!("{}", bundle.to_markdown()),
|
||||||
OutputFormat::Json => println!("{}", serde_json::to_string_pretty(&bundle)?),
|
OutputFormat::Json => println!("{}", serde_json::to_string_pretty(&bundle)?),
|
||||||
|
|
@ -83,11 +88,35 @@ fn main() -> Result<()> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
Commands::Serve => {
|
Commands::Serve => {
|
||||||
info!("serve command is reserved for the HTTP API milestone");
|
ensure_index_cache(&config)?;
|
||||||
println!(
|
server::serve(&config)
|
||||||
"HTTP API is not implemented yet. Planned bind: {}:{}",
|
}
|
||||||
config.server.host, config.server.port
|
Commands::Mcp { command } => match command {
|
||||||
);
|
McpCommand::Tools => mcp::print_tools(),
|
||||||
|
McpCommand::Search { query, limit } => {
|
||||||
|
ensure_index_cache(&config)?;
|
||||||
|
mcp::search(&config, &query, limit)
|
||||||
|
}
|
||||||
|
McpCommand::Context { query, limit } => {
|
||||||
|
ensure_index_cache(&config)?;
|
||||||
|
let mut store = IndexStore::open(&config.vault.path.join(&config.database.path))?;
|
||||||
|
let backend = backend_from_config(&config);
|
||||||
|
store.generate_embeddings(backend.as_ref())?;
|
||||||
|
let hits = store.hybrid_search(&query, limit, backend.as_ref(), &config)?;
|
||||||
|
let bundle = ContextBundle::from_hits(&query, 6000, hits);
|
||||||
|
println!("{}", serde_json::to_string_pretty(&bundle)?);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
McpCommand::Read { path } => mcp::read(&config, &path),
|
||||||
|
},
|
||||||
|
Commands::Capture { kind } => {
|
||||||
|
let (kind_name, args) = match kind {
|
||||||
|
CaptureKind::Memory(args) => ("memory", args),
|
||||||
|
CaptureKind::Task(args) => ("task", args),
|
||||||
|
CaptureKind::Decision(args) => ("decision", args),
|
||||||
|
};
|
||||||
|
let path = agent::capture(&config, kind_name, &args.project, &args.text)?;
|
||||||
|
println!("Captured {kind_name} at {}", path.display());
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -101,6 +130,33 @@ fn init_project(config: &Config, force: bool) -> Result<()> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn run_index(config: &Config, json: bool, embeddings: bool, watch: bool) -> Result<()> {
|
||||||
|
loop {
|
||||||
|
let index = VaultIndex::scan(config)?;
|
||||||
|
config.create_agent_dirs()?;
|
||||||
|
// Indexing writes the rebuildable cache. Deleting it is always allowed.
|
||||||
|
let db_path = config.vault.path.join(&config.database.path);
|
||||||
|
let mut store = IndexStore::open(&db_path)?;
|
||||||
|
let writes = store.write_index(&index)?;
|
||||||
|
if embeddings {
|
||||||
|
let backend = backend_from_config(config);
|
||||||
|
let written = store.generate_embeddings(backend.as_ref())?;
|
||||||
|
info!(written, "generated embeddings");
|
||||||
|
}
|
||||||
|
let summary = index.summary_with_writes(writes);
|
||||||
|
if json {
|
||||||
|
println!("{}", serde_json::to_string_pretty(&summary)?);
|
||||||
|
} else {
|
||||||
|
println!("{summary}");
|
||||||
|
}
|
||||||
|
|
||||||
|
if !watch {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
std::thread::sleep(std::time::Duration::from_secs(5));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn ensure_index_cache(config: &Config) -> Result<std::path::PathBuf> {
|
fn ensure_index_cache(config: &Config) -> Result<std::path::PathBuf> {
|
||||||
let db_path = config.vault.path.join(&config.database.path);
|
let db_path = config.vault.path.join(&config.database.path);
|
||||||
if db_path.exists() {
|
if db_path.exists() {
|
||||||
|
|
@ -114,7 +170,7 @@ fn ensure_index_cache(config: &Config) -> Result<std::path::PathBuf> {
|
||||||
Ok(db_path)
|
Ok(db_path)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn print_search_results(results: &[SearchHit]) {
|
fn print_search_results(results: &[SearchHit], debug_scores: bool) {
|
||||||
for (position, result) in results.iter().enumerate() {
|
for (position, result) in results.iter().enumerate() {
|
||||||
println!("{}. {}", position + 1, result.path);
|
println!("{}. {}", position + 1, result.path);
|
||||||
println!(" title: {}", result.title);
|
println!(" title: {}", result.title);
|
||||||
|
|
@ -123,6 +179,16 @@ fn print_search_results(results: &[SearchHit]) {
|
||||||
}
|
}
|
||||||
println!(" tokens: {}", result.token_estimate);
|
println!(" tokens: {}", result.token_estimate);
|
||||||
println!(" score: {:.4}", result.score);
|
println!(" score: {:.4}", result.score);
|
||||||
|
if debug_scores {
|
||||||
|
println!(
|
||||||
|
" keyword {:.4}, semantic {:.4}, recency {:.4}, tags {:.4}, links {:.4}",
|
||||||
|
result.keyword_score,
|
||||||
|
result.semantic_score,
|
||||||
|
result.recency_score,
|
||||||
|
result.tag_score,
|
||||||
|
result.link_score
|
||||||
|
);
|
||||||
|
}
|
||||||
println!(" {}", result.snippet);
|
println!(" {}", result.snippet);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
52
src/mcp.rs
Normal file
52
src/mcp.rs
Normal file
|
|
@ -0,0 +1,52 @@
|
||||||
|
use anyhow::Result;
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
use crate::config::Config;
|
||||||
|
use crate::db::IndexStore;
|
||||||
|
use crate::embedding::backend_from_config;
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct ToolSpec {
|
||||||
|
name: &'static str,
|
||||||
|
description: &'static str,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn print_tools() -> Result<()> {
|
||||||
|
let tools = vec![
|
||||||
|
ToolSpec {
|
||||||
|
name: "glassmind_search",
|
||||||
|
description: "Search indexed markdown chunks.",
|
||||||
|
},
|
||||||
|
ToolSpec {
|
||||||
|
name: "glassmind_context",
|
||||||
|
description: "Build a compact context bundle from markdown chunks.",
|
||||||
|
},
|
||||||
|
ToolSpec {
|
||||||
|
name: "glassmind_read",
|
||||||
|
description: "Read a note by vault-relative path.",
|
||||||
|
},
|
||||||
|
];
|
||||||
|
println!("{}", serde_json::to_string_pretty(&tools)?);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn search(config: &Config, query: &str, limit: usize) -> Result<()> {
|
||||||
|
let store = IndexStore::open(&config.vault.path.join(&config.database.path))?;
|
||||||
|
let backend = backend_from_config(config);
|
||||||
|
let hits = store.hybrid_search(query, limit, backend.as_ref(), config)?;
|
||||||
|
println!("{}", serde_json::to_string_pretty(&hits)?);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn read(config: &Config, path: &str) -> Result<()> {
|
||||||
|
let path = config.vault.path.join(path);
|
||||||
|
let content = std::fs::read_to_string(&path)?;
|
||||||
|
println!(
|
||||||
|
"{}",
|
||||||
|
serde_json::to_string_pretty(&serde_json::json!({
|
||||||
|
"path": path.display().to_string(),
|
||||||
|
"content": content
|
||||||
|
}))?
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
106
src/server.rs
Normal file
106
src/server.rs
Normal file
|
|
@ -0,0 +1,106 @@
|
||||||
|
use std::io::{Read, Write};
|
||||||
|
use std::net::{TcpListener, TcpStream};
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
|
||||||
|
use crate::config::Config;
|
||||||
|
use crate::context::ContextBundle;
|
||||||
|
use crate::db::IndexStore;
|
||||||
|
use crate::embedding::backend_from_config;
|
||||||
|
|
||||||
|
pub fn serve(config: &Config) -> Result<()> {
|
||||||
|
let addr = format!("{}:{}", config.server.host, config.server.port);
|
||||||
|
let listener = TcpListener::bind(&addr)?;
|
||||||
|
println!("Glassmind listening on http://{addr}");
|
||||||
|
|
||||||
|
for stream in listener.incoming() {
|
||||||
|
match stream {
|
||||||
|
Ok(stream) => {
|
||||||
|
if let Err(err) = handle_connection(config, stream) {
|
||||||
|
eprintln!("request failed: {err}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(err) => eprintln!("connection failed: {err}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle_connection(config: &Config, mut stream: TcpStream) -> Result<()> {
|
||||||
|
let mut buffer = [0; 8192];
|
||||||
|
let read = stream.read(&mut buffer)?;
|
||||||
|
let request = String::from_utf8_lossy(&buffer[..read]);
|
||||||
|
let first_line = request.lines().next().unwrap_or_default();
|
||||||
|
|
||||||
|
let response = if first_line.starts_with("GET /health ") {
|
||||||
|
json_response(200, r#"{"status":"ok"}"#)
|
||||||
|
} else if first_line.starts_with("GET /stats ") {
|
||||||
|
let store = IndexStore::open(&config.vault.path.join(&config.database.path))?;
|
||||||
|
json_response(200, &serde_json::to_string(&store.stats()?)?)
|
||||||
|
} else if first_line.starts_with("POST /search ") {
|
||||||
|
let body = request.split("\r\n\r\n").nth(1).unwrap_or_default();
|
||||||
|
let query = json_field(body, "query").unwrap_or_default();
|
||||||
|
let limit = json_field(body, "limit")
|
||||||
|
.and_then(|raw| raw.parse::<usize>().ok())
|
||||||
|
.unwrap_or(10);
|
||||||
|
let store = IndexStore::open(&config.vault.path.join(&config.database.path))?;
|
||||||
|
let backend = backend_from_config(config);
|
||||||
|
let hits = store.hybrid_search(&query, limit, backend.as_ref(), config)?;
|
||||||
|
json_response(200, &serde_json::to_string(&hits)?)
|
||||||
|
} else if first_line.starts_with("POST /context ") {
|
||||||
|
let body = request.split("\r\n\r\n").nth(1).unwrap_or_default();
|
||||||
|
let query = json_field(body, "query").unwrap_or_default();
|
||||||
|
let limit = json_field(body, "limit")
|
||||||
|
.and_then(|raw| raw.parse::<usize>().ok())
|
||||||
|
.unwrap_or(8);
|
||||||
|
let budget = json_field(body, "budget")
|
||||||
|
.and_then(|raw| raw.parse::<usize>().ok())
|
||||||
|
.unwrap_or(6000);
|
||||||
|
let store = IndexStore::open(&config.vault.path.join(&config.database.path))?;
|
||||||
|
let backend = backend_from_config(config);
|
||||||
|
let hits = store.hybrid_search(&query, limit, backend.as_ref(), config)?;
|
||||||
|
let bundle = ContextBundle::from_hits(&query, budget, hits);
|
||||||
|
json_response(200, &serde_json::to_string(&bundle)?)
|
||||||
|
} else if first_line.starts_with("GET /notes/") {
|
||||||
|
let raw_path = first_line
|
||||||
|
.trim_start_matches("GET /notes/")
|
||||||
|
.split_whitespace()
|
||||||
|
.next()
|
||||||
|
.unwrap_or_default();
|
||||||
|
let note_path = config.vault.path.join(raw_path.replace("%20", " "));
|
||||||
|
let content = std::fs::read_to_string(note_path)?;
|
||||||
|
json_response(
|
||||||
|
200,
|
||||||
|
&serde_json::to_string(&serde_json::json!({ "content": content }))?,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
json_response(404, r#"{"error":"not found"}"#)
|
||||||
|
};
|
||||||
|
|
||||||
|
stream.write_all(response.as_bytes())?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Formats a complete HTTP/1.1 response carrying the JSON text `body`.
///
/// The status line uses the standard reason phrase for the status codes listed below and
/// `"Unknown"` for any other code (previously every code except 404 was labelled "OK").
/// `Content-Length` is the byte length of `body`.
fn json_response(status: u16, body: &str) -> String {
    let label = match status {
        200 => "OK",
        400 => "Bad Request",
        404 => "Not Found",
        405 => "Method Not Allowed",
        500 => "Internal Server Error",
        _ => "Unknown",
    };
    format!(
        "HTTP/1.1 {status} {label}\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{}",
        body.len(),
        body
    )
}
|
||||||
|
|
||||||
|
fn json_field(body: &str, field: &str) -> Option<String> {
|
||||||
|
let value = serde_json::from_str::<serde_json::Value>(body).ok()?;
|
||||||
|
value.get(field).map(|raw| {
|
||||||
|
raw.as_str()
|
||||||
|
.map(ToString::to_string)
|
||||||
|
.unwrap_or_else(|| raw.to_string())
|
||||||
|
.trim_matches('"')
|
||||||
|
.to_string()
|
||||||
|
})
|
||||||
|
}
|
||||||
108
src/vault.rs
108
src/vault.rs
|
|
@ -60,18 +60,6 @@ pub struct IndexWriteSummary {
|
||||||
pub links_written: usize,
|
pub links_written: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Serialize)]
|
|
||||||
pub struct SearchResult {
|
|
||||||
pub note: NoteMetadata,
|
|
||||||
pub score: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug, Serialize)]
|
|
||||||
pub struct ContextBundle {
|
|
||||||
pub query: String,
|
|
||||||
pub sources: Vec<SearchResult>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl VaultIndex {
|
impl VaultIndex {
|
||||||
pub fn scan(config: &Config) -> Result<Self> {
|
pub fn scan(config: &Config) -> Result<Self> {
|
||||||
let vault_path = config
|
let vault_path = config
|
||||||
|
|
@ -146,91 +134,6 @@ impl VaultIndex {
|
||||||
..self.summary()
|
..self.summary()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn search(&self, query: &str, limit: usize) -> Vec<SearchResult> {
|
|
||||||
let terms = query_terms(query);
|
|
||||||
let mut results: Vec<_> = self
|
|
||||||
.notes
|
|
||||||
.iter()
|
|
||||||
.filter_map(|note| {
|
|
||||||
let haystack = format!(
|
|
||||||
"{} {} {}",
|
|
||||||
note.path.display(),
|
|
||||||
note.title,
|
|
||||||
note.blocks
|
|
||||||
.iter()
|
|
||||||
.map(|block| block.text.as_str())
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join(" ")
|
|
||||||
)
|
|
||||||
.to_lowercase();
|
|
||||||
let score = terms
|
|
||||||
.iter()
|
|
||||||
.filter(|term| haystack.contains(term.as_str()))
|
|
||||||
.count();
|
|
||||||
(score > 0).then(|| SearchResult {
|
|
||||||
note: note.clone(),
|
|
||||||
score,
|
|
||||||
})
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
results.sort_by(|a, b| {
|
|
||||||
b.score
|
|
||||||
.cmp(&a.score)
|
|
||||||
.then_with(|| a.note.path.cmp(&b.note.path))
|
|
||||||
});
|
|
||||||
results.truncate(limit);
|
|
||||||
results
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn context_bundle(&self, query: &str, limit: usize) -> ContextBundle {
|
|
||||||
ContextBundle {
|
|
||||||
query: query.to_string(),
|
|
||||||
sources: self.search(query, limit),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ContextBundle {
|
|
||||||
pub fn to_markdown(&self) -> String {
|
|
||||||
let mut out = format!("# Glassmind Context\n\nQuery: `{}`\n\n", self.query);
|
|
||||||
if self.sources.is_empty() {
|
|
||||||
out.push_str("No matching markdown notes were found.\n");
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
out.push_str("## Sources\n\n");
|
|
||||||
for (idx, result) in self.sources.iter().enumerate() {
|
|
||||||
out.push_str(&format!(
|
|
||||||
"{}. `{}` - score {}\n",
|
|
||||||
idx + 1,
|
|
||||||
result.note.path.display(),
|
|
||||||
result.score
|
|
||||||
));
|
|
||||||
out.push_str(&format!(" - title: {}\n", result.note.title));
|
|
||||||
if !result.note.headings.is_empty() {
|
|
||||||
out.push_str(&format!(
|
|
||||||
" - headings: {}\n",
|
|
||||||
result.note.headings.join(" > ")
|
|
||||||
));
|
|
||||||
}
|
|
||||||
if !result.note.wikilinks.is_empty() {
|
|
||||||
let links = result
|
|
||||||
.note
|
|
||||||
.wikilinks
|
|
||||||
.iter()
|
|
||||||
.map(|link| match &link.alias {
|
|
||||||
Some(alias) => format!("{} as {}", link.target, alias),
|
|
||||||
None => link.target.clone(),
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join(", ");
|
|
||||||
out.push_str(&format!(" - wikilinks: {links}\n"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
out
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for IndexSummary {
|
impl fmt::Display for IndexSummary {
|
||||||
|
|
@ -344,14 +247,3 @@ fn is_markdown(path: &Path) -> bool {
|
||||||
.and_then(|extension| extension.to_str())
|
.and_then(|extension| extension.to_str())
|
||||||
.is_some_and(|extension| extension.eq_ignore_ascii_case("md"))
|
.is_some_and(|extension| extension.eq_ignore_ascii_case("md"))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn query_terms(query: &str) -> Vec<String> {
|
|
||||||
query
|
|
||||||
.split_whitespace()
|
|
||||||
.map(|term| {
|
|
||||||
term.trim_matches(|c: char| !c.is_alphanumeric())
|
|
||||||
.to_lowercase()
|
|
||||||
})
|
|
||||||
.filter(|term| !term.is_empty())
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user