diff --git a/Cargo.lock b/Cargo.lock index bb98223..ba6f11b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -100,7 +100,7 @@ dependencies = [ [[package]] name = "exoshell" -version = "0.9.0" +version = "0.10.0" dependencies = [ "async-trait", "futures-util", diff --git a/Cargo.toml b/Cargo.toml index 8549422..d46b5bd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "exoshell" -version = "0.9.0" +version = "0.10.0" edition = "2024" license = "GPL-3.0-or-later" diff --git a/README.md b/README.md index c0a209e..7cb6cdf 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ The current implementation supports the first shell-adjacent model chat mileston The codebase also contains the Phase 1.5 context engine foundation: context entries, provenance metadata, priority and size estimates, a session context store, provider registry, default manual/file/command-output/stdin/directory-summary providers, context REPL commands, deterministic pruning, budget checks, transcript events, startup context flags, piped stdin import, and prompt-context rendering. -The active milestone is Phase 3. The current implementation adds stance selection, explicit prompt assembly, visible prompt/context budget estimates, command suggestion IDs, simple risky-command warnings, command copy/explain/discard actions, a plain terminal session panel, Phase 2 help text, configurable model routing, Git-native project detection, lightweight project scans, attachable Git status context, attachable Git diff context, and attachable recent commit context. +The active milestone is Phase 3. The current implementation adds stance selection, explicit prompt assembly, visible prompt/context budget estimates, command suggestion IDs, simple risky-command warnings, command copy/explain/discard actions, a plain terminal session panel, Phase 2 help text, configurable model routing, Git-native project detection, lightweight project scans, attachable Git status context, attachable Git diff context, attachable recent commit context, and attachable repository search context. The broader roadmap is tracked in [docs/PHASES.md](docs/PHASES.md) and [docs/tasks](docs/tasks). diff --git a/docs/quickstart.md b/docs/quickstart.md index 7687637..75240e5 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -232,6 +232,13 @@ exo> /add-commits --count 10 exo> /add-commits --author=alice src/app.rs ``` +Attach repository search results: + +```text +exo> /search ContextProvider +exo> /search-path Cargo.toml +``` + Ask a question: ```text diff --git a/docs/versioning.md b/docs/versioning.md index 6abd254..b0f5ed3 100644 --- a/docs/versioning.md +++ b/docs/versioning.md @@ -108,3 +108,4 @@ Historical codenames should be tracked in docs/versioning.md below * 0.7.0 diff-lantern * 0.8.0 commit-oracle * 0.9.0 summary-relay +* 0.10.0 search-relay diff --git a/src/app.rs b/src/app.rs index 4b33104..2881528 100644 --- a/src/app.rs +++ b/src/app.rs @@ -254,6 +254,14 @@ impl App { return self.add_git_commit_context(args); } + if let Some(query) = trimmed.strip_prefix("/search-path ") { + return self.add_repository_search_context("path", query.trim()); + } + + if let Some(query) = trimmed.strip_prefix("/search ") { + return self.add_repository_search_context("text", query.trim()); + } + Err(ContextError::InvalidInput(format!("unknown context command: {trimmed}")).into()) } @@ -349,6 +357,29 @@ impl App { ) } + pub fn add_repository_search_context( + &mut self, + mode: &str, + query: &str, + ) -> Result { + if query.trim().is_empty() { + return Err(ContextError::InvalidInput("search query cannot be empty".into()).into()); + } + let mut provider_options = HashMap::new(); + provider_options.insert("mode".into(), mode.to_string()); + provider_options.insert("query".into(), query.trim().to_string()); + + let path = self.project_context_path()?; + self.add_context( + "repo_search", + ContextProviderRequest { + path: Some(path), + provider_options, + ..ContextProviderRequest::default() + }, + ) + } + pub fn project_scan(&mut self, preview: bool) -> Result { let cwd = std::env::current_dir().map_err(|error| ProjectError::Read { path: PathBuf::from("."), @@ -696,6 +727,8 @@ fn help_overview() -> &'static str { /add-git-status attach current Git branch and status /add-diff [--staged] [path] attach unstaged or staged Git diff context /add-commits [options] [path] attach recent Git commit history +/search attach repository text search results +/search-path attach repository path search results /add-output paste command output as explicit context /stance [name] show or set operator, audit, teach, or quiet /copy print a suggested command; does not execute it @@ -720,6 +753,9 @@ fn help_topic(topic: &str) -> &'static str { "git" => { "Use /add-git-status to attach current branch, staged files, modified files, and untracked files. Use /add-diff, /add-diff --staged, or /add-diff --staged to attach read-only diff context. Use /add-commits, /add-commits --count 10, /add-commits --author=alice, or /add-commits src/app.rs to attach recent history." } + "search" => { + "Use /search to attach repository text matches with file, line, and column locations. Use /search-path to attach matching repository paths. Text search uses ripgrep when available and falls back to a bounded repository walk." + } "stance" => { "Stances change the compact prompt fragment used for the next request: operator is concise and action-oriented, audit focuses on risks, teach explains more, and quiet minimizes prose while keeping safety warnings." } @@ -730,7 +766,7 @@ fn help_topic(topic: &str) -> &'static str { "The current line REPL does not install advanced terminal keybindings. Use /keys to see the predictable slash-command fallbacks for copy, explain, discard, context, and stance actions." } _ => { - "Unknown help topic. Try /help context, /help project, /help git, /help stance, /help commands, or /help keys." + "Unknown help topic. Try /help context, /help project, /help git, /help search, /help stance, /help commands, or /help keys." } } } @@ -888,6 +924,7 @@ mod tests { use crate::config::{ CommandConfig, InteractionConfig, ProviderConfig, ShellConfig, TranscriptConfig, }; + use std::fs; use std::sync::{Arc, Mutex}; #[test] @@ -956,7 +993,8 @@ mod tests { "directory_summary".to_string(), "git_status".to_string(), "git_diff".to_string(), - "git_commits".to_string() + "git_commits".to_string(), + "repo_search".to_string() ] ); assert_eq!(app.context_store.total_size().characters, 0); @@ -1012,6 +1050,29 @@ mod tests { ); } + #[test] + fn search_path_command_adds_repository_search_context() { + let tempdir = tempfile::tempdir().expect("tempdir"); + let src = tempdir.path().join("src"); + fs::create_dir(&src).expect("create src"); + fs::write(src.join("app.rs"), "fn main() {}\n").expect("write app"); + + let mut config = test_config(); + config.project.root = Some(tempdir.path().to_path_buf()); + let mut app = App::new(config, Box::new(NoopProvider)); + + let message = app + .handle_command("/search-path APP") + .expect("search path command"); + + assert_eq!(message, "added ctx-001 (repository path search: APP)"); + assert!( + app.handle_command("/context show ctx-001") + .expect("show") + .contains("src/app.rs") + ); + } + #[test] fn stance_command_shows_and_changes_current_stance() { let mut app = App::new(test_config(), Box::new(NoopProvider)); @@ -1139,6 +1200,16 @@ mod tests { .expect("help git") .contains("/add-diff --staged") ); + assert!( + app.handle_command("/help") + .expect("help") + .contains("/search ") + ); + assert!( + app.handle_command("/help search") + .expect("help search") + .contains("ripgrep") + ); assert!( app.handle_command("/help commands") .expect("help") diff --git a/src/context.rs b/src/context.rs index 556d733..3c1e713 100644 --- a/src/context.rs +++ b/src/context.rs @@ -319,6 +319,7 @@ pub fn register_default_context_providers( registry.register(Box::new(GitStatusContextProvider))?; registry.register(Box::new(GitDiffContextProvider::default()))?; registry.register(Box::new(GitCommitContextProvider))?; + registry.register(Box::new(RepositorySearchContextProvider::default()))?; Ok(()) } @@ -658,6 +659,113 @@ impl ContextProvider for GitCommitContextProvider { } } +#[derive(Debug, Clone)] +pub struct RepositorySearchContextProvider { + pub max_results: usize, + pub max_file_bytes: u64, +} + +impl Default for RepositorySearchContextProvider { + fn default() -> Self { + Self { + max_results: 100, + max_file_bytes: 256 * 1024, + } + } +} + +impl ContextProvider for RepositorySearchContextProvider { + fn metadata(&self) -> ContextProviderMetadata { + ContextProviderMetadata { + name: "repo_search".into(), + kind: ContextKind::SearchResult, + description: "searches repository text or paths as explicit context".into(), + } + } + + fn collect(&self, request: ContextProviderRequest) -> Result { + let path = request + .path + .or(request.cwd) + .unwrap_or_else(|| PathBuf::from(".")); + let query = request + .provider_options + .get("query") + .ok_or_else(|| ContextError::InvalidInput("search query is required".into()))? + .trim() + .to_string(); + if query.is_empty() { + return Err(ContextError::InvalidInput( + "search query cannot be empty".into(), + )); + } + let mode = request + .provider_options + .get("mode") + .map(String::as_str) + .unwrap_or("text"); + let search = match mode { + "text" => search_text(&path, &query, self.max_results, self.max_file_bytes)?, + "path" => search_paths(&path, &query, self.max_results)?, + other => { + return Err(ContextError::InvalidInput(format!( + "unsupported search mode '{other}', expected text or path" + ))); + } + }; + + let mut provenance = ContextProvenance::new(ContextOrigin::Search); + provenance.source_path = Some(path); + provenance + .provider_details + .insert("mode".into(), mode.to_string()); + provenance + .provider_details + .insert("query".into(), query.clone()); + provenance + .provider_details + .insert("engine".into(), search.engine.to_string()); + provenance + .provider_details + .insert("result_count".into(), search.results.len().to_string()); + provenance + .provider_details + .insert("max_results".into(), self.max_results.to_string()); + provenance + .provider_details + .insert("truncated".into(), search.truncated.to_string()); + + let content = if search.results.is_empty() { + format!( + "mode: {mode}\nquery: {query}\nengine: {}\ntruncated: {}\nresults: none", + search.engine, search.truncated + ) + } else { + format!( + "mode: {mode}\nquery: {query}\nengine: {}\ntruncated: {}\nresults:\n{}", + search.engine, + search.truncated, + search + .results + .iter() + .map(|result| format!("- {result}")) + .collect::>() + .join("\n") + ) + }; + + Ok(ContextEntry::new( + "", + ContextKind::SearchResult, + request + .title + .unwrap_or_else(|| format!("repository {mode} search: {query}")), + provenance, + content, + )) + } +} + #[derive(Debug, Clone)] pub struct GitDiffContextProvider { pub max_characters: usize, @@ -883,6 +991,191 @@ fn git_log_output( ))) } +#[derive(Debug, Clone, PartialEq, Eq)] +struct SearchCollection { + engine: SearchEngine, + results: Vec, + truncated: bool, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum SearchEngine { + Ripgrep, + Fallback, +} + +impl fmt::Display for SearchEngine { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Ripgrep => formatter.write_str("ripgrep"), + Self::Fallback => formatter.write_str("fallback"), + } + } +} + +fn search_text( + root: &Path, + query: &str, + max_results: usize, + max_file_bytes: u64, +) -> Result { + let rg_output = Command::new("rg") + .arg("--line-number") + .arg("--column") + .arg("--no-heading") + .arg("--color=never") + .arg("--fixed-strings") + .arg("--") + .arg(query) + .arg(root) + .output(); + if let Ok(output) = rg_output { + let usable_output = output.status.success() || output.status.code() == Some(1); + if usable_output { + let stdout = String::from_utf8(output.stdout).map_err(|error| { + ContextError::UnsupportedContent(format!("rg output was not valid UTF-8: {error}")) + })?; + let scan_limit = max_results.saturating_add(1); + let all_results: Vec = stdout + .lines() + .take(scan_limit) + .map(|line| normalize_search_line(root, line)) + .collect(); + let truncated = all_results.len() > max_results; + return Ok(SearchCollection { + engine: SearchEngine::Ripgrep, + results: all_results.into_iter().take(max_results).collect(), + truncated, + }); + } + } + + fallback_text_search(root, query, max_results, max_file_bytes) +} + +fn search_paths( + root: &Path, + query: &str, + max_results: usize, +) -> Result { + let mut results = Vec::new(); + let needle = query.to_lowercase(); + let scan_limit = max_results.saturating_add(1); + visit_files(root, scan_limit, &mut |path| { + let relative = relative_path(root, path); + if relative.to_lowercase().contains(&needle) { + results.push(relative); + } + Ok(results.len() < scan_limit) + })?; + let truncated = results.len() > max_results; + results.truncate(max_results); + Ok(SearchCollection { + engine: SearchEngine::Fallback, + results, + truncated, + }) +} + +fn fallback_text_search( + root: &Path, + query: &str, + max_results: usize, + max_file_bytes: u64, +) -> Result { + let mut results = Vec::new(); + let needle = query.to_lowercase(); + let scan_limit = max_results.saturating_add(1); + visit_files(root, scan_limit, &mut |path| { + let Ok(metadata) = fs::metadata(path) else { + return Ok(true); + }; + if metadata.len() > max_file_bytes { + return Ok(true); + } + let bytes = match fs::read(path) { + Ok(bytes) => bytes, + Err(_) => return Ok(true), + }; + if bytes.contains(&0) { + return Ok(true); + } + let Ok(content) = String::from_utf8(bytes) else { + return Ok(true); + }; + for (index, line) in content.lines().enumerate() { + let lower_line = line.to_lowercase(); + if let Some(column_index) = lower_line.find(&needle) { + results.push(format!( + "{}:{}:{}:{}", + relative_path(root, path), + index + 1, + column_index + 1, + line.trim() + )); + if results.len() >= scan_limit { + break; + } + } + } + Ok(results.len() < scan_limit) + })?; + let truncated = results.len() > max_results; + results.truncate(max_results); + Ok(SearchCollection { + engine: SearchEngine::Fallback, + results, + truncated, + }) +} + +fn visit_files(root: &Path, max_results: usize, visitor: &mut F) -> Result<(), ContextError> +where + F: FnMut(&Path) -> Result, +{ + if max_results == 0 { + return Ok(()); + } + let mut stack = vec![root.to_path_buf()]; + while let Some(path) = stack.pop() { + let entries = match fs::read_dir(&path) { + Ok(entries) => entries, + Err(_) => continue, + }; + for entry in entries.filter_map(Result::ok) { + let name = entry.file_name().to_string_lossy().to_string(); + if is_noisy_path(&name) { + continue; + } + let entry_path = entry.path(); + let Ok(file_type) = entry.file_type() else { + continue; + }; + if file_type.is_dir() { + stack.push(entry_path); + } else if file_type.is_file() && !visitor(&entry_path)? { + return Ok(()); + } + } + } + Ok(()) +} + +fn normalize_search_line(root: &Path, line: &str) -> String { + let root = root.to_string_lossy(); + line.strip_prefix(root.as_ref()) + .and_then(|line| line.strip_prefix(std::path::MAIN_SEPARATOR)) + .unwrap_or(line) + .replace('\\', "/") +} + +fn relative_path(root: &Path, path: &Path) -> String { + path.strip_prefix(root) + .unwrap_or(path) + .to_string_lossy() + .replace('\\', "/") +} + #[derive(Debug, Clone, PartialEq, Eq)] struct TruncatedContent { content: String, @@ -1782,7 +2075,8 @@ mod tests { "directory_summary".to_string(), "git_status".to_string(), "git_diff".to_string(), - "git_commits".to_string() + "git_commits".to_string(), + "repo_search".to_string() ] ); } @@ -1848,6 +2142,74 @@ mod tests { assert_eq!(metadata.kind, ContextKind::GitHistory); } + #[test] + fn repository_search_provider_metadata_is_search_context() { + let metadata = RepositorySearchContextProvider::default().metadata(); + + assert_eq!(metadata.name, "repo_search"); + assert_eq!(metadata.kind, ContextKind::SearchResult); + } + + #[test] + fn repository_search_path_mode_collects_matching_paths() { + let tempdir = tempfile::tempdir().expect("tempdir"); + let src = tempdir.path().join("src"); + fs::create_dir(&src).expect("create src"); + fs::write(src.join("ContextProvider.rs"), "needle").expect("write file"); + fs::write(tempdir.path().join("README.md"), "readme").expect("write readme"); + + let provider = RepositorySearchContextProvider::default(); + let mut provider_options = HashMap::new(); + provider_options.insert("mode".into(), "path".into()); + provider_options.insert("query".into(), "contextprovider".into()); + + let entry = provider + .collect(ContextProviderRequest { + path: Some(tempdir.path().to_path_buf()), + provider_options, + ..ContextProviderRequest::default() + }) + .expect("search context"); + + assert_eq!(entry.kind, ContextKind::SearchResult); + assert!(entry.content.contains("mode: path")); + assert!(entry.content.contains("src/ContextProvider.rs")); + assert_eq!( + entry.provenance.provider_details.get("engine"), + Some(&"fallback".to_string()) + ); + } + + #[test] + fn fallback_text_search_reports_locations_and_truncation() { + let tempdir = tempfile::tempdir().expect("tempdir"); + fs::write( + tempdir.path().join("alpha.txt"), + "first Needle\nsecond needle\nthird needle\n", + ) + .expect("write text"); + + let search = + fallback_text_search(tempdir.path(), "needle", 2, 256 * 1024).expect("fallback search"); + + assert_eq!(search.engine, SearchEngine::Fallback); + assert!(search.truncated); + assert_eq!(search.results.len(), 2); + assert!(search.results[0].contains("alpha.txt:1:7:first Needle")); + assert!(search.results[1].contains("alpha.txt:2:8:second needle")); + } + + #[test] + fn fallback_text_search_skips_large_files() { + let tempdir = tempfile::tempdir().expect("tempdir"); + fs::write(tempdir.path().join("large.txt"), "needle").expect("write large"); + + let search = + fallback_text_search(tempdir.path(), "needle", 10, 2).expect("fallback search"); + + assert!(search.results.is_empty()); + } + #[test] fn git_commit_count_is_bounded() { assert_eq!(parse_git_commit_count(None).expect("default"), 5); diff --git a/src/repl.rs b/src/repl.rs index c5499eb..335c7af 100644 --- a/src/repl.rs +++ b/src/repl.rs @@ -61,6 +61,8 @@ impl Repl { || input.starts_with("/add-diff ") || input == "/add-commits" || input.starts_with("/add-commits ") + || input.starts_with("/search ") + || input.starts_with("/search-path ") { match self.app.handle_command(&input) { Ok(message) => println!("{message}"),