Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] }
dialoguer = "0.12"
console = "0.15"
ctrlc = "3"
glob = "0.3.3"

[dev-dependencies]
tempfile = "3"
Expand Down
100 changes: 97 additions & 3 deletions src/cli/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ use std::path::{Path, PathBuf};

use super::interactive;
use crate::config::{Config, Mode};
use crate::core::{Database, EmbeddingEngine, Indexer, SearchEngine, ServerIndexer};
use crate::core::{
Database, EmbeddingEngine, FileFilter, Indexer, SearchEngine, ServerIndexer,
};
use crate::server::{self, Client};
use crate::ui::{self, SearchTui};
use crate::watcher::FileWatcher;
Expand Down Expand Up @@ -95,6 +97,22 @@ enum Commands {
/// Sync/index before searching
#[arg(short = 's', long)]
sync: bool,

/// Filter by file extension (comma-separated, e.g., "rs,toml")
#[arg(long)]
ext: Option<String>,

/// Include only files matching these glob patterns (comma-separated)
#[arg(long)]
glob: Option<String>,

/// Exclude files matching these glob patterns (comma-separated)
#[arg(long)]
exclude: Option<String>,

/// Filter by file type category (e.g., "code", "config", "docs")
#[arg(long)]
r#type: Option<String>,
},

/// Watch directory for changes and auto-index
Expand Down Expand Up @@ -268,6 +286,7 @@ impl Cli {
show_content,
false,
false,
None,
);
}

Expand All @@ -289,11 +308,18 @@ impl Cli {
content,
interactive,
sync,
ext,
glob,
exclude,
r#type,
}) => {
let max_results = max_results
.or(self.max_results)
.unwrap_or(config.max_results);
let content = content || self.content || config.show_content;

let filter = build_file_filter(ext, glob, exclude, r#type);

run_search_smart(
&config,
&query,
Expand All @@ -302,6 +328,7 @@ impl Cli {
content,
interactive,
sync,
filter.as_ref(),
)
}
Some(Commands::Watch { path, dry_run }) => {
Expand Down Expand Up @@ -339,6 +366,66 @@ impl Cli {
}
}

/// Builds the optional [`FileFilter`] for a search from the raw CLI filter flags.
///
/// * `ext` - comma-separated file extensions. Surrounding whitespace and any
///   leading `.` are stripped, so `"rs, .toml"` yields `rs` and `toml`.
/// * `glob` - comma-separated include glob patterns.
/// * `exclude` - comma-separated exclude glob patterns.
/// * `type_category` - a named extension group (`code`, `config` or `docs`,
///   compared case-insensitively) whose extensions are appended after those
///   taken from `ext`.
///
/// Empty list entries (for example from a trailing comma or an empty flag
/// value) are dropped instead of being turned into empty extensions/patterns.
/// An unrecognised `type_category` is reported on stderr and otherwise ignored.
///
/// Returns `None` when the flags contribute no usable criterion at all, so the
/// caller can skip filtering entirely.
fn build_file_filter(
    ext: Option<String>,
    glob: Option<String>,
    exclude: Option<String>,
    type_category: Option<String>,
) -> Option<FileFilter> {
    const CODE_EXTENSIONS: &[&str] = &[
        "rs", "py", "js", "ts", "c", "cpp", "h", "hpp", "go", "java", "rb", "php", "sh",
    ];
    const CONFIG_EXTENSIONS: &[&str] = &[
        "toml", "json", "yaml", "yml", "xml", "ini", "conf", "config", "properties",
    ];
    const DOCS_EXTENSIONS: &[&str] = &["md", "txt", "rst", "adoc", "pdf"];

    /// Splits a comma-separated flag value into trimmed, non-empty entries.
    fn split_list(raw: Option<String>) -> Vec<String> {
        raw.as_deref()
            .unwrap_or("")
            .split(',')
            .map(str::trim)
            .filter(|entry| !entry.is_empty())
            .map(String::from)
            .collect()
    }

    // `Path::extension` never includes the dot, so accept ".rs" as well as "rs".
    let mut extensions: Vec<String> = split_list(ext)
        .into_iter()
        .map(|entry| entry.trim_start_matches('.').to_string())
        .filter(|entry| !entry.is_empty())
        .collect();
    let include_globs = split_list(glob);
    let exclude_globs = split_list(exclude);

    if let Some(category) = type_category {
        let known: Option<&[&str]> = match category.trim().to_ascii_lowercase().as_str() {
            "code" => Some(CODE_EXTENSIONS),
            "config" => Some(CONFIG_EXTENSIONS),
            "docs" => Some(DOCS_EXTENSIONS),
            _ => None,
        };
        match known {
            Some(group) => extensions.extend(group.iter().map(|e| e.to_string())),
            // Tell the user instead of silently searching without the filter
            // they asked for.
            None => eprintln!(
                "warning: unknown file type category '{}' (expected one of: code, config, docs); ignoring it",
                category
            ),
        }
    }

    // Nothing usable was supplied: an all-empty filter would accept every path
    // anyway, so report "no filter" to the caller.
    if extensions.is_empty() && include_globs.is_empty() && exclude_globs.is_empty() {
        return None;
    }

    Some(FileFilter::new(extensions, include_globs, exclude_globs))
}

fn print_quick_help() {
ui::print_banner();

Expand Down Expand Up @@ -547,6 +634,7 @@ fn run_search_smart(
show_content: bool,
interactive: bool,
sync: bool,
filter: Option<&FileFilter>,
) -> Result<()> {
if sync {
run_index(
Expand Down Expand Up @@ -576,13 +664,18 @@ fn run_search_smart(
max_results,
show_content,
interactive,
filter,
);
}

// Note: Filters not yet supported in server mode
if filter.is_some() {
ui::print_warning("Filters are currently only supported in local mode. Ignoring filters.");
}
run_search_server(&client, query, path, max_results, show_content)
}
Mode::Local => {
run_search_local(config, query, path, max_results, show_content, interactive)
run_search_local(config, query, path, max_results, show_content, interactive, filter)
}
}
}
Expand Down Expand Up @@ -653,6 +746,7 @@ fn run_search_local(
max_results: usize,
show_content: bool,
interactive: bool,
filter: Option<&FileFilter>,
) -> Result<()> {
use std::time::Instant;

Expand Down Expand Up @@ -684,7 +778,7 @@ fn run_search_local(
println!();

let start = Instant::now();
let results = search.search(query, path, max_results)?;
let results = search.search(query, path, filter, max_results)?;
let elapsed = start.elapsed();

if results.is_empty() {
Expand Down
10 changes: 10 additions & 0 deletions src/core/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ use chrono::{DateTime, Utc};
use rusqlite::{params, Connection};
use std::path::{Path, PathBuf};

use super::filter::FileFilter;

pub struct Database {
conn: Connection,
}
Expand Down Expand Up @@ -156,6 +158,7 @@ impl Database {
&self,
query_embedding: &[f32],
path_prefix: &Path,
filter: Option<&FileFilter>,
limit: usize,
) -> Result<Vec<SearchResult>> {
let path_prefix_str = path_prefix.to_string_lossy();
Expand Down Expand Up @@ -185,6 +188,13 @@ impl Database {
})
})?
.filter_map(Result::ok)
.filter(|r| {
if let Some(f) = filter {
f.matches(&r.path)
} else {
true
}
})
.collect();

// Sort by similarity (highest first)
Expand Down
156 changes: 156 additions & 0 deletions src/core/filter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
use glob::Pattern;
use std::path::Path;

/// Path filter combining an extension allow-list with include and exclude
/// glob patterns; see `FileFilter::matches` for how the three are combined.
#[derive(Debug, Clone)]
pub struct FileFilter {
/// Allowed file extensions, compared ASCII-case-insensitively against the
/// path's extension. An empty list places no restriction on the extension.
pub extensions: Vec<String>,
/// Compiled include patterns. When non-empty, a path must match at least
/// one of them to be accepted.
pub include_globs: Vec<Pattern>,
/// Compiled exclude patterns. A path matching any of them is rejected.
pub exclude_globs: Vec<Pattern>,
}

impl FileFilter {
pub fn new(
extensions: Vec<String>,
include_globs: Vec<String>,
exclude_globs: Vec<String>,
) -> Self {
let include_globs = include_globs
.into_iter()
.filter_map(|s| Pattern::new(&s).ok())
.collect();
let exclude_globs = exclude_globs
.into_iter()
.filter_map(|s| Pattern::new(&s).ok())
.collect();

Self {
extensions,
include_globs,
exclude_globs,
}
}

pub fn matches(&self, path: &Path) -> bool {
// 1. Check extension (whitelist)
if !self.extensions.is_empty() {
match path.extension().and_then(|e| e.to_str()) {
Some(ext) => {
if !self.extensions.iter().any(|e| e.eq_ignore_ascii_case(ext)) {
return false;
}
}
None => return false,
}
}

// 2. Check exclude patterns (blacklist)
for pattern in &self.exclude_globs {
if pattern.matches_path(path) {
return false;
}
}

// 3. Check include patterns (whitelist)
if !self.include_globs.is_empty() {
let mut matched = false;
for pattern in &self.include_globs {
if pattern.matches_path(path) {
matched = true;
break;
}
}
if !matched {
return false;
}
}

true
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Turns string literals into the owned strings `FileFilter::new` takes.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Applies `filter` to a path written as a plain string.
    fn accepts(filter: &FileFilter, path: &str) -> bool {
        filter.matches(&PathBuf::from(path))
    }

    #[test]
    fn test_filter_extension() {
        let filter = FileFilter::new(owned(&["rs", "toml"]), Vec::new(), Vec::new());

        for path in &["src/main.rs", "Cargo.toml"] {
            assert!(accepts(&filter, path), "{} should be accepted", path);
        }
        // Wrong extension, and no extension at all.
        for path in &["README.md", "src/main"] {
            assert!(!accepts(&filter, path), "{} should be rejected", path);
        }
    }

    #[test]
    fn test_filter_include_glob() {
        let filter = FileFilter::new(Vec::new(), owned(&["src/**/*.rs"]), Vec::new());

        // The paths here are relative, just like the pattern they are matched against.
        for path in &["src/main.rs", "src/core/mod.rs"] {
            assert!(accepts(&filter, path), "{} should be accepted", path);
        }
        for path in &["tests/main.rs", "README.md"] {
            assert!(!accepts(&filter, path), "{} should be rejected", path);
        }
    }

    #[test]
    fn test_filter_exclude_glob() {
        let filter = FileFilter::new(
            Vec::new(),
            Vec::new(),
            owned(&["target/**", "node_modules/**"]),
        );

        assert!(accepts(&filter, "src/main.rs"));
        for path in &["target/debug/vgrep", "node_modules/react/index.js"] {
            assert!(!accepts(&filter, path), "{} should be rejected", path);
        }
    }

    #[test]
    fn test_filter_combined() {
        // A path is accepted only if it has an allowed extension, matches no
        // exclude pattern, and matches an include pattern.
        let filter = FileFilter::new(
            owned(&["rs"]),
            owned(&["src/**"]),
            owned(&["**/*_test.rs"]),
        );

        // Passes all three checks.
        assert!(accepts(&filter, "src/main.rs"));

        // Fails the extension check.
        assert!(!accepts(&filter, "src/main.py"));

        // Right extension, not excluded, but outside the include pattern.
        assert!(!accepts(&filter, "tests/main.rs"));

        // Right extension, but hit by the exclude pattern.
        assert!(!accepts(&filter, "src/my_test.rs"));
    }
}

impl Default for FileFilter {
    /// Returns a filter with no extensions and no glob patterns.
    fn default() -> Self {
        // With nothing to compile, `new` yields three empty lists.
        Self::new(Vec::new(), Vec::new(), Vec::new())
    }
}
Loading
Loading