Ripgrep search.rs: Code Companion¶
Reference code for the search.rs lecture. Sections correspond to the lecture document.
Section 1: The Config Struct¶
/// The configuration for the search worker.
///
/// Among a few other things, the configuration primarily controls the way we
/// show search results to users at a very high level.
#[derive(Clone, Debug)]
struct Config {
/// Path of the preprocessor command, or `None` when no preprocessor is
/// configured.
preprocessor: Option<std::path::PathBuf>,
/// Globs that determine which files are run through the preprocessor.
preprocessor_globs: ignore::overrides::Override,
/// Whether common compressed files are decompressed before being searched.
search_zip: bool,
/// Binary detection applied to haystacks that are not explicit.
binary_implicit: grep::searcher::BinaryDetection,
/// Binary detection applied to explicit haystacks.
binary_explicit: grep::searcher::BinaryDetection,
}
impl Default for Config {
    /// Returns the baseline configuration: no preprocessor, an empty
    /// preprocessor glob set, decompression disabled, and binary detection
    /// set to `BinaryDetection::none()` for both implicit and explicit
    /// haystacks.
    fn default() -> Self {
        let preprocessor_globs = ignore::overrides::Override::empty();
        let binary_implicit = grep::searcher::BinaryDetection::none();
        let binary_explicit = grep::searcher::BinaryDetection::none();
        Self {
            preprocessor: None,
            preprocessor_globs,
            search_zip: false,
            binary_implicit,
            binary_explicit,
        }
    }
}
Config field purposes:
| Field | Type | Purpose |
|---|---|---|
| `preprocessor` | `Option<PathBuf>` | External command to transform files |
| `preprocessor_globs` | `Override` | Which files to preprocess |
| `search_zip` | `bool` | Auto-decompress compressed files |
| `binary_implicit` | `BinaryDetection` | Detection for discovered files |
| `binary_explicit` | `BinaryDetection` | Detection for user-specified files |
Section 2: The Builder Pattern¶
/// A builder for configuring and constructing a search worker.
#[derive(Clone, Debug)]
pub(crate) struct SearchWorkerBuilder {
/// Settings accumulated by the builder's setter methods; `build` clones
/// them into each worker.
config: Config,
/// Builder for command readers; `build` clones it into each worker.
command_builder: grep::cli::CommandReaderBuilder,
}
impl SearchWorkerBuilder {
    /// Returns a builder holding the default configuration and a command
    /// reader builder that has `async_stderr` switched on.
    pub(crate) fn new() -> SearchWorkerBuilder {
        let mut cmd_reader = grep::cli::CommandReaderBuilder::new();
        cmd_reader.async_stderr(true);
        SearchWorkerBuilder {
            config: Config::default(),
            command_builder: cmd_reader,
        }
    }

    /// Assembles a search worker from the given matcher, searcher and
    /// printer.
    ///
    /// The worker receives its own clones of this builder's configuration
    /// and command reader builder; the builder itself is only borrowed.
    pub(crate) fn build<W: WriteColor>(
        &self,
        matcher: PatternMatcher,
        searcher: grep::searcher::Searcher,
        printer: Printer<W>,
    ) -> SearchWorker<W> {
        // The decompression reader builder is only constructed when
        // `search_zip` is enabled; otherwise the worker carries `None`.
        let decomp_builder = self.config.search_zip.then(|| {
            let mut zip_reader = grep::cli::DecompressionReaderBuilder::new();
            zip_reader.async_stderr(true);
            zip_reader
        });
        SearchWorker {
            config: self.config.clone(),
            command_builder: self.command_builder.clone(),
            decomp_builder,
            matcher,
            searcher,
            printer,
        }
    }
}
The .then() pattern:
// Some(value) if the condition is true, None otherwise
let maybe_builder = condition.then(|| expensive_construction());
Section 3: The Builder Methods¶
impl SearchWorkerBuilder {
    /// Configures the preprocessor command, or clears it when `cmd` is
    /// `None`.
    ///
    /// A given command is passed through `grep::cli::resolve_binary`
    /// immediately, and the resolved path is what gets stored.
    ///
    /// # Errors
    ///
    /// Returns the error from `resolve_binary` if the command cannot be
    /// resolved. The previously stored setting is left untouched in that
    /// case.
    pub(crate) fn preprocessor(
        &mut self,
        cmd: Option<std::path::PathBuf>,
    ) -> anyhow::Result<&mut SearchWorkerBuilder> {
        let resolved = match cmd {
            Some(prog) => Some(grep::cli::resolve_binary(&prog)?),
            None => None,
        };
        self.config.preprocessor = resolved;
        Ok(self)
    }

    /// Replaces the glob set that decides which files are handed to the
    /// preprocessor.
    pub(crate) fn preprocessor_globs(
        &mut self,
        globs: ignore::overrides::Override,
    ) -> &mut SearchWorkerBuilder {
        self.config.preprocessor_globs = globs;
        self
    }

    /// Turns decompression and searching of common compressed files on or
    /// off.
    pub(crate) fn search_zip(
        &mut self,
        yes: bool,
    ) -> &mut SearchWorkerBuilder {
        self.config.search_zip = yes;
        self
    }

    /// Sets the binary detection used for implicitly discovered files
    /// (directory traversal).
    ///
    /// Typically uses BinaryDetection::quit() to skip binary files.
    pub(crate) fn binary_detection_implicit(
        &mut self,
        detection: grep::searcher::BinaryDetection,
    ) -> &mut SearchWorkerBuilder {
        self.config.binary_implicit = detection;
        self
    }

    /// Sets the binary detection used for explicitly requested files.
    ///
    /// Should NOT use quit(): user-requested files must never be skipped.
    pub(crate) fn binary_detection_explicit(
        &mut self,
        detection: grep::searcher::BinaryDetection,
    ) -> &mut SearchWorkerBuilder {
        self.config.binary_explicit = detection;
        self
    }
}
BinaryDetection modes:
BinaryDetection::none() // No detection, search everything
BinaryDetection::quit(b'\x00') // Stop searching once the given byte (here NUL) is seen
BinaryDetection::convert(b'\x00') // Replace the given byte (here NUL) with the line terminator
Section 4: SearchResult¶
/// The result of executing a search.
#[derive(Clone, Debug, Default)]
pub(crate) struct SearchResult {
/// Whether the search found a match.
has_match: bool,
/// Aggregate statistics for the search, when available.
stats: Option<grep::printer::Stats>,
}
impl SearchResult {
    /// Reports whether this search produced a match.
    pub(crate) fn has_match(&self) -> bool {
        self.has_match
    }

    /// Borrows the aggregate statistics of this single search, if any were
    /// recorded.
    ///
    /// Computing statistics can be expensive, so they are only present when
    /// the caller explicitly enabled them on the printer it supplied.
    pub(crate) fn stats(&self) -> Option<&grep::printer::Stats> {
        let recorded = &self.stats;
        recorded.as_ref()
    }
}
Exit code mapping:
// In main.rs
if search_result.has_match() {
ExitCode::from(0) // Matches found
} else {
ExitCode::from(1) // No matches
}
Section 5: PatternMatcher Enum¶
/// The pattern matcher used by a search worker.
#[derive(Clone, Debug)]
pub(crate) enum PatternMatcher {
/// A matcher from the `grep::regex` module.
RustRegex(grep::regex::RegexMatcher),
/// A matcher from the `grep::pcre2` module. This variant only exists when
/// the `pcre2` feature is enabled.
#[cfg(feature = "pcre2")]
PCRE2(grep::pcre2::RegexMatcher),
}
Conditional compilation:
// When pcre2 feature is disabled, this variant doesn't exist
#[cfg(feature = "pcre2")]
PCRE2(grep::pcre2::RegexMatcher),
// Usage in match expressions:
match self.matcher {
RustRegex(ref m) => /* ... */,
#[cfg(feature = "pcre2")]
PCRE2(ref m) => /* ... */,
}
Why enum over trait object:
// Enum approach (used here):
match matcher { RustRegex(m) => m.search(), PCRE2(m) => m.search() }
// Pro: inlinable, no vtable
// Con: closed set of variants
// Trait object approach:
dyn Matcher
// Pro: open to extension
// Con: virtual dispatch overhead
Section 6: Printer Enum¶
/// The printer used by a search worker.
///
/// Each variant wraps one of the printers from `grep::printer`.
///
/// The `W` type parameter refers to the type of the underlying writer.
#[derive(Clone, Debug)]
pub(crate) enum Printer<W> {
/// Use the standard printer, which supports the classic grep-like format.
Standard(grep::printer::Standard<W>),
/// Use the summary printer, which supports aggregate displays of search
/// results.
Summary(grep::printer::Summary<W>),
/// A JSON printer, which emits results in the JSON Lines format.
JSON(grep::printer::JSON<W>),
}
impl<W: WriteColor> Printer<W> {
    /// Mutably borrows the writer wrapped by whichever printer variant is
    /// active.
    pub(crate) fn get_mut(&mut self) -> &mut W {
        match self {
            Self::Standard(standard) => standard.get_mut(),
            Self::Summary(summary) => summary.get_mut(),
            Self::JSON(json) => json.get_mut(),
        }
    }
}
Printer types by mode:
| Flag | Printer Type | Purpose |
|---|---|---|
| (default) | Standard | Line-by-line matches |
| `-c` | Summary | Match counts |
| `-l` | Summary | Filenames only |
| `--json` | JSON | Structured output |
Section 7: The SearchWorker Struct¶
/// A worker for executing searches.
///
/// It is intended for a single worker to execute many searches, and is
/// generally intended to be used from a single thread.
#[derive(Clone, Debug)]
pub(crate) struct SearchWorker<W> {
/// Settings cloned from the builder that created this worker.
config: Config,
/// Builds the reader over the preprocessor command's output.
command_builder: grep::cli::CommandReaderBuilder,
/// Builds decompression readers. `None` when `search_zip` was disabled at
/// build time.
decomp_builder: Option<grep::cli::DecompressionReaderBuilder>,
/// The pattern matcher handed to the searcher for every search.
matcher: PatternMatcher,
/// The searcher that runs the matcher over a path or reader.
searcher: grep::searcher::Searcher,
/// The printer whose sinks receive the search results.
printer: Printer<W>,
}
Component responsibilities:
| Component | Crate | Purpose |
|---|---|---|
| `matcher` | grep-regex / grep-pcre2 | Pattern matching |
| `searcher` | grep-searcher | File I/O, line iteration |
| `printer` | grep-printer | Output formatting |
| `command_builder` | grep-cli | Running preprocessors |
| `decomp_builder` | grep-cli | Running decompressors |
Section 8: The Search Method¶
impl<W: WriteColor> SearchWorker<W> {
    /// Searches a single haystack and returns the outcome.
    ///
    /// The binary detection mode is chosen first: explicit haystacks use
    /// the explicit setting, all others the implicit one. The haystack is
    /// then routed to the first handler that applies, in this order: stdin,
    /// preprocessor, decompression, plain path search.
    pub(crate) fn search(
        &mut self,
        haystack: &crate::haystack::Haystack,
    ) -> io::Result<SearchResult> {
        let detection = if haystack.is_explicit() {
            &self.config.binary_explicit
        } else {
            &self.config.binary_implicit
        };
        let detection = detection.clone();
        let path = haystack.path();
        log::trace!("{}: binary detection: {:?}", path.display(), detection);
        self.searcher.set_binary_detection(detection);

        // Guard clauses, highest priority first.
        if haystack.is_stdin() {
            return self.search_reader(path, &mut io::stdin().lock());
        }
        if self.should_preprocess(path) {
            return self.search_preprocessor(path);
        }
        if self.should_decompress(path) {
            return self.search_decompress(path);
        }
        self.search_path(path)
    }

    /// Mutably borrows the printer owned by this worker.
    pub(crate) fn printer(&mut self) -> &mut Printer<W> {
        &mut self.printer
    }
}
Routing priority:

1. stdin → `search_reader` (can't preprocess or mmap)
2. preprocessor match → `search_preprocessor` (user override)
3. decompression match → `search_decompress` (transparent)
4. otherwise → `search_path` (fast path with mmap)
Section 9: Helper Predicates¶
impl<W: WriteColor> SearchWorker<W> {
    /// Returns true if and only if the given file path should be
    /// decompressed before searching.
    ///
    /// This is always false when the worker has no decompression reader
    /// builder. Otherwise it asks the builder's matcher whether it has a
    /// command for `path`.
    fn should_decompress(&self, path: &Path) -> bool {
        self.decomp_builder
            .as_ref()
            .is_some_and(|builder| builder.get_matcher().has_command(path))
    }

    /// Returns true if and only if the given file path should be run through
    /// the preprocessor.
    ///
    /// The answer is false when no preprocessor is configured, and true
    /// when one is configured with an empty glob set. Otherwise the path is
    /// preprocessed unless the glob set reports it as ignored.
    fn should_preprocess(&self, path: &Path) -> bool {
        // No preprocessor configured: nothing can be preprocessed.
        if self.config.preprocessor.is_none() {
            return false;
        }
        let globs = &self.config.preprocessor_globs;
        // An empty glob set places no restriction, so every file is
        // preprocessed. Otherwise defer to the (negation-aware) glob match.
        // NOTE(review): the `false` argument is presumably the `is_dir`
        // flag of `Override::matched`; confirm against the `ignore` docs.
        globs.is_empty() || !globs.matched(path, false).is_ignore()
    }
}
The is_some_and pattern (Rust 1.70+):
// Old way
option.map(|x| predicate(x)).unwrap_or(false)
// New way
option.is_some_and(|x| predicate(x))
Section 10: Preprocessor Search¶
/// Searches the output of the preprocessor command for `path` instead of
/// reading the file directly.
///
/// The command receives `path` as its only argument and the opened file as
/// its stdin. The reader over the command's output is always closed, even
/// when the search itself failed. A search error takes precedence over a
/// close error.
///
/// # Errors
///
/// Fails if the file cannot be opened, if the command cannot be started,
/// if the search fails, or if closing the reader fails.
///
/// # Panics
///
/// Panics if no preprocessor is configured.
fn search_preprocessor(
    &mut self,
    path: &Path,
) -> io::Result<SearchResult> {
    use std::{fs::File, process::Stdio};

    let program = self.config.preprocessor.as_ref().unwrap();
    let mut cmd = std::process::Command::new(program);
    let input = Stdio::from(File::open(path)?);
    cmd.arg(path).stdin(input);

    let mut rdr = match self.command_builder.build(&mut cmd) {
        Ok(rdr) => rdr,
        Err(err) => {
            let msg = format!(
                "preprocessor command could not start: '{cmd:?}': {err}"
            );
            return Err(io::Error::new(io::ErrorKind::Other, msg));
        }
    };
    let outcome = self.search_reader(path, &mut rdr).map_err(|err| {
        let msg = format!("preprocessor command failed: '{cmd:?}': {err}");
        io::Error::new(io::ErrorKind::Other, msg)
    });
    // Critical: wait for the child process before reporting any search
    // error.
    let closed = rdr.close();
    let found = outcome?;
    closed?;
    Ok(found)
}
Error handling pattern:
// Search might fail
let result = self.search_reader(path, &mut rdr);
// Close might fail (check after search completes)
let close_result = rdr.close();
// Return first error encountered
let search_result = result?;
close_result?;
Ok(search_result)
Section 11: Decompression Search¶
/// Decompresses the file at `path` and searches the decompressed data.
///
/// When the worker has no decompression reader builder, this falls back to
/// a plain `search_path`. The decompression reader is always closed, even
/// when the search failed. A search error takes precedence over a close
/// error.
fn search_decompress(&mut self, path: &Path) -> io::Result<SearchResult> {
    let mut rdr = match self.decomp_builder {
        Some(ref builder) => builder.build(path)?,
        None => return self.search_path(path),
    };
    let outcome = self.search_reader(path, &mut rdr);
    let closed = rdr.close();
    let found = outcome?;
    closed?;
    Ok(found)
}
Supported formats (via grep-cli):
- gzip (.gz)
- bzip2 (.bz2)
- xz (.xz)
- lz4 (.lz4)
- lzma (.lzma)
- zstd (.zst)
Section 12: Direct File Search¶
/// Search the contents of the given file path.
fn search_path(&mut self, path: &Path) -> io::Result<SearchResult> {
use self::PatternMatcher::*;
let (searcher, printer) = (&mut self.searcher, &mut self.printer);
match self.matcher {
RustRegex(ref m) => search_path(m, searcher, printer, path),
#[cfg(feature = "pcre2")]
PCRE2(ref m) => search_path(m, searcher, printer, path),
}
}
Why `search_path` is the fast path:

- Memory mapping possible (avoids copying file data into a userspace buffer)
- No child process overhead
- No decompression overhead
- Direct syscall-level optimizations available
Section 13: Reader-Based Search¶
/// Executes a search on the given reader, which may or may not correspond
/// directly to the contents of the given file path.
///
/// Generally speaking, this method should only be used when there is no
/// other choice. Searching via `search_path` provides more opportunities
/// for optimizations (such as memory maps).
fn search_reader<R: io::Read>(
&mut self,
path: &Path,
rdr: &mut R,
) -> io::Result<SearchResult> {
use self::PatternMatcher::*;
let (searcher, printer) = (&mut self.searcher, &mut self.printer);
match self.matcher {
RustRegex(ref m) => search_reader(m, searcher, printer, path, rdr),
#[cfg(feature = "pcre2")]
PCRE2(ref m) => search_reader(m, searcher, printer, path, rdr),
}
}
When `search_reader` is used:

- stdin (can't be mmapped)
- Preprocessor output (pipe, not file)
- Decompressor output (pipe, not file)
Section 14: The Free Functions¶
/// Search the contents of the given file path using the given matcher,
/// searcher and printer.
///
/// For each printer variant, a sink bound to `path` is created, the searcher
/// is run over the file with that sink, and the sink's match flag and
/// statistics are collected into the returned `SearchResult`.
///
/// The standard and summary sinks report statistics optionally, so `stats`
/// may be `None` for them. The JSON sink always has statistics.
///
/// # Errors
///
/// Returns any error produced by the searcher.
fn search_path<M: Matcher, W: WriteColor>(
    matcher: M,
    searcher: &mut grep::searcher::Searcher,
    printer: &mut Printer<W>,
    path: &Path,
) -> io::Result<SearchResult> {
    match printer {
        Printer::Standard(p) => {
            let mut sink = p.sink_with_path(&matcher, path);
            searcher.search_path(&matcher, path, &mut sink)?;
            Ok(SearchResult {
                has_match: sink.has_match(),
                stats: sink.stats().cloned(),
            })
        }
        Printer::Summary(p) => {
            let mut sink = p.sink_with_path(&matcher, path);
            searcher.search_path(&matcher, path, &mut sink)?;
            Ok(SearchResult {
                has_match: sink.has_match(),
                stats: sink.stats().cloned(),
            })
        }
        Printer::JSON(p) => {
            let mut sink = p.sink_with_path(&matcher, path);
            searcher.search_path(&matcher, path, &mut sink)?;
            Ok(SearchResult {
                has_match: sink.has_match(),
                // The JSON sink returns its stats by reference
                // unconditionally.
                stats: Some(sink.stats().clone()),
            })
        }
    }
}
The Sink pattern:
// Printer creates a sink that knows how to handle matches
let mut sink = printer.sink_with_path(&matcher, path);
// Searcher produces match events, sink handles them
searcher.search_path(&matcher, path, &mut sink)?;
// Sink accumulates results
sink.has_match() // Did anything match?
sink.stats() // Aggregate statistics
Quick Reference: Key Types¶
// From this module
struct SearchWorkerBuilder { config, command_builder }
struct SearchWorker<W> { config, matcher, searcher, printer, ... }
struct SearchResult { has_match, stats }
enum PatternMatcher { RustRegex(...), PCRE2(...) }
enum Printer<W> { Standard(...), Summary(...), JSON(...) }
// From grep-searcher
struct Searcher; // File reading + line iteration
struct BinaryDetection; // Opaque; built via none(), quit(byte) or convert(byte)
// From grep-printer
struct Standard<W>; // Line-by-line printer
struct Summary<W>; // Aggregate printer
struct JSON<W>; // Structured printer
struct Stats; // Match statistics
// From grep-matcher (trait)
trait Matcher { fn find_iter(...) }
// From grep-cli
struct CommandReaderBuilder; // Runs external commands
struct DecompressionReaderBuilder; // Runs decompressors
Data Flow Summary¶
Haystack
│
▼
SearchWorker.search()
│
├─► is_stdin? ─► search_reader(stdin)
│
├─► preprocess? ─► search_preprocessor() ─► search_reader(cmd output)
│
├─► decompress? ─► search_decompress() ─► search_reader(cmd output)
│
└─► otherwise ─► search_path()
│
▼
match on Printer
│
┌──────┼──────┐
▼ ▼ ▼
Standard Summary JSON
│ │ │
▼ ▼ ▼
sink_with_path(&matcher, path)
│
▼
searcher.search_path(&matcher, path, &mut sink)
│
▼
SearchResult { has_match, stats }