Skip to content

Ripgrep search.rs: Code Companion

Reference code for the search.rs lecture. Sections correspond to the lecture document.


Section 1: The Config Struct

/// The configuration for the search worker.
///
/// Among a few other things, the configuration primarily controls the way we
/// show search results to users at a very high level.
#[derive(Clone, Debug)]
struct Config {
    /// Path to the preprocessor command, or `None` when no preprocessor is
    /// configured.
    preprocessor: Option<std::path::PathBuf>,
    /// Globs deciding which files are run through the preprocessor. When
    /// empty, every file is preprocessed (provided a preprocessor is set).
    preprocessor_globs: ignore::overrides::Override,
    /// Whether common compressed files are decompressed before searching.
    search_zip: bool,
    /// Binary detection used for implicitly discovered files.
    binary_implicit: grep::searcher::BinaryDetection,
    /// Binary detection used for explicitly requested files.
    binary_explicit: grep::searcher::BinaryDetection,
}

impl Default for Config {
    /// Builds the baseline configuration: no preprocessor, an empty set of
    /// preprocessor globs, no decompression, and `BinaryDetection::none()`
    /// for both implicit and explicit files.
    fn default() -> Self {
        use grep::searcher::BinaryDetection;
        use ignore::overrides::Override;

        Self {
            preprocessor: None,
            preprocessor_globs: Override::empty(),
            search_zip: false,
            binary_implicit: BinaryDetection::none(),
            binary_explicit: BinaryDetection::none(),
        }
    }
}

Config field purposes:

| Field | Type | Purpose |
| --- | --- | --- |
| `preprocessor` | `Option<PathBuf>` | External command to transform files |
| `preprocessor_globs` | `Override` | Which files to preprocess |
| `search_zip` | `bool` | Auto-decompress compressed files |
| `binary_implicit` | `BinaryDetection` | Detection for discovered files |
| `binary_explicit` | `BinaryDetection` | Detection for user-specified files |

Section 2: The Builder Pattern

/// A builder for configuring and constructing a search worker.
///
/// The builder itself is not consumed by `build`; each built worker receives
/// clones of the configuration and of the command reader builder.
#[derive(Clone, Debug)]
pub(crate) struct SearchWorkerBuilder {
    /// The settings accumulated through the builder's setter methods.
    config: Config,
    /// Builder for readers over the output of an external command; handed to
    /// each worker for running the preprocessor.
    command_builder: grep::cli::CommandReaderBuilder,
}

impl SearchWorkerBuilder {
    /// Returns a builder that starts from the default configuration.
    ///
    /// The command reader builder it carries is set up with
    /// `async_stderr(true)`.
    pub(crate) fn new() -> SearchWorkerBuilder {
        let mut cmd_builder = grep::cli::CommandReaderBuilder::new();
        cmd_builder.async_stderr(true);

        SearchWorkerBuilder {
            config: Config::default(),
            command_builder: cmd_builder,
        }
    }

    /// Assembles a search worker from the given matcher, searcher and
    /// printer, together with clones of this builder's configuration and
    /// command reader builder.
    pub(crate) fn build<W: WriteColor>(
        &self,
        matcher: PatternMatcher,
        searcher: grep::searcher::Searcher,
        printer: Printer<W>,
    ) -> SearchWorker<W> {
        // A decompression reader builder is only constructed when searching
        // compressed files has been enabled; otherwise the worker gets `None`.
        let decomp_builder = if self.config.search_zip {
            let mut builder = grep::cli::DecompressionReaderBuilder::new();
            builder.async_stderr(true);
            Some(builder)
        } else {
            None
        };

        SearchWorker {
            config: self.config.clone(),
            command_builder: self.command_builder.clone(),
            decomp_builder,
            matcher,
            searcher,
            printer,
        }
    }
}

The .then() pattern:

// Some(value) if condition is true, None otherwise; the closure only runs
// in the true case, so the expensive construction is skipped when false.
let maybe_builder = condition.then(|| expensive_construction());

Section 3: The Builder Methods

impl SearchWorkerBuilder {
    /// Configures the preprocessor command, or clears it when `cmd` is
    /// `None`.
    ///
    /// A supplied program is passed through `grep::cli::resolve_binary`
    /// immediately, so a program that cannot be resolved is reported here
    /// rather than at search time. On failure the error is returned and the
    /// previously configured preprocessor is left as it was.
    pub(crate) fn preprocessor(
        &mut self,
        cmd: Option<std::path::PathBuf>,
    ) -> anyhow::Result<&mut Self> {
        self.config.preprocessor = match cmd {
            Some(ref prog) => Some(grep::cli::resolve_binary(prog)?),
            None => None,
        };
        Ok(self)
    }

    /// Replaces the globs that select which files are sent through the
    /// preprocessor.
    pub(crate) fn preprocessor_globs(
        &mut self,
        globs: ignore::overrides::Override,
    ) -> &mut Self {
        self.config.preprocessor_globs = globs;
        self
    }

    /// Turns decompression and searching of common compressed files on or
    /// off.
    pub(crate) fn search_zip(&mut self, yes: bool) -> &mut Self {
        self.config.search_zip = yes;
        self
    }

    /// Sets the binary detection applied to files found implicitly (for
    /// example through directory traversal).
    ///
    /// Callers typically pass a `BinaryDetection::quit` mode here so that
    /// binary files are skipped.
    pub(crate) fn binary_detection_implicit(
        &mut self,
        detection: grep::searcher::BinaryDetection,
    ) -> &mut Self {
        self.config.binary_implicit = detection;
        self
    }

    /// Sets the binary detection applied to files the user asked for
    /// explicitly.
    ///
    /// A `quit` mode should NOT be used here: files requested by the user
    /// should never be skipped.
    pub(crate) fn binary_detection_explicit(
        &mut self,
        detection: grep::searcher::BinaryDetection,
    ) -> &mut Self {
        self.config.binary_explicit = detection;
        self
    }
}

BinaryDetection modes:

BinaryDetection::none()     // No detection, search everything
BinaryDetection::quit()     // Stop searching if binary detected
BinaryDetection::convert()  // Replace NUL bytes with the line terminator

Section 4: SearchResult

/// The result of executing a search.
///
/// The derived `Default` value reports no match and carries no statistics.
#[derive(Clone, Debug, Default)]
pub(crate) struct SearchResult {
    /// Whether the search found at least one match.
    has_match: bool,
    /// Aggregate statistics for the search, when the printer provided them.
    stats: Option<grep::printer::Stats>,
}

impl SearchResult {
    /// Reports whether the search produced at least one match.
    pub(crate) fn has_match(&self) -> bool {
        let Self { has_match, .. } = self;
        *has_match
    }

    /// Borrows the aggregate statistics gathered for this one search.
    ///
    /// Statistics can be costly to collect, so this is `None` unless they
    /// were explicitly enabled on the printer supplied by the caller.
    pub(crate) fn stats(&self) -> Option<&grep::printer::Stats> {
        Option::as_ref(&self.stats)
    }
}

Exit code mapping:

// In main.rs
if search_result.has_match() {
    ExitCode::from(0)  // Matches found
} else {
    ExitCode::from(1)  // No matches
}

Section 5: PatternMatcher Enum

/// The pattern matcher used by a search worker.
///
/// Each variant wraps the matcher type of one regex engine, so the worker can
/// `match` on the engine in use.
#[derive(Clone, Debug)]
pub(crate) enum PatternMatcher {
    /// A matcher from `grep::regex`.
    RustRegex(grep::regex::RegexMatcher),
    /// A matcher from `grep::pcre2`. This variant only exists when the
    /// `pcre2` feature is enabled.
    #[cfg(feature = "pcre2")]
    PCRE2(grep::pcre2::RegexMatcher),
}

Conditional compilation:

// When pcre2 feature is disabled, this variant doesn't exist
#[cfg(feature = "pcre2")]
PCRE2(grep::pcre2::RegexMatcher),

// Usage in match expressions:
match self.matcher {
    RustRegex(ref m) => /* ... */,
    #[cfg(feature = "pcre2")]
    PCRE2(ref m) => /* ... */,
}

Why enum over trait object:

// Enum approach (used here):
match matcher { RustRegex(m) => m.search(), PCRE2(m) => m.search() }
// Pro: inlinable, no vtable
// Con: closed set of variants

// Trait object approach:
dyn Matcher  
// Pro: open to extension
// Con: virtual dispatch overhead

Section 6: Printer Enum

/// The printer used by a search worker.
///
/// Each variant wraps one of the printer types from `grep::printer`.
///
/// The `W` type parameter refers to the type of the underlying writer.
#[derive(Clone, Debug)]
pub(crate) enum Printer<W> {
    /// Use the standard printer, which supports the classic grep-like format.
    Standard(grep::printer::Standard<W>),
    /// Use the summary printer, which supports aggregate displays of search
    /// results.
    Summary(grep::printer::Summary<W>),
    /// A JSON printer, which emits results in the JSON Lines format.
    JSON(grep::printer::JSON<W>),
}

impl<W: WriteColor> Printer<W> {
    /// Borrows, mutably, the writer wrapped by whichever printer variant is
    /// active.
    pub(crate) fn get_mut(&mut self) -> &mut W {
        match self {
            Self::Standard(printer) => printer.get_mut(),
            Self::Summary(printer) => printer.get_mut(),
            Self::JSON(printer) => printer.get_mut(),
        }
    }
}

Printer types by mode:

| Flag | Printer Type | Purpose |
| --- | --- | --- |
| (default) | `Standard` | Line-by-line matches |
| `-c` | `Summary` | Match counts |
| `-l` | `Summary` | Filenames only |
| `--json` | `JSON` | Structured output |

Section 7: The SearchWorker Struct

/// A worker for executing searches.
///
/// It is intended for a single worker to execute many searches, and is
/// generally intended to be used from a single thread.
#[derive(Clone, Debug)]
pub(crate) struct SearchWorker<W> {
    /// Settings cloned from the builder that created this worker.
    config: Config,
    /// Builds readers over the output of the preprocessor command.
    command_builder: grep::cli::CommandReaderBuilder,
    /// Builds decompression readers; `None` unless `search_zip` was enabled
    /// when the worker was built.
    decomp_builder: Option<grep::cli::DecompressionReaderBuilder>,
    /// The pattern matcher used for every search run by this worker.
    matcher: PatternMatcher,
    /// The searcher that drives each search; its binary detection is reset
    /// at the start of every call to `search`.
    searcher: grep::searcher::Searcher,
    /// The printer that receives the search results.
    printer: Printer<W>,
}

Component responsibilities:

| Component | Crate | Purpose |
| --- | --- | --- |
| `matcher` | grep-regex / grep-pcre2 | Pattern matching |
| `searcher` | grep-searcher | File I/O, line iteration |
| `printer` | grep-printer | Output formatting |
| `command_builder` | grep-cli | Running preprocessors |
| `decomp_builder` | grep-cli | Running decompressors |

Section 8: The Search Method

impl<W: WriteColor> SearchWorker<W> {
    /// Runs a search over one haystack and reports the outcome.
    ///
    /// The searcher's binary detection is first set according to whether the
    /// haystack was requested explicitly. The haystack is then routed to
    /// the first handler that applies: stdin, the preprocessor,
    /// decompression, or a plain search of the path.
    pub(crate) fn search(
        &mut self,
        haystack: &crate::haystack::Haystack,
    ) -> io::Result<SearchResult> {
        // Explicit and implicit haystacks each have their own detection mode.
        let detection = if haystack.is_explicit() {
            &self.config.binary_explicit
        } else {
            &self.config.binary_implicit
        };
        let bin = detection.clone();
        let path = haystack.path();
        log::trace!("{}: binary detection: {:?}", path.display(), bin);

        self.searcher.set_binary_detection(bin);

        // Guard clauses, checked in priority order.
        if haystack.is_stdin() {
            return self.search_reader(path, &mut io::stdin().lock());
        }
        if self.should_preprocess(path) {
            return self.search_preprocessor(path);
        }
        if self.should_decompress(path) {
            return self.search_decompress(path);
        }
        self.search_path(path)
    }

    /// Gives mutable access to the printer owned by this worker.
    pub(crate) fn printer(&mut self) -> &mut Printer<W> {
        &mut self.printer
    }
}

Routing priority:

1. stdin → search_reader (can't preprocess or mmap)
2. preprocessor match → search_preprocessor (user override)
3. decompression match → search_decompress (transparent)
4. otherwise → search_path (fast path with mmap)


Section 9: Helper Predicates

impl<W: WriteColor> SearchWorker<W> {
    /// Returns true if and only if the given file path should be
    /// decompressed before searching.
    ///
    /// This is always false when the worker has no decompression builder
    /// (i.e. `search_zip` was disabled at build time). Otherwise it asks the
    /// builder's matcher whether it has a command for `path`.
    fn should_decompress(&self, path: &Path) -> bool {
        self.decomp_builder.as_ref().is_some_and(|decomp_builder| {
            decomp_builder.get_matcher().has_command(path)
        })
    }

    /// Returns true if and only if the given file path should be run through
    /// the preprocessor.
    ///
    /// The decision is made in three steps:
    ///
    /// 1. Without a configured preprocessor, nothing is preprocessed.
    /// 2. With a preprocessor but no globs, everything is preprocessed.
    /// 3. Otherwise, the path is preprocessed unless the globs report it as
    ///    ignored.
    fn should_preprocess(&self, path: &Path) -> bool {
        // No preprocessor configured
        if self.config.preprocessor.is_none() {
            return false;
        }
        // No globs = preprocess everything
        if self.config.preprocessor_globs.is_empty() {
            return true;
        }
        // The path is always treated as a file here (`is_dir = false`).
        // Anything the override set does not mark as ignored is preprocessed.
        !self.config.preprocessor_globs.matched(path, false).is_ignore()
    }
}

The is_some_and pattern (Rust 1.70+):

// Old way
option.map(|x| predicate(x)).unwrap_or(false)

// New way
option.is_some_and(|x| predicate(x))

/// Runs the configured preprocessor for the given path and searches the
/// command's output instead of the file's own contents.
///
/// The path is passed as the command's argument, and the opened file is
/// attached as the command's stdin. Failures to start the command, and
/// failures while searching its output, are wrapped in an `io::Error` whose
/// message includes the command.
///
/// The reader is always closed, even if the search failed; a search error
/// takes precedence over a close error.
///
/// This panics if no preprocessor is configured; callers are expected to
/// check `should_preprocess` first.
fn search_preprocessor(
    &mut self,
    path: &Path,
) -> io::Result<SearchResult> {
    use std::{fs::File, process::Stdio};

    let program = self.config.preprocessor.as_ref().unwrap();
    let mut cmd = std::process::Command::new(program);
    cmd.arg(path);
    cmd.stdin(Stdio::from(File::open(path)?));

    let mut rdr = match self.command_builder.build(&mut cmd) {
        Ok(rdr) => rdr,
        Err(err) => {
            let msg = format!("preprocessor command could not start: '{cmd:?}': {err}");
            return Err(io::Error::new(io::ErrorKind::Other, msg));
        }
    };

    let searched = match self.search_reader(path, &mut rdr) {
        Ok(found) => Ok(found),
        Err(err) => {
            let msg = format!("preprocessor command failed: '{cmd:?}': {err}");
            Err(io::Error::new(io::ErrorKind::Other, msg))
        }
    };

    // Critical: close the reader (waiting on the child process) before any
    // search error is propagated.
    let closed = rdr.close();
    let found = searched?;
    closed.map(|()| found)
}

Error handling pattern:

// Search might fail
let result = self.search_reader(path, &mut rdr);
// Close might fail (check after search completes)
let close_result = rdr.close();
// Return first error encountered
let search_result = result?;
close_result?;
Ok(search_result)

/// Decompresses the file at the given path and searches the decompressed
/// data.
///
/// When the worker has no decompression builder, this falls back to a
/// plain `search_path`. The reader is always closed after the search; if
/// both the search and the close fail, the search error is the one
/// returned.
fn search_decompress(&mut self, path: &Path) -> io::Result<SearchResult> {
    let mut rdr = match self.decomp_builder {
        Some(ref builder) => builder.build(path)?,
        None => return self.search_path(path),
    };

    let searched = self.search_reader(path, &mut rdr);
    // Close before propagating, so the reader is shut down on every path.
    let closed = rdr.close();
    let found = searched?;
    closed.map(|()| found)
}

Supported formats (via grep-cli):

- gzip (.gz)
- bzip2 (.bz2)
- xz (.xz)
- lz4 (.lz4)
- lzma (.lzma)
- zstd (.zst)


/// Search the contents of the given file path.
fn search_path(&mut self, path: &Path) -> io::Result<SearchResult> {
    use self::PatternMatcher::*;

    let (searcher, printer) = (&mut self.searcher, &mut self.printer);
    match self.matcher {
        RustRegex(ref m) => search_path(m, searcher, printer, path),
        #[cfg(feature = "pcre2")]
        PCRE2(ref m) => search_path(m, searcher, printer, path),
    }
}

Why search_path is the fast path:

- Memory mapping possible (avoids copying to userspace)
- No child process overhead
- No decompression overhead
- Direct syscall-level optimizations available


/// Executes a search on the given reader, which may or may not correspond
/// directly to the contents of the given file path.
///
/// Generally speaking, this method should only be used when there is no
/// other choice. Searching via `search_path` provides more opportunities
/// for optimizations (such as memory maps).
fn search_reader<R: io::Read>(
    &mut self,
    path: &Path,
    rdr: &mut R,
) -> io::Result<SearchResult> {
    use self::PatternMatcher::*;

    let (searcher, printer) = (&mut self.searcher, &mut self.printer);
    match self.matcher {
        RustRegex(ref m) => search_reader(m, searcher, printer, path, rdr),
        #[cfg(feature = "pcre2")]
        PCRE2(ref m) => search_reader(m, searcher, printer, path, rdr),
    }
}

When search_reader is used:

- stdin (can't be mmapped)
- Preprocessor output (pipe, not file)
- Decompressor output (pipe, not file)


Section 14: The Free Functions

/// Search the contents of the given file path using the given matcher,
/// searcher and printer.
///
/// A sink is created from whichever printer variant is active, the searcher
/// is run over `path` with that sink, and the sink's match flag and
/// statistics are then copied into the returned `SearchResult`.
///
/// # Errors
///
/// Returns any error reported by the searcher while searching `path`.
fn search_path<M: Matcher, W: WriteColor>(
    matcher: M,
    searcher: &mut grep::searcher::Searcher,
    printer: &mut Printer<W>,
    path: &Path,
) -> io::Result<SearchResult> {
    match *printer {
        Printer::Standard(ref mut p) => {
            let mut sink = p.sink_with_path(&matcher, path);
            searcher.search_path(&matcher, path, &mut sink)?;
            Ok(SearchResult {
                has_match: sink.has_match(),
                // Stats are optional for this sink.
                stats: sink.stats().cloned(),
            })
        }
        Printer::Summary(ref mut p) => {
            let mut sink = p.sink_with_path(&matcher, path);
            searcher.search_path(&matcher, path, &mut sink)?;
            Ok(SearchResult {
                has_match: sink.has_match(),
                // Stats are optional for this sink.
                stats: sink.stats().cloned(),
            })
        }
        Printer::JSON(ref mut p) => {
            let mut sink = p.sink_with_path(&matcher, path);
            searcher.search_path(&matcher, path, &mut sink)?;
            Ok(SearchResult {
                has_match: sink.has_match(),
                // Unlike the other sinks, this one hands back its stats
                // directly rather than as an `Option`.
                stats: Some(sink.stats().clone()),
            })
        }
    }
}

The Sink pattern:

// Printer creates a sink that knows how to handle matches
let mut sink = printer.sink_with_path(&matcher, path);

// Searcher produces match events, sink handles them
searcher.search_path(&matcher, path, &mut sink)?;

// Sink accumulates results
sink.has_match()   // Did anything match?
sink.stats()       // Aggregate statistics

Quick Reference: Key Types

// From this module
struct SearchWorkerBuilder { config, command_builder }
struct SearchWorker<W> { config, matcher, searcher, printer, ... }
struct SearchResult { has_match, stats }
enum PatternMatcher { RustRegex(...), PCRE2(...) }
enum Printer<W> { Standard(...), Summary(...), JSON(...) }

// From grep-searcher
struct Searcher;  // File reading + line iteration
enum BinaryDetection { None, Quit, Convert }

// From grep-printer  
struct Standard<W>;  // Line-by-line printer
struct Summary<W>;   // Aggregate printer
struct JSON<W>;      // Structured printer
struct Stats;        // Match statistics

// From grep-matcher (trait)
trait Matcher { fn find_iter(...) }

// From grep-cli
struct CommandReaderBuilder;       // Runs external commands
struct DecompressionReaderBuilder; // Runs decompressors

Data Flow Summary

Haystack
SearchWorker.search()
    ├─► is_stdin?      ─► search_reader(stdin)
    ├─► preprocess?    ─► search_preprocessor() ─► search_reader(cmd output)
    ├─► decompress?    ─► search_decompress()   ─► search_reader(cmd output)
    └─► otherwise      ─► search_path()
                       match on Printer
                       ┌──────┼──────┐
                       ▼      ▼      ▼
                   Standard Summary  JSON
                       │      │      │
                       ▼      ▼      ▼
                   sink_with_path(&matcher, path)
                   searcher.search_path(&matcher, path, &mut sink)
                       SearchResult { has_match, stats }