Skip to content

ripgrep crates/core/main.rs: Code Companion

Reference code for the Application Entry Point lecture. Sections correspond to the lecture document.


Section 1: The Allocator Override

// Install jemalloc as the process-wide allocator, but only on 64-bit musl
// targets. `cfg(all(...))` requires EVERY listed condition to hold:
// - target_env = "musl" (building against musl libc)
// - target_pointer_width = "64" (64-bit architecture)
// On any other target this item is compiled out and the default allocator
// remains in effect.
#[cfg(all(target_env = "musl", target_pointer_width = "64"))]
#[global_allocator]  // Registers the static below as the allocator for the whole program
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

The #[global_allocator] attribute designates this static as the memory allocator for all heap allocations. When the #[cfg] conditions aren't met, this code doesn't exist in the compiled binary at all—not disabled, literally absent.


Section 2: The Main Function and Exit Code Philosophy

use std::process::ExitCode;

/// Entry point: runs ripgrep on the parsed flags and turns the outcome into
/// a process exit code.
///
/// On success, the exit code chosen by `run` is returned unchanged. On
/// failure, an error whose cause chain contains a broken-pipe I/O error is
/// treated as a quiet, successful exit (0); any other error is printed and
/// produces exit code 2.
fn main() -> ExitCode {
    let err = match run(flags::parse()) {
        Ok(code) => return code,
        Err(err) => err,
    };

    // Scan every cause in the chain for a broken pipe. By Unix convention a
    // closed output pipe is a graceful exit, not a failure.
    let broken_pipe = err
        .chain()
        .filter_map(|cause| cause.downcast_ref::<std::io::Error>())
        .any(|ioerr| ioerr.kind() == std::io::ErrorKind::BrokenPipe);
    if broken_pipe {
        return ExitCode::from(0);
    }

    // `{:#}` selects the alternate Display form of the error.
    eprintln_locked!("{:#}", err);
    ExitCode::from(2)
}

The err.chain() method comes from anyhow::Error and iterates through the entire error cause chain. The downcast_ref attempts to view the error as a specific type. Exit code 2 indicates an error (distinct from exit code 1, which means "no matches found").


Section 3: The Run Function and Mode Dispatch

/// Dispatches on the result of flag parsing and computes the exit code.
///
/// Parse errors are returned to the caller unchanged and special modes are
/// delegated to `special`. Otherwise the selected mode is executed and its
/// "matched" outcome is translated into an exit code:
/// 0 = matched, 1 = nothing matched, 2 = an error message was emitted.
fn run(result: crate::flags::ParseResult<HiArgs>) -> anyhow::Result<ExitCode> {
    use crate::flags::{Mode, ParseResult};

    let args = match result {
        ParseResult::Ok(args) => args,
        ParseResult::Special(mode) => return special(mode),
        ParseResult::Err(err) => return Err(err),
    };

    let matched = match args.mode() {
        Mode::Search(mode) => {
            if !args.matches_possible() {
                // No match can ever be reported, so skip the search entirely.
                false
            } else if args.threads() == 1 {
                search(&args, mode)?
            } else {
                search_parallel(&args, mode)?
            }
        }
        Mode::Files => {
            if args.threads() == 1 {
                files(&args)?
            } else {
                files_parallel(&args)?
            }
        }
        // These modes produce their own exit code directly.
        Mode::Types => return types(&args),
        Mode::Generate(mode) => return generate(mode),
    };

    // A match only counts as success if no error was reported along the way,
    // unless quiet mode is on, in which case the match alone is enough.
    let code: u8 = if matched && (args.quiet() || !messages::errored()) {
        0
    } else if messages::errored() {
        2
    } else {
        1
    };
    Ok(ExitCode::from(code))
}

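The matches_possible() check short-circuits searches that can never report a match (for example, when there are no patterns to search for or the match limit is zero), so no files are walked at all. Dispatching on args.threads() == 1 is also what keeps sorted output correct: sorting has to run on the single-threaded path, because parallel traversal would destroy sort order.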


Section 4: Single-Threaded Search Architecture

/// Searches every haystack on the calling thread.
///
/// Returns `Ok(true)` if at least one haystack produced a match. Errors from
/// building the walker, matcher, searcher or search worker are propagated.
/// A per-haystack search error is reported via `err_message!` and the loop
/// moves on, except for a broken pipe, which ends the loop quietly.
fn search(args: &HiArgs, mode: SearchMode) -> anyhow::Result<bool> {
    let started_at = std::time::Instant::now();
    let haystack_builder = args.haystack_builder();

    // Turn directory-walk results into haystacks, dropping entries the
    // builder rejects, then let the arguments impose any requested ordering.
    let walker = args.walk_builder()?.build();
    let candidates =
        walker.filter_map(|entry| haystack_builder.build_from_result(entry));
    let haystacks = args.sort(candidates);

    let mut found_match = false;
    let mut visited_any = false;
    let mut totals = args.stats(); // `None` unless statistics were requested

    // The worker bundles the matcher, the searcher and the printer.
    let mut worker = args.search_worker(
        args.matcher()?,
        args.searcher()?,
        args.printer(mode, args.stdout()),
    )?;

    for haystack in haystacks {
        visited_any = true;
        let outcome = match worker.search(&haystack) {
            Ok(outcome) => outcome,
            Err(err) => {
                // The reader of our output went away (e.g. `rg pattern | head`).
                if err.kind() == std::io::ErrorKind::BrokenPipe {
                    break;
                }
                // Any other failure is reported, then the next haystack is tried.
                err_message!("{}: {}", haystack.path().display(), err);
                continue;
            }
        };
        if !found_match {
            found_match = outcome.has_match();
        }
        if let Some(totals) = totals.as_mut() {
            *totals += outcome.stats().unwrap();
        }
        // Stop at the first match when the arguments ask for it.
        if found_match && args.quit_after_match() {
            break;
        }
    }

    // With an implicit search path and nothing visited, tell the user so.
    if args.has_implicit_path() && !visited_any {
        eprint_nothing_searched();
    }
    // Emit the accumulated statistics through the printer's writer; a failure
    // to print them is deliberately ignored.
    if let Some(totals) = totals.as_ref() {
        let _ = print_stats(mode, totals, started_at, worker.printer().get_mut());
    }
    Ok(found_match)
}

The "haystack" abstraction represents anything searchable. The filter_map chain converts directory walk results into haystacks, filtering out entries that can't be searched (directories, symlinks when not following them, etc.).


Section 5: Parallel Search and Thread Coordination

fn search_parallel(args: &HiArgs, mode: SearchMode) -> anyhow::Result<bool> {
    use std::sync::atomic::{AtomicBool, Ordering};

    let started_at = std::time::Instant::now();
    let haystack_builder = args.haystack_builder();
    let bufwtr = args.buffer_writer();  // Thread-safe buffered output

    // Mutex for complex state, AtomicBool for simple flags
    let stats = args.stats().map(std::sync::Mutex::new);
    let matched = AtomicBool::new(false);
    let searched = AtomicBool::new(false);

    // Create template searcher that will be cloned per-thread
    let mut searcher = args.search_worker(
        args.matcher()?,
        args.searcher()?,
        args.printer(mode, bufwtr.buffer()),  // Each clone gets own buffer
    )?;

    // Parallel walker handles thread spawning and work distribution
    args.walk_builder()?.build_parallel().run(|| {
        // Capture shared state by reference
        let bufwtr = &bufwtr;
        let stats = &stats;
        let matched = &matched;
        let searched = &searched;
        let haystack_builder = &haystack_builder;
        let mut searcher = searcher.clone();  // Clone for this thread

        // Return boxed closure - called once per discovered file
        Box::new(move |result| {
            let haystack = match haystack_builder.build_from_result(result) {
                Some(haystack) => haystack,
                None => return WalkState::Continue,
            };
            searched.store(true, Ordering::SeqCst);
            searcher.printer().get_mut().clear();  // Reset buffer

            let search_result = match searcher.search(&haystack) {
                Ok(search_result) => search_result,
                Err(err) => {
                    err_message!("{}: {}", haystack.path().display(), err);
                    return WalkState::Continue;
                }
            };

            if search_result.has_match() {
                matched.store(true, Ordering::SeqCst);
            }
            // Accumulate stats under mutex
            if let Some(ref locked_stats) = *stats {
                let mut stats = locked_stats.lock().unwrap();
                *stats += search_result.stats().unwrap();
            }
            // Print buffer atomically
            if let Err(err) = bufwtr.print(searcher.printer().get_mut()) {
                if err.kind() == std::io::ErrorKind::BrokenPipe {
                    return WalkState::Quit;  // Stop all threads
                }
                err_message!("{}: {}", haystack.path().display(), err);
            }
            // WalkState controls traversal: Continue, Skip, or Quit
            if matched.load(Ordering::SeqCst) && args.quit_after_match() {
                WalkState::Quit
            } else {
                WalkState::Continue
            }
        })
    });

    Ok(matched.load(Ordering::SeqCst))
}

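SeqCst (sequentially consistent) ordering provides the strongest memory ordering guarantee: every thread observes all SeqCst operations in one single global order. That is stronger than these simple flags strictly need, but it is the safe default when there is no measured reason to weaken it. The bufwtr.print() call writes the entire buffer atomically to prevent interleaved output from different threads.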


Section 6: The File Listing Modes

/// Lists haystack paths on the calling thread (the `--files` mode).
///
/// Returns `Ok(true)` if at least one haystack was found. A broken pipe while
/// printing ends the listing quietly; any other write error is returned.
fn files(args: &HiArgs) -> anyhow::Result<bool> {
    let haystack_builder = args.haystack_builder();
    let walker = args.walk_builder()?.build();
    let candidates =
        walker.filter_map(|entry| haystack_builder.build_from_result(entry));
    let haystacks = args.sort(candidates);

    let mut found_any = false;
    let mut path_printer = args.path_printer_builder().build(args.stdout());

    for haystack in haystacks {
        found_any = true;
        // When only the existence of a result matters, stop before printing.
        if args.quit_after_match() {
            break;
        }
        match path_printer.write(haystack.path()) {
            Ok(_) => {}
            // The reader went away: stop, but do not treat it as a failure.
            Err(err) if err.kind() == std::io::ErrorKind::BrokenPipe => break,
            // Anything else is a real output failure and is bubbled up.
            Err(err) => return Err(err.into()),
        }
    }
    Ok(found_any)
}

/// Lists haystack paths using the parallel walker.
///
/// Walker threads send each haystack over an mpsc channel to one dedicated
/// thread that does all the printing, so paths from different workers are
/// never interleaved. Returns `Ok(true)` if at least one haystack was found.
/// A broken pipe on output is swallowed; any other write error is returned.
fn files_parallel(args: &HiArgs) -> anyhow::Result<bool> {
    use std::{
        sync::{atomic::{AtomicBool, Ordering}, mpsc},
        thread,
    };

    let haystack_builder = args.haystack_builder();
    let mut path_printer = args.path_printer_builder().build(args.stdout());
    let found_any = AtomicBool::new(false);
    let (tx, rx) = mpsc::channel::<crate::haystack::Haystack>();

    // Sole consumer: prints until every sender is gone or a write fails.
    let printer = thread::spawn(move || -> std::io::Result<()> {
        while let Ok(haystack) = rx.recv() {
            path_printer.write(haystack.path())?;
        }
        Ok(())
    });

    args.walk_builder()?.build_parallel().run(|| {
        let haystack_builder = &haystack_builder;
        let found_any = &found_any;
        let tx = tx.clone(); // each worker owns its own sender

        Box::new(move |entry| {
            let haystack = match haystack_builder.build_from_result(entry) {
                None => return WalkState::Continue,
                Some(haystack) => haystack,
            };
            found_any.store(true, Ordering::SeqCst);
            if args.quit_after_match() {
                return WalkState::Quit;
            }
            // A failed send means the print thread has stopped receiving.
            if tx.send(haystack).is_ok() {
                WalkState::Continue
            } else {
                WalkState::Quit
            }
        })
    });

    // Drop the last sender so the print thread's receive loop can finish.
    drop(tx);
    match printer.join().unwrap() {
        Err(err) if err.kind() != std::io::ErrorKind::BrokenPipe => Err(err.into()),
        _ => Ok(found_any.load(Ordering::SeqCst)),
    }
}

The parallel version uses mpsc::channel (multi-producer, single-consumer) where worker threads send discovered paths to a dedicated print thread. This ensures writes don't interleave. The drop(tx) is crucial—it closes the sending side so the receiver's iter() loop terminates.


Quick Reference

Exit Code Semantics

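| Code | Meaning |
|------|---------|
| 0 | Matches found (or --files found files) |
| 1 | No matches found |
| 2 | Error occurred (reported even when matches were found, unless --quiet is set) |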

Mode Dispatch Flow

ParseResult::Err      → Return error immediately
ParseResult::Special  → Handle --help/--version
ParseResult::Ok       → Normal mode dispatch:
                          ├─ Search (match impossible) → no search, matched = false
                          ├─ Search (threads=1)        → search()
                          ├─ Search (threads>1)        → search_parallel()
                          ├─ Files (threads=1)         → files()
                          ├─ Files (threads>1)         → files_parallel()
                          ├─ Types                     → types()
                          └─ Generate                  → generate()

Key Types

// Parse result variants
/// Outcome of command-line parsing, as consumed by `run`.
enum ParseResult<T> {
    /// Parsing failed; `run` returns this error to its caller.
    Err(anyhow::Error),
    /// A special mode (e.g. --help, --version) that `run` hands to `special`
    /// instead of performing a search.
    Special(SpecialMode),
    /// Parsing succeeded; carries the parsed arguments.
    Ok(T),
}

// Walk control for parallel traversal
/// Value returned by the per-entry callback of the parallel walker to steer
/// the rest of the traversal.
enum WalkState {
    /// Keep searching.
    Continue,
    /// Skip this directory's children.
    Skip,
    /// Stop all threads.
    Quit,
}

// Memory orderings used
Ordering::SeqCst  // Strongest guarantee, used for cross-thread flags

Synchronization Primitives

| Primitive | Used For | Why |
|-----------|----------|-----|
| AtomicBool | matched, searched flags | Simple boolean state, no mutex overhead |
| Mutex | Statistics accumulation | Complex += operations need exclusivity |
| mpsc::channel | Parallel file listing | Prevents write tearing, clean producer/consumer |
| BufferWriter | Parallel search output | Atomic buffer printing |