Skip to content

Ripgrep hiargs.rs: Code Companion

Reference code for the hiargs.rs lecture. Sections correspond to the lecture document.


Section 1: The HiArgs Struct

/// The high-level argument set that `HiArgs::from_low_args` builds from a
/// `LowArgs` value.
///
/// This listing is abridged: only a representative subset of the fields is
/// shown, grouped by the concern each one configures.
#[derive(Debug)]
pub(crate) struct HiArgs {
    // Display options (forwarded to the printer and searcher builders)
    color: ColorChoice,
    colors: grep::printer::ColorSpecs,
    column: bool,
    heading: bool,
    line_number: bool,
    byte_offset: bool,

    // Search behavior (consumed when building the matcher and searcher)
    case: CaseMode,
    boundary: Option<BoundaryMode>,
    fixed_strings: bool,
    invert_match: bool,
    multiline: bool,
    multiline_dotall: bool,

    // What to search
    patterns: Patterns,
    paths: Paths,
    mode: Mode,

    // Filtering (consumed when building the directory walker)
    types: ignore::types::Types,
    globs: ignore::overrides::Override,
    binary: BinaryDetection,
    max_filesize: Option<u64>,
    max_depth: Option<usize>,

    // Performance
    threads: usize,
    mmap_choice: grep::searcher::MmapChoice,
    // NOTE(review): presumably applied to the regex builder; no use of
    // these two limits is visible in this listing — confirm.
    dfa_size_limit: Option<usize>,
    regex_size_limit: Option<usize>,

    // ... approximately 70 fields total
}

Field categories:

| Category      | Examples                                   |
|---------------|--------------------------------------------|
| Display       | color, heading, line_number, column        |
| Search        | case, boundary, multiline, invert_match    |
| Filtering     | types, globs, binary, hidden               |
| Performance   | threads, mmap_choice, *_size_limit         |
| Context       | context, context_separator, file_separator |
| Output format | mode, quiet, stats, vimgrep                |

Section 2: The Transformation — from_low_args

impl HiArgs {
    pub(crate) fn from_low_args(mut low: LowArgs) -> anyhow::Result<HiArgs> {
        // Assertion: special modes already handled
        assert_eq!(None, low.special, "special mode demands short-circuiting");

        // Validate sorting support
        if let Some(ref sort) = low.sort {
            sort.supported()?;
        }

        // Mode adjustments for flag interactions
        match low.mode {
            Mode::Search(ref mut mode) => match *mode {
                // -v --count-matches → -v --count
                SearchMode::CountMatches if low.invert_match => {
                    *mode = SearchMode::Count;
                }
                // -o --count → --count-matches
                SearchMode::Count if low.only_matching => {
                    *mode = SearchMode::CountMatches;
                }
                _ => {}
            },
            _ => {}
        }

        // Initialize shared state
        let mut state = State::new()?;

        // Sequential conversions (order matters!)
        let patterns = Patterns::from_low_args(&mut state, &mut low)?;
        let paths = Paths::from_low_args(&mut state, &patterns, &mut low)?;
        let binary = BinaryDetection::from_low_args(&state, &low);
        let colors = take_color_specs(&mut state, &mut low);
        let hyperlink_config = take_hyperlink_config(&mut state, &mut low)?;
        let stats = stats(&low);
        let types = types(&low)?;
        let globs = globs(&state, &low)?;
        let pre_globs = preprocessor_globs(&state, &low)?;

        // Derived values...

        Ok(HiArgs { /* ... */ })
    }
}

Flag interaction examples:

// Shorthand for the first rewrite arm in `from_low_args` (the real arm
// assigns `*mode = SearchMode::Count`).
// `-v --count-matches` is treated as `-v --count`: with inverted matching
// the per-match count is replaced by a count of lines.
SearchMode::CountMatches if low.invert_match => SearchMode::Count

// Shorthand for the second rewrite arm (the real arm assigns
// `*mode = SearchMode::CountMatches`).
// `-o --count` is treated as `--count-matches`: when only the matched
// parts are of interest, each match is counted rather than each line.
SearchMode::Count if low.only_matching => SearchMode::CountMatches

Section 3: State Management

/// Process-level facts shared by the sub-conversions in
/// `HiArgs::from_low_args`.
///
/// `stdin_consumed` is the only field mutated after construction: it is
/// set once patterns have been read from stdin.
#[derive(Debug)]
struct State {
    /// Is stdout connected to a terminal?
    is_terminal_stdout: bool,
    /// Has stdin already been read (for patterns)?
    stdin_consumed: bool,
    /// Captured at startup, doesn't change
    cwd: PathBuf,
}

impl State {
    /// Captures the current working directory and whether stdout is a
    /// terminal; stdin starts out unconsumed.
    ///
    /// # Errors
    ///
    /// Returns an error if the current directory cannot be determined.
    fn new() -> anyhow::Result<State> {
        use std::io::IsTerminal;

        let cwd = current_dir()?;
        log::debug!("read CWD from environment: {}", cwd.display());

        let is_terminal_stdout = std::io::stdout().is_terminal();
        Ok(State { is_terminal_stdout, stdin_consumed: false, cwd })
    }
}

How state affects behavior:

// Terminal detection → default colors.
// `Auto` is downgraded to `Never` when stdout is not a terminal; every
// other choice (including an explicit one) is passed through unchanged.
let color = match low.color {
    ColorChoice::Auto if !state.is_terminal_stdout => ColorChoice::Never,
    _ => low.color,
};

// Terminal detection → default line numbers.
// Simplified: the full derivation in Section 11 also consults `quiet`,
// the search mode, `column` and `vimgrep`. An explicit user choice in
// `low.line_number` always wins because the closure only runs on `None`.
let line_number = low.line_number.unwrap_or_else(|| {
    state.is_terminal_stdout && !paths.is_only_stdin()
});

// Stdin tracking → prevent double consumption.
// Illustrative: the same flag is checked in `Patterns::from_low_args`
// and `Paths::from_low_args`, each with its own error message.
if path == Path::new("-") && state.stdin_consumed {
    anyhow::bail!("stdin has already been consumed");
}

Section 4: Pattern Handling

/// The de-duplicated list of patterns to search for, in the order they
/// were first given.
///
/// Empty when ripgrep is not running in a search mode.
#[derive(Debug)]
struct Patterns {
    patterns: Vec<String>,
}

impl Patterns {
    /// Collects the search patterns out of `low`.
    ///
    /// * Outside of search mode no patterns are collected at all.
    /// * Without any `-e`/`-f` source, the first positional argument is
    ///   removed from `low.positional` and used as the only pattern.
    /// * Otherwise every `-e`/`-f` source is drained in order, keeping
    ///   only the first occurrence of each distinct pattern.
    ///
    /// Reading patterns from stdin (`-f -`) marks `state.stdin_consumed`.
    ///
    /// # Errors
    ///
    /// Fails when no pattern is available, when the positional pattern is
    /// not valid UTF-8, when stdin was already consumed, or when a
    /// pattern source cannot be read.
    fn from_low_args(
        state: &mut State,
        low: &mut LowArgs,
    ) -> anyhow::Result<Patterns> {
        // Nothing to search with means nothing to collect.
        let Mode::Search(_) = low.mode else {
            return Ok(Patterns { patterns: vec![] });
        };

        // With no -e/-f source, the first positional is the pattern.
        if low.patterns.is_empty() {
            if low.positional.is_empty() {
                anyhow::bail!("ripgrep requires at least one pattern");
            }
            return match low.positional.remove(0).into_string() {
                Ok(pat) => Ok(Patterns { patterns: vec![pat] }),
                Err(_) => anyhow::bail!("pattern is not valid UTF-8"),
            };
        }

        // Gather from -e/--regexp and -f/--file, skipping repeats while
        // preserving first-seen order.
        let mut seen = HashSet::new();
        let mut patterns = Vec::with_capacity(low.patterns.len());
        let mut push_unique = |pat: String| {
            if seen.contains(&pat) {
                return;
            }
            seen.insert(pat.clone());
            patterns.push(pat);
        };

        for source in low.patterns.drain(..) {
            match source {
                PatternSource::Regexp(pat) => push_unique(pat),
                PatternSource::File(path) if path == Path::new("-") => {
                    if state.stdin_consumed {
                        anyhow::bail!("stdin has already been consumed");
                    }
                    for pat in grep::cli::patterns_from_stdin()? {
                        push_unique(pat);
                    }
                    // Only flag stdin as used once it was read successfully.
                    state.stdin_consumed = true;
                }
                PatternSource::File(path) => {
                    for pat in grep::cli::patterns_from_path(&path)? {
                        push_unique(pat);
                    }
                }
            }
        }
        Ok(Patterns { patterns })
    }
}

Pattern sources:

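| Source     | Example              | Notes                   |
|------------|----------------------|-------------------------|
| Positional | rg foo               | First arg when no -e/-f |
| -e flag    | rg -e foo -e bar     | Can repeat              |
| -f flag    | rg -f patterns.txt   | One pattern per line    |
| -f -       | rg -f -              | Read from stdin         |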

Section 5: Path Handling

/// The set of paths to search, plus two facts derived while collecting
/// them.
#[derive(Debug)]
struct Paths {
    /// The paths to search; `from_low_args` never leaves this empty.
    paths: Vec<PathBuf>,
    /// True when no path was given and one was chosen heuristically.
    has_implicit_path: bool,  // Did we guess the path?
    /// True when there is exactly one path and it is `-` or not a directory.
    is_one_file: bool,        // Exactly one non-directory?
}

impl Paths {
    /// Determines which paths to search.
    ///
    /// All remaining positional arguments are drained from `low` and
    /// treated as paths. When there are none, a single implicit path is
    /// chosen: stdin (`-`) if it is readable, unconsumed and we are in a
    /// search mode, otherwise the current directory (`./`).
    ///
    /// The unused `&Patterns` parameter exists so that this cannot be
    /// called before the patterns were taken out of `low.positional`.
    ///
    /// # Errors
    ///
    /// Fails if `-` is given as a path after stdin was already consumed
    /// for reading patterns.
    fn from_low_args(
        state: &mut State,
        _: &Patterns,  // Ensures patterns processed first
        low: &mut LowArgs,
    ) -> anyhow::Result<Paths> {
        // Explicit paths: everything still left in the positionals.
        let mut paths = Vec::with_capacity(low.positional.len());
        for osarg in low.positional.drain(..) {
            let path = PathBuf::from(osarg);
            let is_stdin = path == Path::new("-");
            if state.stdin_consumed && is_stdin {
                anyhow::bail!("attempted to search stdin after reading patterns from it");
            }
            paths.push(path);
        }

        if !paths.is_empty() {
            // A lone path counts as "one file" unless it is known to be
            // a directory; `-` (stdin) always counts.
            let is_one_file = match paths.as_slice() {
                [only] => only.as_path() == Path::new("-") || !only.is_dir(),
                _ => false,
            };
            return Ok(Paths { paths, has_implicit_path: false, is_one_file });
        }

        // No explicit paths: guess between stdin and the current directory.
        let search_stdin = grep::cli::is_readable_stdin()
            && !state.stdin_consumed
            && matches!(low.mode, Mode::Search(_));
        let (implicit, is_one_file) =
            if search_stdin { ("-", true) } else { ("./", false) };

        Ok(Paths {
            paths: vec![PathBuf::from(implicit)],
            has_implicit_path: true,
            is_one_file,
        })
    }
}

Heuristic decision tree:

No explicit paths given?
├── stdin readable AND not consumed AND search mode?
│   ├── Yes → Search stdin
│   └── No  → Search current directory
└── Explicit paths → Use them directly

Section 6: Binary Detection

/// The binary detection strategies to use, split by how a file came to
/// be searched.
#[derive(Debug)]
struct BinaryDetection {
    /// Strategy for files named explicitly by the user.
    explicit: grep::searcher::BinaryDetection,  // For named files
    /// Strategy for files discovered while walking directories.
    implicit: grep::searcher::BinaryDetection,  // For discovered files
}

impl BinaryDetection {
    /// Chooses the binary detection strategy for explicit and implicit
    /// files.
    ///
    /// * `-a/--text` or `--null-data`: detection is disabled for both.
    /// * `--binary`: NUL bytes are converted for both.
    /// * Default: explicit files convert NUL bytes (never quitting
    ///   early), implicit files quit at the first NUL byte.
    fn from_low_args(_: &State, low: &LowArgs) -> BinaryDetection {
        use grep::searcher::BinaryDetection as Detection;

        let disabled =
            low.null_data || matches!(low.binary, BinaryMode::AsText);
        let (explicit, implicit) = if disabled {
            (Detection::none(), Detection::none())
        } else if matches!(low.binary, BinaryMode::SearchAndSuppress) {
            (Detection::convert(b'\x00'), Detection::convert(b'\x00'))
        } else {
            // Default: only implicitly discovered files may be abandoned.
            (Detection::convert(b'\x00'), Detection::quit(b'\x00'))
        };

        BinaryDetection { explicit, implicit }
    }
}

Binary detection modes:

| Mode        | Explicit files | Implicit files |
|-------------|----------------|----------------|
| Default     | Convert NUL    | Quit on NUL    |
| -a/--text   | None           | None           |
| --binary    | Convert NUL    | Convert NUL    |
| --null-data | None           | None           |

Section 7: Building the Matcher

/// Builds the pattern matcher for the configured regex engine.
///
/// * `Default` uses the Rust regex engine; a failure is reported with the
///   text produced by `suggest_other_engine`.
/// * `PCRE2` uses PCRE2 only.
/// * `Auto` tries the Rust engine first and falls back to PCRE2; if both
///   fail, the error carries both messages.
///
/// # Errors
///
/// Returns an error when the patterns cannot be compiled by the selected
/// engine(s).
pub(crate) fn matcher(&self) -> anyhow::Result<PatternMatcher> {
    // The two single-engine choices return immediately.
    match self.engine {
        EngineChoice::Default => {
            return self.matcher_rust().map_err(|err| {
                anyhow::anyhow!(suggest_other_engine(err.to_string()))
            });
        }
        EngineChoice::PCRE2 => return Ok(self.matcher_pcre2()?),
        EngineChoice::Auto => {}
    }

    // Hybrid mode: Rust engine first...
    let rust_err = match self.matcher_rust() {
        Err(err) => err,
        Ok(built) => return Ok(built),
    };
    // ...then PCRE2 as the fallback.
    let pcre_err = match self.matcher_pcre2() {
        Err(err) => err,
        Ok(built) => return Ok(built),
    };
    // Neither engine accepted the patterns: report both failures.
    anyhow::bail!(
        "regex could not be compiled with either engine.\n\n\
         Rust regex error:\n{rust_err}\n\n\
         PCRE2 error:\n{pcre_err}"
    );
}

/// Builds a matcher backed by the Rust regex engine.
///
/// Applies case handling, word/line boundaries, line terminator rules
/// (including CRLF, so the matcher agrees with the searcher), the
/// user-supplied regex and DFA size limits, and NUL-byte banning when
/// binary detection is active.
///
/// # Errors
///
/// Returns an error when the patterns fail to compile. The message is
/// passed through `suggest_multiline` so that a pattern containing a
/// literal line terminator points the user at `-U`.
fn matcher_rust(&self) -> anyhow::Result<PatternMatcher> {
    let mut builder = grep::regex::RegexMatcherBuilder::new();
    builder
        .multi_line(true)
        .unicode(!self.no_unicode)
        .octal(false)
        .fixed_strings(self.fixed_strings);

    // Case sensitivity
    match self.case {
        CaseMode::Sensitive => builder.case_insensitive(false),
        CaseMode::Insensitive => builder.case_insensitive(true),
        CaseMode::Smart => builder.case_smart(true),
    };

    // Word/line boundaries
    if let Some(ref boundary) = self.boundary {
        match *boundary {
            BoundaryMode::Line => builder.whole_line(true),
            BoundaryMode::Word => builder.word(true),
        };
    }

    // Line terminator handling
    if self.multiline {
        builder.dot_matches_new_line(self.multiline_dotall);
        if self.crlf {
            // Multiline matches may span lines, so no terminator is set.
            builder.crlf(true).line_terminator(None);
        }
    } else {
        builder.line_terminator(Some(b'\n'));
        if self.crlf {
            builder.crlf(true);
        }
        // Applied last so that --null-data decides the terminator.
        if self.null_data {
            builder.line_terminator(Some(b'\x00'));
        }
    }

    // User-supplied limits on compiled regex and lazy DFA cache size.
    if let Some(limit) = self.regex_size_limit {
        builder.size_limit(limit);
    }
    if let Some(limit) = self.dfa_size_limit {
        builder.dfa_size_limit(limit);
    }

    // Binary detection interaction
    if !self.binary.is_none() {
        builder.ban_byte(Some(b'\x00'));
    }

    let m = match builder.build_many(&self.patterns.patterns) {
        Ok(m) => m,
        Err(err) => anyhow::bail!(suggest_multiline(err.to_string())),
    };
    Ok(PatternMatcher::RustRegex(m))
}

Error suggestion helpers:

/// Returns `msg` extended with a hint to use `--pcre2` when the message
/// mentions backreferences or look-around; returns `None` otherwise.
fn suggest_pcre2(msg: &str) -> Option<String> {
    const PCRE2_ONLY_FEATURES: [&str; 2] = ["backreferences", "look-around"];

    let needs_pcre2 =
        PCRE2_ONLY_FEATURES.iter().any(|&feature| msg.contains(feature));
    if !needs_pcre2 {
        return None;
    }
    Some(format!("{msg}\n\nConsider enabling PCRE2 with the --pcre2 flag."))
}

/// Appends a hint to enable multiline mode (`-U`) when `msg` contains
/// both "the literal" and "not allowed"; otherwise returns `msg` as is.
fn suggest_multiline(msg: String) -> String {
    let is_banned_literal_error = ["the literal", "not allowed"]
        .iter()
        .all(|&needle| msg.contains(needle));
    if !is_banned_literal_error {
        return msg;
    }
    format!("{msg}\n\nConsider enabling multiline mode with -U.")
}

Section 8: Building the Searcher

/// Builds the searcher from the resolved arguments.
///
/// Configures the line terminator (CRLF takes priority over NUL, which
/// takes priority over `\n`), line numbering, match inversion, multiline
/// mode, memory map policy, match limits, context and encoding.
///
/// # Errors
///
/// No step in this body can fail; it returns `Result` so callers can
/// treat it like the other builder methods.
pub(crate) fn searcher(&self) -> anyhow::Result<grep::searcher::Searcher> {
    // Line terminator configuration
    let line_term = if self.crlf {
        grep::matcher::LineTerminator::crlf()
    } else if self.null_data {
        grep::matcher::LineTerminator::byte(b'\x00')
    } else {
        grep::matcher::LineTerminator::byte(b'\n')
    };

    let mut builder = grep::searcher::SearcherBuilder::new();
    builder
        .line_terminator(line_term)
        .line_number(self.line_number)
        .invert_match(self.invert_match)
        .multi_line(self.multiline)
        .memory_map(self.mmap_choice.clone())
        .max_matches(self.max_count)
        .stop_on_nonmatch(self.stop_on_nonmatch);

    // Context configuration.
    // Both arms are blocks ending in `;` so that each has type `()`; the
    // builder setters return `&mut SearcherBuilder`, which would otherwise
    // make the arms' types disagree.
    match self.context {
        ContextMode::Passthru => {
            builder.passthru(true);
        }
        ContextMode::Limited(ref limited) => {
            let (before, after) = limited.get();
            builder.before_context(before);
            builder.after_context(after);
        }
    }

    // Encoding configuration
    match self.encoding {
        EncodingMode::Auto => {}  // Default BOM sniffing
        EncodingMode::Some(ref enc) => {
            builder.encoding(Some(enc.clone()));
        }
        EncodingMode::Disabled => {
            builder.bom_sniffing(false);
        }
    }

    Ok(builder.build())
}

Memory map decision:

// Decides whether the searcher may use memory maps.
let mmap_choice = {
    // SAFETY(review): `MmapChoice::auto()` is an `unsafe fn`, so the
    // caller takes on its contract. Presumably this concerns the mapped
    // file being truncated or mutated while it is searched — confirm
    // against the grep-searcher docs and record the justification here.
    let maybe = unsafe { grep::searcher::MmapChoice::auto() };
    let never = grep::searcher::MmapChoice::never();

    match low.mmap {
        MmapMode::Auto => {
            // Heuristic: mmap for ≤10 regular files.
            // `is_file()` is false for directories, so any directory
            // argument disables memory maps in auto mode.
            if paths.paths.len() <= 10 
                && paths.paths.iter().all(|p| p.is_file()) 
            {
                maybe
            } else {
                never
            }
        }
        // Explicit user choices bypass the heuristic.
        MmapMode::AlwaysTryMmap => maybe,
        MmapMode::Never => never,
    }
};

Section 9: Building the Printer

/// Builds the printer matching `search_mode`, writing to `wtr`.
///
/// In quiet mode every search mode maps to a summary printer. Otherwise
/// the JSON and standard modes get their dedicated printers and the
/// remaining modes get a summary printer of the corresponding kind.
pub(crate) fn printer<W: termcolor::WriteColor>(
    &self,
    search_mode: SearchMode,
    wtr: W,
) -> Printer<W> {
    let summary_kind = match (self.quiet, search_mode) {
        // Quiet: only whether something (did not) match is tracked.
        (true, SearchMode::FilesWithoutMatch) => SummaryKind::QuietWithoutMatch,
        (
            true,
            SearchMode::FilesWithMatches
            | SearchMode::Count
            | SearchMode::CountMatches
            | SearchMode::JSON
            | SearchMode::Standard,
        ) => SummaryKind::QuietWithMatch,

        // Not quiet: the two non-summary printers return immediately.
        (false, SearchMode::JSON) => {
            return Printer::JSON(self.printer_json(wtr))
        }
        (false, SearchMode::Standard) => {
            return Printer::Standard(self.printer_standard(wtr))
        }
        (false, SearchMode::FilesWithMatches) => SummaryKind::PathWithMatch,
        (false, SearchMode::FilesWithoutMatch) => SummaryKind::PathWithoutMatch,
        (false, SearchMode::Count) => SummaryKind::Count,
        (false, SearchMode::CountMatches) => SummaryKind::CountMatches,
    };
    Printer::Summary(self.printer_summary(wtr, summary_kind))
}

/// Builds the standard (line-oriented) printer writing to `wtr`.
///
/// All display options are forwarded from `self`. The separator printed
/// between searched files is only handed to the printer when running
/// with a single thread.
fn printer_standard<W: termcolor::WriteColor>(
    &self,
    wtr: W,
) -> grep::printer::Standard<W> {
    let single_threaded = self.threads == 1;
    let replacement = self.replace.clone().map(|r| r.into());

    let mut builder = grep::printer::StandardBuilder::new();
    builder
        .byte_offset(self.byte_offset)
        .color_specs(self.colors.clone())
        .column(self.column)
        .heading(self.heading)
        .hyperlink(self.hyperlink_config.clone());
    builder
        .max_columns(self.max_columns)
        .max_columns_preview(self.max_columns_preview)
        .only_matching(self.only_matching)
        .path(self.with_filename)
        .replacement(replacement);
    builder.stats(self.stats.is_some()).trim_ascii(self.trim);

    // Single-threaded: printer owns file separator
    if single_threaded {
        builder.separator_search(self.file_separator.clone());
    }

    builder.build(wtr)
}

Printer types:

| Mode     | Printer     | Output                      |
|----------|-------------|-----------------------------|
| Standard | Standard<W> | Matching lines with context |
| -l       | Summary<W>  | Filenames only              |
| -c       | Summary<W>  | Counts per file             |
| --json   | JSON<W>     | Structured JSON lines       |

Section 10: Building the Walker

pub(crate) fn walk_builder(&self) -> anyhow::Result<ignore::WalkBuilder> {
    let mut builder = ignore::WalkBuilder::new(&self.paths.paths[0]);

    // Add additional paths
    for path in self.paths.paths.iter().skip(1) {
        builder.add(path);
    }

    // Add custom ignore files
    if !self.no_ignore_files {
        for path in self.ignore_file.iter() {
            if let Some(err) = builder.add_ignore(path) {
                ignore_message!("{err}");
            }
        }
    }

    builder
        .max_depth(self.max_depth)
        .follow_links(self.follow)
        .max_filesize(self.max_filesize)
        .threads(self.threads)
        .same_file_system(self.one_file_system)
        .overrides(self.globs.clone())
        .types(self.types.clone())
        // Ignore rule configuration
        .hidden(!self.hidden)
        .parents(!self.no_ignore_parent)
        .ignore(!self.no_ignore_dot)
        .git_global(!self.no_ignore_vcs && !self.no_ignore_global)
        .git_ignore(!self.no_ignore_vcs)
        .git_exclude(!self.no_ignore_vcs && !self.no_ignore_exclude)
        .require_git(!self.no_require_git)
        .ignore_case_insensitive(self.ignore_file_case_insensitive);

    // Add .rgignore support
    if !self.no_ignore_dot {
        builder.add_custom_ignore_filename(".rgignore");
    }

    // Path sorting during traversal (ascending only)
    if let Some(ref sort) = self.sort {
        if !sort.reverse && matches!(sort.kind, SortModeKind::Path) {
            builder.sort_by_file_name(|a, b| a.cmp(b));
        }
    }

    Ok(builder)
}

Ignore file precedence (highest to lowest):

  1. .rgignore
  2. .ignore
  3. .gitignore
  4. .git/info/exclude, then global git ignores
  5. Command-line --ignore-file (lowest: consulted only when no rule above matches)

Section 11: Derived Configuration

// Thread count derivation.
// Precedence: a forced single thread (sorting or a single file) beats an
// explicit `low.threads`, which beats the detected parallelism.
let threads = if low.sort.is_some() || paths.is_one_file {
    1  // Sorting or single file → no parallelism benefit
} else if let Some(threads) = low.threads {
    threads  // User specified
} else {
    // Falls back to 1 when the available parallelism cannot be queried.
    std::thread::available_parallelism()
        .map_or(1, |n| n.get())
        .min(12)  // Cap at 12
};

// Line number derivation.
// The closure only runs when the user gave no explicit preference.
let line_number = low.line_number.unwrap_or_else(|| {
    if low.quiet { return false; }
    // Non-search modes never show line numbers.
    let Mode::Search(ref mode) = low.mode else { return false };
    match *mode {
        // Summary-style modes never show line numbers.
        SearchMode::FilesWithMatches
        | SearchMode::FilesWithoutMatch
        | SearchMode::Count
        | SearchMode::CountMatches => false,
        SearchMode::JSON => true,
        SearchMode::Standard => {
            // On for a terminal unless only stdin is searched; also
            // implied by column display and by vimgrep.
            (state.is_terminal_stdout && !paths.is_only_stdin())
                || column
                || low.vimgrep
        }
    }
});

// Heading derivation.
// vimgrep always disables headings, even when explicitly requested.
let heading = match low.heading {
    None => !low.vimgrep && state.is_terminal_stdout,
    Some(false) => false,
    Some(true) => !low.vimgrep,  // vimgrep overrides
};

// Filename display derivation.
// Defaults to on unless exactly one file is searched; vimgrep forces it on
// by default.
let with_filename = low.with_filename
    .unwrap_or_else(|| low.vimgrep || !paths.is_one_file);

Section 12: Helper Functions

Type definitions:

/// Builds the file type matcher from the default type definitions plus
/// the user's type changes, applied in the order they were given.
///
/// # Errors
///
/// Returns an error if a type definition is invalid or if the final
/// matcher cannot be built.
fn types(low: &LowArgs) -> anyhow::Result<ignore::types::Types> {
    let mut builder = ignore::types::TypesBuilder::new();
    builder.add_defaults();  // rust, python, c, etc.

    for tychange in low.type_changes.iter() {
        // Every arm is a block ending in `;` so that all arms have type
        // `()`: `clear`/`select`/`negate` return the builder while
        // `add_def(..)?` yields `()`, which would otherwise not unify.
        match *tychange {
            TypeChange::Clear { ref name } => {
                builder.clear(name);
            }
            TypeChange::Add { ref def } => {
                builder.add_def(def)?;
            }
            TypeChange::Select { ref name } => {
                builder.select(name);
            }
            TypeChange::Negate { ref name } => {
                builder.negate(name);
            }
        }
    }
    Ok(builder.build()?)
}

Glob overrides:

/// Builds the glob override matcher from `--glob` and `--iglob`.
///
/// Globs are resolved relative to the working directory captured in
/// `state`. Regular globs are case sensitive unless
/// `low.glob_case_insensitive` is set; `--iglob` globs are always added
/// with case insensitivity enabled.
///
/// # Errors
///
/// Returns an error if a glob is invalid or the matcher cannot be built.
fn globs(
    state: &State,
    low: &LowArgs,
) -> anyhow::Result<ignore::overrides::Override> {
    let mut builder = ignore::overrides::OverrideBuilder::new(&state.cwd);

    // Global case insensitivity
    if low.glob_case_insensitive {
        // Propagate rather than panic: this function already returns a
        // `Result`.
        builder.case_insensitive(true)?;
    }

    // Regular globs
    for glob in low.globs.iter() {
        builder.add(glob)?;
    }

    // Case-insensitive globs (--iglob).
    // The switch only affects globs added after this point.
    builder.case_insensitive(true)?;
    for glob in low.iglobs.iter() {
        builder.add(glob)?;
    }

    Ok(builder.build()?)
}

Hostname resolution:

/// Determines the hostname, preferring the output of a user-specified
/// command and falling back to the platform's own lookup.
///
/// `bin` is the optional command to run. Every failure along the way
/// (the binary cannot be resolved, cannot be spawned, its output cannot
/// be read, or the output is empty) is logged at debug level and falls
/// back to `platform_hostname()`.
///
/// Returns `None` only when the fallback itself yields `None`.
fn hostname(bin: Option<&Path>) -> Option<String> {
    // Try user-specified binary first
    let Some(bin) = bin else { return platform_hostname() };

    let bin = match grep::cli::resolve_binary(bin) {
        Ok(bin) => bin,
        Err(err) => {
            log::debug!("hostname binary failed: {err}");
            return platform_hostname();
        }
    };

    let mut cmd = std::process::Command::new(&bin);
    // The command gets no input.
    cmd.stdin(std::process::Stdio::null());

    let rdr = match grep::cli::CommandReader::new(&mut cmd) {
        Ok(rdr) => rdr,
        Err(err) => {
            log::debug!("failed to spawn hostname command {bin:?}: {err}");
            return platform_hostname();
        }
    };

    // A read failure is treated like every other failure: fall back
    // instead of giving up on the hostname entirely.
    let out = match std::io::read_to_string(rdr) {
        Ok(out) => out,
        Err(err) => {
            log::debug!(
                "failed to read output of hostname command {bin:?}: {err}"
            );
            return platform_hostname();
        }
    };
    let hostname = out.trim();

    if hostname.is_empty() {
        log::debug!("hostname command {bin:?} produced empty output");
        platform_hostname()
    } else {
        Some(hostname.to_string())
    }
}

Quick Reference: HiArgs Methods

| Method             | Returns         | Purpose                     |
|--------------------|-----------------|-----------------------------|
| matcher()          | PatternMatcher  | Compiled regex engine       |
| searcher()         | Searcher        | File reading configuration  |
| printer()          | Printer<W>      | Output formatter            |
| search_worker()    | SearchWorker<W> | Coordinates all three       |
| walk_builder()     | WalkBuilder     | Directory traversal         |
| haystack_builder() | HaystackBuilder | Path → Haystack converter   |
| buffer_writer()    | BufferWriter    | Thread-safe output          |
| stdout()           | StandardStream  | Colored stdout writer       |
| sort()             | Iterator        | Optional result sorting     |
| threads()          | usize           | Parallelism level           |
| mode()             | Mode            | What command to run         |
| stats()            | Option<Stats>   | Statistics tracking         |
| types()            | &Types          | File type matcher           |