Skip to content

ripgrep crates/core/flags/hiargs.rs: Code Companion

Reference code for the High-Level Arguments lecture. Sections correspond to the lecture document.


Section 1: The HiArgs Structure and Its Philosophy

/// A high level representation of CLI arguments.
///
/// The distinction between low and high level arguments is somewhat arbitrary
/// and wishy washy. The main idea here is that high level arguments generally
/// require all of CLI parsing to be finished.
///
/// NOTE: this listing is abridged. The `// ...` markers below stand in for
/// fields that are not shown, and the grouping comments are a reading aid
/// rather than part of the type's contract.
#[derive(Debug)]
pub(crate) struct HiArgs {
    // Direct copies from LowArgs - simple values that pass through unchanged
    // (except `binary`, which is a converted type; see its trailing note).
    binary: BinaryDetection, // Built by `BinaryDetection::from_low_args`, not copied
    boundary: Option<BoundaryMode>,
    buffer: BufferMode,
    byte_offset: bool,
    case: CaseMode,

    // Computed from multiple low-level arguments
    color: ColorChoice,              // Depends on terminal detection + user flag
    colors: grep::printer::ColorSpecs,
    column: bool,                    // Derived from --column OR --vimgrep
    heading: bool,                   // Complex logic involving terminal + vimgrep

    // Fully-constructed objects from external crates
    globs: ignore::overrides::Override,  // Compiled glob matcher
    types: ignore::types::Types,         // File type matcher
    mmap_choice: grep::searcher::MmapChoice,
    hyperlink_config: grep::printer::HyperlinkConfig,

    // Derived state affecting search behavior
    quit_after_match: bool,          // Depends on both stats and quiet
    threads: usize,                  // Computed from sort mode, path count, available CPUs
    with_filename: bool,             // Complex default based on vimgrep and path count

    // ... over 80 fields total
    cwd: PathBuf,
    is_terminal_stdout: bool,
    line_number: bool,
    paths: Paths,       // Resolved search targets (see `Paths`)
    patterns: Patterns, // Collected search patterns (see `Patterns`)
    // ... many more
}

The field groupings show the progression from raw values to computed configuration. Fields like globs and types are fully constructed matchers ready for use during search.


Section 2: The Conversion Process and State Management

impl HiArgs {
    /// Converts fully parsed low-level arguments into their high-level form.
    ///
    /// The body shown here is abridged: the trailing `// ... more conversions`
    /// marker stands in for the remaining conversions and the final
    /// construction of the `HiArgs` value.
    ///
    /// # Errors
    ///
    /// Propagates failures from `sort.supported()`, `State::new()`,
    /// `Patterns::from_low_args` and `Paths::from_low_args`.
    ///
    /// # Panics
    ///
    /// Panics if `low.special` is not `None`.
    pub(crate) fn from_low_args(mut low: LowArgs) -> anyhow::Result<HiArgs> {
        // Defensive assertion - special modes should never reach this point
        assert_eq!(None, low.special, "special mode demands short-circuiting");

        // Validate sorting mode support early
        if let Some(ref sort) = low.sort {
            sort.supported()?;
        }

        // Mode adjustments for flag interactions. Only search modes are
        // rewritten; every other mode is left untouched.
        match low.mode {
            Mode::Search(ref mut mode) => match *mode {
                // -v --count-matches becomes -v --count
                // (counting individual matches makes no sense when inverting)
                SearchMode::CountMatches if low.invert_match => {
                    *mode = SearchMode::Count;
                }
                // -o --count becomes --count-matches
                SearchMode::Count if low.only_matching => {
                    *mode = SearchMode::CountMatches;
                }
                _ => {}
            },
            _ => {}
        }

        // Initialize shared state for conversion functions
        let mut state = State::new()?;

        // Order matters: patterns must be read before paths. Pattern
        // collection may take the first positional argument as the pattern
        // and may mark stdin as consumed; path resolution then drains the
        // remaining positionals and consults that flag.
        let patterns = Patterns::from_low_args(&mut state, &mut low)?;
        let paths = Paths::from_low_args(&mut state, &patterns, &mut low)?;

        // Build complex objects using resolved state
        let binary = BinaryDetection::from_low_args(&state, &low);
        let colors = take_color_specs(&mut state, &mut low);
        // ... more conversions
    }
}

/// State shared across all low->high argument conversions.
///
/// A single value is created via `State::new` and then passed by reference to
/// the individual conversion routines, so a mutation made by one routine is
/// visible to the routines that run after it.
#[derive(Debug)]
struct State {
    /// Terminal detection affects color and heading defaults
    is_terminal_stdout: bool,
    /// Prevents double-reading stdin (patterns vs search input). Starts out
    /// `false`; `Patterns::from_low_args` sets it after reading patterns from
    /// `-`, and `Paths::from_low_args` consults it.
    stdin_consumed: bool,
    /// Cached for glob matching and path resolution
    cwd: PathBuf,
}

impl State {
    /// Captures the process-wide facts that the conversion routines share.
    ///
    /// The current working directory is resolved first (and logged at debug
    /// level), then stdout is probed for being a terminal. Stdin starts out
    /// marked as not yet consumed.
    ///
    /// # Errors
    ///
    /// Returns an error if the current working directory cannot be
    /// determined.
    fn new() -> anyhow::Result<State> {
        use std::io::IsTerminal;

        let cwd = current_dir()?;
        log::debug!("read CWD from environment: {}", cwd.display());

        let is_terminal_stdout = std::io::stdout().is_terminal();
        let fresh = State { is_terminal_stdout, stdin_consumed: false, cwd };
        Ok(fresh)
    }
}

The State struct acts as a coordination mechanism—mutations by one conversion function (like setting stdin_consumed = true) affect subsequent conversions.


Section 3: Pattern Collection and Deduplication

/// The disjunction of patterns to search for.
///
/// Constructed by `Patterns::from_low_args`.
#[derive(Debug)]
struct Patterns {
    /// The patterns, in the order they were first encountered. This is empty
    /// for non-search modes. When gathered from `-e/--regexp` and `-f/--file`
    /// sources, repeated patterns are kept only once.
    patterns: Vec<String>,
}

impl Patterns {
    /// Gathers every pattern that should be searched for.
    ///
    /// * Non-search modes yield an empty pattern list.
    /// * When no `-e/--regexp` or `-f/--file` flag was given, the first
    ///   positional argument is removed from `low.positional` and used as
    ///   the sole pattern.
    /// * Otherwise all pattern sources are drained in order and duplicates
    ///   are dropped, keeping the first occurrence.
    ///
    /// Reading patterns from `-` marks stdin as consumed in `state`.
    ///
    /// # Errors
    ///
    /// Fails when no pattern is available, when the positional pattern is
    /// not valid UTF-8, when stdin was already consumed, or when reading a
    /// pattern source fails.
    fn from_low_args(
        state: &mut State,
        low: &mut LowArgs,
    ) -> anyhow::Result<Patterns> {
        // Only search modes consume patterns.
        let Mode::Search(_) = low.mode else {
            return Ok(Patterns { patterns: vec![] });
        };

        // `rg foo`: no explicit pattern flags, so the first positional
        // argument is the pattern.
        if low.patterns.is_empty() {
            if low.positional.is_empty() {
                anyhow::bail!(
                    "ripgrep requires at least one pattern to execute a search"
                );
            }
            return match low.positional.remove(0).into_string() {
                Ok(pat) => Ok(Patterns { patterns: vec![pat] }),
                Err(_) => {
                    Err(anyhow::anyhow!("pattern given is not valid UTF-8"))
                }
            };
        }

        // Track what has been added so repeated patterns are only kept once.
        // The membership test comes first so duplicates are never cloned.
        let mut already_added = HashSet::new();
        let mut unique = Vec::with_capacity(low.patterns.len());
        let mut push_unique = |candidate: String| {
            if already_added.contains(&candidate) {
                return;
            }
            already_added.insert(candidate.clone());
            unique.push(candidate);
        };

        for source in low.patterns.drain(..) {
            let batch = match source {
                // -e/--regexp: a single literal pattern.
                PatternSource::Regexp(pat) => {
                    push_unique(pat);
                    continue;
                }
                // -f -: patterns come from stdin, which may be read once.
                PatternSource::File(path) if path == Path::new("-") => {
                    anyhow::ensure!(
                        !state.stdin_consumed,
                        "error reading -f/--file from stdin: stdin \
                         has already been consumed"
                    );
                    let from_stdin = grep::cli::patterns_from_stdin()?;
                    // Recorded so later path handling knows stdin is gone.
                    state.stdin_consumed = true;
                    from_stdin
                }
                // -f FILE: patterns come from a regular path.
                PatternSource::File(path) => {
                    grep::cli::patterns_from_path(&path)?
                }
            };
            batch.into_iter().for_each(&mut push_unique);
        }
        Ok(Patterns { patterns: unique })
    }
}

The seen HashSet adds memory overhead but prevents pathological performance when users accidentally pass thousands of duplicate patterns.


Section 4: Path Resolution and Implicit Behavior

/// The collection of paths to search.
///
/// Constructed by `Paths::from_low_args`, which always produces at least one
/// entry: either the positional paths given, or a single guessed path.
#[derive(Debug)]
struct Paths {
    /// The paths to search, in the order given. When none were given this
    /// holds the single guessed path (`./` or `-`).
    paths: Vec<PathBuf>,
    /// True when ripgrep "guessed" to search CWD (affects path printing).
    /// Also true when the guess was stdin; false whenever at least one path
    /// was given explicitly.
    has_implicit_path: bool,
    /// Enables single-file optimizations. True when exactly one path is
    /// searched and it is either `-` or not a directory.
    is_one_file: bool,
}

impl Paths {
    /// Resolves the set of paths to search.
    ///
    /// All remaining positional arguments are drained from `low` and treated
    /// as paths. If there are none, a single path is guessed: stdin (`-`)
    /// when it looks readable, has not been consumed and the mode is a search
    /// mode; otherwise the current directory (`./`).
    ///
    /// The unused `&Patterns` parameter exists only so that callers must
    /// build the patterns first.
    ///
    /// # Errors
    ///
    /// Fails if `-` is given as a path after stdin was already consumed for
    /// reading patterns.
    fn from_low_args(
        state: &mut State,
        _: &Patterns,  // Required to ensure patterns consumed first
        low: &mut LowArgs,
    ) -> anyhow::Result<Paths> {
        let stdin = Path::new("-");

        let mut explicit = Vec::with_capacity(low.positional.len());
        for osarg in low.positional.drain(..) {
            let candidate = PathBuf::from(osarg);
            let wants_consumed_stdin =
                state.stdin_consumed && candidate.as_path() == stdin;
            if wants_consumed_stdin {
                anyhow::bail!(
                    "error: attempted to read patterns from stdin \
                     while also searching stdin",
                );
            }
            explicit.push(candidate);
        }

        if !explicit.is_empty() {
            // A lone path counts as "one file" unless it is a directory.
            // The test is "not a directory" rather than "is a file" so that
            // device files and pipes qualify too.
            // See: https://github.com/BurntSushi/ripgrep/issues/2736
            let is_one_file = match explicit.as_slice() {
                [only] => only.as_path() == stdin || !only.is_dir(),
                _ => false,
            };
            return Ok(Paths {
                paths: explicit,
                has_implicit_path: false,
                is_one_file,
            });
        }

        // Nothing was given explicitly, so guess between stdin and the
        // current directory.
        let is_readable_stdin = grep::cli::is_readable_stdin();
        let search_stdin = is_readable_stdin
            && !state.stdin_consumed
            && matches!(low.mode, Mode::Search(_));

        let (guessed, is_one_file) = if search_stdin {
            log::debug!("heuristic chose to search stdin");
            (PathBuf::from("-"), true)
        } else {
            log::debug!("heuristic chose to search ./");
            (PathBuf::from("./"), false)
        };

        Ok(Paths {
            paths: vec![guessed],
            has_implicit_path: true,
            is_one_file,
        })
    }
}

The has_implicit_path field determines whether paths are printed with ./ prefix—matching GNU grep's behavior for script compatibility.


Section 5: Binary Detection Strategy

/// Dual binary detection modes: one for explicit files, one for discovered files.
///
/// Both modes are derived from the same low-level flags by
/// `BinaryDetection::from_low_args`; they differ only in how a NUL byte is
/// handled when detection is enabled.
#[derive(Debug)]
struct BinaryDetection {
    /// Used when user explicitly names a file. Either disabled or in
    /// "convert" mode; it is never set to quit.
    explicit: grep::searcher::BinaryDetection,
    /// Used when file is discovered during directory traversal. May be
    /// disabled, "convert", or (by default) "quit".
    implicit: grep::searcher::BinaryDetection,
}

impl BinaryDetection {
    /// Derives both binary detection modes from the low-level flags.
    ///
    /// * `-a/--text` or `--null-data`: detection is disabled for both.
    /// * Otherwise explicitly named files always use "convert" on NUL, so a
    ///   file the user asked for is never abandoned part-way.
    /// * Discovered files use "convert" under `--binary`, and "quit" on NUL
    ///   by default so non-text files are skipped quickly.
    fn from_low_args(_: &State, low: &LowArgs) -> BinaryDetection {
        let disabled =
            low.null_data || matches!(low.binary, BinaryMode::AsText);
        if disabled {
            return BinaryDetection {
                explicit: grep::searcher::BinaryDetection::none(),
                implicit: grep::searcher::BinaryDetection::none(),
            };
        }

        // Only the handling of discovered files depends on the binary mode.
        let implicit = match low.binary {
            BinaryMode::SearchAndSuppress => {
                grep::searcher::BinaryDetection::convert(b'\x00')
            }
            _ => grep::searcher::BinaryDetection::quit(b'\x00'),
        };
        BinaryDetection {
            explicit: grep::searcher::BinaryDetection::convert(b'\x00'),
            implicit,
        }
    }

    /// Returns true when binary detection is completely disabled, i.e. both
    /// the explicit and the implicit mode are "none".
    pub(crate) fn is_none(&self) -> bool {
        let disabled = grep::searcher::BinaryDetection::none();
        [&self.explicit, &self.implicit]
            .iter()
            .all(|mode| **mode == disabled)
    }
}

The philosophy: explicit requests should never be filtered, but implicit discovery can use aggressive heuristics for performance.


Section 6: The Builder Pattern Ecosystem in Action

impl HiArgs {
    /// Builds a "standard" grep printer that writes to `wtr`, configured
    /// from the resolved arguments.
    ///
    /// The search separator is only handed to the printer when running with
    /// a single thread; with multiple threads the buffer writer is the one
    /// that emits it.
    fn printer_standard<W: termcolor::WriteColor>(
        &self,
        wtr: W,
    ) -> grep::printer::Standard<W> {
        let mut printer = grep::printer::StandardBuilder::new();

        // What is shown for each match.
        printer
            .byte_offset(self.byte_offset)
            .column(self.column)
            .heading(self.heading)
            .only_matching(self.only_matching)
            .trim_ascii(self.trim);

        // Styling.
        printer
            .color_specs(self.colors.clone())
            .hyperlink(self.hyperlink_config.clone());

        // Long-line handling.
        printer
            .max_columns_preview(self.max_columns_preview)
            .max_columns(self.max_columns);

        // How file paths are rendered.
        printer
            .path(self.with_filename)
            .path_terminator(self.path_terminator.clone())
            .separator_path(self.path_separator.clone());

        // One output line per match when in vimgrep mode.
        printer.per_match_one_line(true).per_match(self.vimgrep);

        // Replacement text, separators and statistics.
        let context_sep = self.context_separator.clone().into_bytes();
        let field_context_sep =
            self.field_context_separator.clone().into_bytes();
        let field_match_sep = self.field_match_separator.clone().into_bytes();
        printer
            .replacement(self.replace.clone().map(|r| r.into()))
            .separator_context(context_sep)
            .separator_field_context(field_context_sep)
            .separator_field_match(field_match_sep)
            .stats(self.stats.is_some());

        // Single-threaded: printer handles file separators
        // Multi-threaded: buffer writer handles them (has global visibility)
        if self.threads == 1 {
            printer.separator_search(self.file_separator.clone());
        }
        printer.build(wtr)
    }

    /// Creates a directory walker rooted at every search path and configured
    /// from the traversal and ignore-related flags.
    ///
    /// # Panics
    ///
    /// Panics if there are no search paths, since the first path seeds the
    /// walker.
    pub(crate) fn walk_builder(&self) -> anyhow::Result<ignore::WalkBuilder> {
        let roots = &self.paths.paths;
        let mut walker = ignore::WalkBuilder::new(&roots[0]);
        for extra in &roots[1..] {
            walker.add(extra);
        }

        // Every VCS-derived ignore source is switched off together by
        // `no_ignore_vcs`; the global and exclude files have an additional
        // switch of their own.
        let use_vcs_ignores = !self.no_ignore_vcs;
        walker
            .max_depth(self.max_depth)
            .follow_links(self.follow)
            .max_filesize(self.max_filesize)
            .threads(self.threads)
            .same_file_system(self.one_file_system)
            .overrides(self.globs.clone())
            .types(self.types.clone())
            .hidden(!self.hidden)
            .parents(!self.no_ignore_parent)
            .ignore(!self.no_ignore_dot)
            .git_global(use_vcs_ignores && !self.no_ignore_global)
            .git_ignore(use_vcs_ignores)
            .git_exclude(use_vcs_ignores && !self.no_ignore_exclude)
            .require_git(!self.no_require_git);

        Ok(walker)
    }
}

Each builder method maps HiArgs fields to library configuration. The builder pattern allows extensive configuration without unwieldy constructor signatures.


Quick Reference

Key Type Relationships

| Low-Level (LowArgs) | High-Level (HiArgs) | Notes |
| --- | --- | --- |
| Vec<PatternSource> | Patterns | Deduplicated, unified from all sources |
| Vec<OsString> (positional) | Paths | With implicit path detection |
| BinaryMode | BinaryDetection | Split into explicit/implicit modes |
| Vec<ColorSpec> | ColorSpecs | Compiled into single config |
| Vec<TypeChange> | ignore::types::Types | Fully built type matcher |
| Vec<String> (globs) | ignore::overrides::Override | Compiled glob matcher |

State Flow During Conversion

LowArgs + State::new()
    ├─► Patterns::from_low_args()  ──► may set stdin_consumed = true
    ├─► Paths::from_low_args()     ──► checks stdin_consumed
    ├─► BinaryDetection::from_low_args()
    ├─► types(), globs(), stats()  ──► build external crate objects
    └─► HiArgs { ... }             ──► fully resolved configuration

Computed Field Derivations

// Threading decision
threads = if sorting || one_file { 1 }
          else { user_specified thread count, or else min(available_cpus, 12) }

// Filename display
with_filename = user_specified || vimgrep || !is_one_file

// Line numbers
line_number = user_specified || (terminal && !stdin_only) || column || vimgrep

// Early exit optimization
quit_after_match = stats.is_none() && quiet