ripgrep crates/core/flags/hiargs.rs: Code Companion¶
Reference code for the High-Level Arguments lecture. Sections correspond to the lecture document.
Section 1: The HiArgs Structure and Its Philosophy¶
/// A high level representation of CLI arguments.
///
/// The distinction between low and high level arguments is somewhat arbitrary
/// and wishy washy. The main idea here is that high level arguments generally
/// require all of CLI parsing to be finished.
///
/// This listing is abridged: the `// ...` markers below stand in for fields
/// that are not shown in this excerpt.
#[derive(Debug)]
pub(crate) struct HiArgs {
// Values carried over from LowArgs. Note that `binary` is not a plain copy:
// it is built by `BinaryDetection::from_low_args` from the low-level binary
// mode and the null-data flag.
binary: BinaryDetection,
boundary: Option<BoundaryMode>,
buffer: BufferMode,
byte_offset: bool,
case: CaseMode,
// Computed from multiple low-level arguments
color: ColorChoice, // Depends on terminal detection + user flag
colors: grep::printer::ColorSpecs,
column: bool, // Derived from --column OR --vimgrep
heading: bool, // Complex logic involving terminal + vimgrep
// Fully-constructed objects from external crates
globs: ignore::overrides::Override, // Compiled glob matcher
types: ignore::types::Types, // File type matcher
mmap_choice: grep::searcher::MmapChoice,
hyperlink_config: grep::printer::HyperlinkConfig,
// Derived state affecting search behavior
quit_after_match: bool, // Depends on both stats and quiet
threads: usize, // Computed from sort mode, path count, available CPUs
with_filename: bool, // Complex default based on vimgrep and path count
// ... over 80 fields total
// State captured during conversion plus the resolved patterns and paths
cwd: PathBuf,
is_terminal_stdout: bool,
line_number: bool,
paths: Paths,
patterns: Patterns,
// ... many more
}
The field groupings show the progression from raw values to computed configuration. Fields like globs and types are fully constructed matchers ready for use during search.
Section 2: The Conversion Process and State Management¶
impl HiArgs {
/// Converts fully parsed low-level arguments into high-level arguments.
///
/// This listing is abridged: the remaining conversions and the final
/// construction of the `HiArgs` value are elided.
///
/// # Errors
///
/// Returns an error if the requested sort mode is unsupported, if
/// `State::new` fails, or if collecting the patterns or paths fails.
///
/// # Panics
///
/// Panics if `low.special` is not `None`.
pub(crate) fn from_low_args(mut low: LowArgs) -> anyhow::Result<HiArgs> {
// Defensive assertion - special modes should never reach this point
assert_eq!(None, low.special, "special mode demands short-circuiting");
// Validate sorting mode support early
if let Some(ref sort) = low.sort {
sort.supported()?;
}
// Mode adjustments for flag interactions. Only the search mode is ever
// rewritten here; every other mode is left untouched.
match low.mode {
Mode::Search(ref mut mode) => match *mode {
// -v --count-matches becomes -v --count
// (counting individual matches makes no sense when inverting)
SearchMode::CountMatches if low.invert_match => {
*mode = SearchMode::Count;
}
// -o --count becomes --count-matches
SearchMode::Count if low.only_matching => {
*mode = SearchMode::CountMatches;
}
_ => {}
},
_ => {}
}
// Initialize shared state for conversion functions
let mut state = State::new()?;
// Order matters: patterns must be read before paths. When no pattern flag
// was given, `Patterns::from_low_args` removes the first positional
// argument, and `Paths::from_low_args` drains whatever positionals remain.
let patterns = Patterns::from_low_args(&mut state, &mut low)?;
let paths = Paths::from_low_args(&mut state, &patterns, &mut low)?;
// Build complex objects using resolved state
let binary = BinaryDetection::from_low_args(&state, &low);
let colors = take_color_specs(&mut state, &mut low);
// ... more conversions
}
}
/// State shared across all low->high argument conversions.
///
/// A single value is created by `State::new` and then handed to each
/// conversion function in turn, so a change made by one conversion is visible
/// to the conversions that run after it.
#[derive(Debug)]
struct State {
/// Terminal detection affects color and heading defaults
///
/// Captured once, in `State::new`, from `std::io::stdout().is_terminal()`.
is_terminal_stdout: bool,
/// Prevents double-reading stdin (patterns vs search input)
///
/// Starts out `false`. `Patterns::from_low_args` sets it to `true` after
/// reading patterns from stdin, and `Paths::from_low_args` consults it.
stdin_consumed: bool,
/// Cached for glob matching and path resolution
///
/// Read once, in `State::new`, via `current_dir()`.
cwd: PathBuf,
}
impl State {
    /// Builds the initial conversion state from the process environment.
    ///
    /// The current working directory is looked up (and logged at debug
    /// level), stdout is probed for being a terminal, and stdin is recorded
    /// as not yet consumed.
    ///
    /// # Errors
    ///
    /// Propagates the error from `current_dir()` when the working directory
    /// cannot be determined.
    fn new() -> anyhow::Result<State> {
        use std::io::IsTerminal;

        let cwd = current_dir()?;
        log::debug!("read CWD from environment: {}", cwd.display());

        let is_terminal_stdout = std::io::stdout().is_terminal();
        Ok(State { is_terminal_stdout, stdin_consumed: false, cwd })
    }
}
The State struct acts as a coordination mechanism—mutations by one conversion function (like setting stdin_consumed = true) affect subsequent conversions.
Section 3: Pattern Collection and Deduplication¶
/// The disjunction of patterns to search for.
///
/// Built by `Patterns::from_low_args`.
#[derive(Debug)]
struct Patterns {
/// The patterns, in the order they were first seen. When they come from
/// `-e/--regexp` and `-f/--file`, duplicates are dropped. Empty for
/// non-search modes.
patterns: Vec<String>,
}
impl Patterns {
    /// Gathers every pattern that should be searched for.
    ///
    /// * Non-search modes yield an empty list.
    /// * When no `-e/--regexp` or `-f/--file` flag was given, the first
    ///   positional argument is removed from `low.positional` and becomes
    ///   the only pattern.
    /// * Otherwise the flag sources are drained in order and duplicate
    ///   patterns are dropped, keeping the first occurrence.
    ///
    /// Reading patterns from stdin (`-f -`) sets `state.stdin_consumed`.
    ///
    /// # Errors
    ///
    /// Fails when no pattern is available, when the positional pattern is
    /// not valid UTF-8, when stdin was already consumed before `-f -` is
    /// handled, or when reading a pattern source fails.
    fn from_low_args(
        state: &mut State,
        low: &mut LowArgs,
    ) -> anyhow::Result<Patterns> {
        // Only search modes need patterns at all.
        let Mode::Search(_) = low.mode else {
            return Ok(Patterns { patterns: vec![] });
        };

        // Common case (`rg foo`): the first positional argument is the
        // pattern.
        if low.patterns.is_empty() {
            anyhow::ensure!(
                !low.positional.is_empty(),
                "ripgrep requires at least one pattern to execute a search"
            );
            return match low.positional.remove(0).into_string() {
                Ok(pat) => Ok(Patterns { patterns: vec![pat] }),
                Err(_) => anyhow::bail!("pattern given is not valid UTF-8"),
            };
        }

        // `seen` tracks what has already been accepted so that each distinct
        // pattern is stored once; `unique` preserves first-seen order.
        let mut seen = HashSet::new();
        let mut unique = Vec::with_capacity(low.patterns.len());
        let mut push_unique = |pat: String| {
            if seen.contains(&pat) {
                return;
            }
            seen.insert(pat.clone());
            unique.push(pat);
        };

        // Walk the -e/--regexp and -f/--file sources in the order given.
        for source in low.patterns.drain(..) {
            let path = match source {
                PatternSource::Regexp(pat) => {
                    push_unique(pat);
                    continue;
                }
                PatternSource::File(path) => path,
            };
            if path != Path::new("-") {
                for pat in grep::cli::patterns_from_path(&path)? {
                    push_unique(pat);
                }
                continue;
            }
            // `-f -`: stdin can only be read once.
            anyhow::ensure!(
                !state.stdin_consumed,
                "error reading -f/--file from stdin: stdin \
                 has already been consumed"
            );
            for pat in grep::cli::patterns_from_stdin()? {
                push_unique(pat);
            }
            // Record the read so later path handling can see it.
            state.stdin_consumed = true;
        }
        Ok(Patterns { patterns: unique })
    }
}
The seen HashSet adds memory overhead but prevents pathological performance when users accidentally pass thousands of duplicate patterns.
Section 4: Path Resolution and Implicit Behavior¶
/// The collection of paths to search.
///
/// As built by `Paths::from_low_args`, `paths` always holds at least one
/// entry: either the positional arguments, or a single implicit `./` or `-`.
#[derive(Debug)]
struct Paths {
/// The paths to search, in the order given on the command line.
paths: Vec<PathBuf>,
/// True when ripgrep "guessed" to search CWD (affects path printing)
///
/// Also true when the guess was stdin (`-`): it is set whenever no
/// positional path was supplied.
has_implicit_path: bool,
/// Enables single-file optimizations
///
/// True when there is exactly one path and it is either `-` or not a
/// directory.
is_one_file: bool,
}
impl Paths {
    /// Resolves the set of paths to search.
    ///
    /// All remaining positional arguments are drained from `low.positional`
    /// and treated as paths. If there are none, a single implicit path is
    /// chosen: stdin (`-`) when stdin is readable, has not been consumed and
    /// the mode is a search mode; the current directory (`./`) otherwise.
    ///
    /// The unused `&Patterns` parameter exists only so that callers must
    /// build the patterns first.
    ///
    /// # Errors
    ///
    /// Fails when `-` is given as a path after stdin was already consumed
    /// for reading patterns.
    fn from_low_args(
        state: &mut State,
        _: &Patterns, // Required to ensure patterns consumed first
        low: &mut LowArgs,
    ) -> anyhow::Result<Paths> {
        let mut explicit = Vec::with_capacity(low.positional.len());
        for osarg in low.positional.drain(..) {
            let candidate = PathBuf::from(osarg);
            let wants_stdin = candidate == Path::new("-");
            if state.stdin_consumed && wants_stdin {
                anyhow::bail!(
                    "error: attempted to read patterns from stdin \
                     while also searching stdin",
                );
            }
            explicit.push(candidate);
        }

        if !explicit.is_empty() {
            // `!is_dir()` rather than `is_file()` so that device files and
            // pipes still count as a single file.
            // See: https://github.com/BurntSushi/ripgrep/issues/2736
            let is_one_file = match explicit.as_slice() {
                [only] => only.as_path() == Path::new("-") || !only.is_dir(),
                _ => false,
            };
            return Ok(Paths {
                paths: explicit,
                has_implicit_path: false,
                is_one_file,
            });
        }

        // No path was given: guess between stdin and the current directory.
        let is_readable_stdin = grep::cli::is_readable_stdin();
        let search_stdin = is_readable_stdin
            && !state.stdin_consumed
            && matches!(low.mode, Mode::Search(_));
        let (path, is_one_file) = if search_stdin {
            log::debug!("heuristic chose to search stdin");
            (PathBuf::from("-"), true)
        } else {
            log::debug!("heuristic chose to search ./");
            (PathBuf::from("./"), false)
        };
        Ok(Paths { paths: vec![path], has_implicit_path: true, is_one_file })
    }
}
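The has_implicit_path field records whether ripgrep itself chose the search root. When it is true, the leading ./ is stripped from printed paths (the user never typed it); paths the user supplied explicitly are printed as given. This matches GNU grep's behavior, which matters for script compatibility.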
Section 5: Binary Detection Strategy¶
/// Dual binary detection modes: one for explicit files, one for discovered files.
///
/// Both modes are derived together by `BinaryDetection::from_low_args`.
#[derive(Debug)]
struct BinaryDetection {
/// Used when user explicitly names a file
///
/// Either `none()` or `convert(b'\x00')`; never a quitting mode.
explicit: grep::searcher::BinaryDetection,
/// Used when file is discovered during directory traversal
///
/// One of `none()`, `convert(b'\x00')` or `quit(b'\x00')`.
implicit: grep::searcher::BinaryDetection,
}
impl BinaryDetection {
    /// Derives both binary detection modes from the low-level flags.
    ///
    /// * With `-a/--text` or `--null-data`, detection is disabled for both
    ///   explicit and implicit files.
    /// * Otherwise explicit files always use NUL conversion, so a file the
    ///   user named is never abandoned early.
    /// * Implicit files use NUL conversion under `--binary` and quit on the
    ///   first NUL byte by default.
    fn from_low_args(_: &State, low: &LowArgs) -> BinaryDetection {
        let disabled =
            matches!(low.binary, BinaryMode::AsText) || low.null_data;
        if disabled {
            return BinaryDetection {
                explicit: grep::searcher::BinaryDetection::none(),
                implicit: grep::searcher::BinaryDetection::none(),
            };
        }

        // The user asked for these files by name: convert, never quit.
        let explicit = grep::searcher::BinaryDetection::convert(b'\x00');
        // Discovered files may be skipped quickly unless --binary was given.
        let implicit = match low.binary {
            BinaryMode::SearchAndSuppress => {
                grep::searcher::BinaryDetection::convert(b'\x00')
            }
            _ => grep::searcher::BinaryDetection::quit(b'\x00'),
        };
        BinaryDetection { explicit, implicit }
    }

    /// Reports whether binary detection is turned off for both explicit and
    /// implicit files.
    pub(crate) fn is_none(&self) -> bool {
        let off = grep::searcher::BinaryDetection::none();
        let explicit_off = self.explicit == off;
        explicit_off && self.implicit == off
    }
}
The philosophy: explicit requests should never be filtered, but implicit discovery can use aggressive heuristics for performance.
Section 6: The Builder Pattern Ecosystem in Action¶
impl HiArgs {
    /// Creates the "standard" grep printer, writing to `wtr`, with every
    /// relevant option from `self` applied.
    ///
    /// The between-file separator is only installed on the printer when a
    /// single thread is in use.
    fn printer_standard<W: termcolor::WriteColor>(
        &self,
        wtr: W,
    ) -> grep::printer::Standard<W> {
        let mut printer = grep::printer::StandardBuilder::new();

        // Output formatting
        printer
            .byte_offset(self.byte_offset)
            .column(self.column)
            .heading(self.heading)
            .only_matching(self.only_matching)
            .trim_ascii(self.trim);

        // Colors and hyperlinks
        printer
            .color_specs(self.colors.clone())
            .hyperlink(self.hyperlink_config.clone());

        // Column/line limits
        printer
            .max_columns_preview(self.max_columns_preview)
            .max_columns(self.max_columns);

        // Path handling
        printer
            .path(self.with_filename)
            .path_terminator(self.path_terminator.clone())
            .separator_path(self.path_separator.clone());

        // Vimgrep compatibility
        printer.per_match_one_line(true).per_match(self.vimgrep);

        // Replacement, context separators and statistics
        let replacement = self.replace.clone().map(|r| r.into());
        printer
            .replacement(replacement)
            .separator_context(self.context_separator.clone().into_bytes())
            .separator_field_context(
                self.field_context_separator.clone().into_bytes(),
            )
            .separator_field_match(
                self.field_match_separator.clone().into_bytes(),
            )
            .stats(self.stats.is_some());

        // Single-threaded: the printer emits file separators itself.
        // Multi-threaded: the buffer writer handles them instead (it has
        // global visibility).
        if self.threads == 1 {
            printer.separator_search(self.file_separator.clone());
        }
        printer.build(wtr)
    }

    /// Creates a directory walker rooted at every configured path, with the
    /// traversal and ignore-rule flags from `self` applied.
    ///
    /// # Panics
    ///
    /// Panics if `self.paths.paths` is empty.
    pub(crate) fn walk_builder(&self) -> anyhow::Result<ignore::WalkBuilder> {
        let roots = &self.paths.paths;
        let mut walker = ignore::WalkBuilder::new(&roots[0]);
        for extra in &roots[1..] {
            walker.add(extra);
        }

        // Traversal limits and filters
        walker
            .max_depth(self.max_depth)
            .follow_links(self.follow)
            .max_filesize(self.max_filesize)
            .threads(self.threads)
            .same_file_system(self.one_file_system)
            .overrides(self.globs.clone())
            .types(self.types.clone());

        // Ignore rules. The git-related sources are all gated on VCS ignore
        // handling being enabled.
        let respect_vcs = !self.no_ignore_vcs;
        walker
            .hidden(!self.hidden)
            .parents(!self.no_ignore_parent)
            .ignore(!self.no_ignore_dot)
            .git_global(respect_vcs && !self.no_ignore_global)
            .git_ignore(respect_vcs)
            .git_exclude(respect_vcs && !self.no_ignore_exclude)
            .require_git(!self.no_require_git);
        Ok(walker)
    }
}
Each builder method maps HiArgs fields to library configuration. The builder pattern allows extensive configuration without unwieldy constructor signatures.
Quick Reference¶
Key Type Relationships¶
| Low-Level (LowArgs) | High-Level (HiArgs) | Notes |
|---|---|---|
| `Vec<PatternSource>` | `Patterns` | Deduplicated, unified from all sources |
| `Vec<OsString>` (positional) | `Paths` | With implicit path detection |
| `BinaryMode` | `BinaryDetection` | Split into explicit/implicit modes |
| `Vec<ColorSpec>` | `ColorSpecs` | Compiled into single config |
| `Vec<TypeChange>` | `ignore::types::Types` | Fully built type matcher |
| `Vec<String>` (globs) | `ignore::overrides::Override` | Compiled glob matcher |
State Flow During Conversion¶
LowArgs + State::new()
│
├─► Patterns::from_low_args() ──► may set stdin_consumed = true
│
├─► Paths::from_low_args() ──► checks stdin_consumed
│
├─► BinaryDetection::from_low_args()
│
├─► types(), globs(), stats() ──► build external crate objects
│
└─► HiArgs { ... } ──► fully resolved configuration
Computed Field Derivations¶
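// Notation: this is pseudocode. `user_specified || x` means "use the user's
// explicit setting if one was given, otherwise fall back to x".
// Threading decision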
threads = if sorting || one_file { 1 }
else { user_specified || min(available_cpus, 12) }
// Filename display
with_filename = user_specified || vimgrep || !is_one_file
// Line numbers
line_number = user_specified || (terminal && !stdin_only) || column || vimgrep
// Early exit optimization
quit_after_match = stats.is_none() && quiet