Ripgrep hiargs.rs: Code Companion
Reference code for the hiargs.rs lecture. Sections correspond to the lecture document.
Section 1: The HiArgs Struct
/// High-level configuration built from `LowArgs` by `HiArgs::from_low_args`.
///
/// This listing is abridged: only a representative subset of the fields is
/// shown, grouped by purpose (see the elision note at the end of the struct).
#[derive(Debug)]
pub(crate) struct HiArgs {
// Display options
color: ColorChoice,
colors: grep::printer::ColorSpecs,
column: bool,
heading: bool,
line_number: bool,
byte_offset: bool,
// Search behavior
case: CaseMode,
// `None` leaves the matcher's word/line boundary options untouched
// (see `matcher_rust`, which only configures them for `Some`).
boundary: Option<BoundaryMode>,
fixed_strings: bool,
invert_match: bool,
multiline: bool,
multiline_dotall: bool,
// What to search
patterns: Patterns,
paths: Paths,
mode: Mode,
// Filtering
types: ignore::types::Types,
globs: ignore::overrides::Override,
// Separate policies for explicitly named vs. discovered files.
binary: BinaryDetection,
max_filesize: Option<u64>,
max_depth: Option<usize>,
// Performance
// Forced to 1 when sorting or when searching a single file (Section 11).
threads: usize,
mmap_choice: grep::searcher::MmapChoice,
dfa_size_limit: Option<usize>,
regex_size_limit: Option<usize>,
// ... approximately 70 fields total
}
Field categories:
| Category | Examples |
|---|---|
| Display | color, heading, line_number, column |
| Search | case, boundary, multiline, invert_match |
| Filtering | types, globs, binary, hidden |
| Performance | threads, mmap_choice, *_size_limit |
| Context | context, context_separator, file_separator |
| Output format | mode, quiet, stats, vimgrep |
Section 2: The Transformation — from_low_args
impl HiArgs {
/// Converts parsed low-level arguments into a `HiArgs`.
///
/// Panics if `low.special` is set (see the assertion below). Returns an
/// error if the requested sort is unsupported or if any of the fallible
/// sub-conversions fails.
pub(crate) fn from_low_args(mut low: LowArgs) -> anyhow::Result<HiArgs> {
// Assertion: special modes already handled
assert_eq!(None, low.special, "special mode demands short-circuiting");
// Validate sorting support
if let Some(ref sort) = low.sort {
sort.supported()?;
}
// Mode adjustments for flag interactions
// The guards read other fields of `low` while `mode` is mutably borrowed,
// and the rewrite happens in place through `*mode`.
match low.mode {
Mode::Search(ref mut mode) => match *mode {
// -v --count-matches → -v --count
SearchMode::CountMatches if low.invert_match => {
*mode = SearchMode::Count;
}
// -o --count → --count-matches
SearchMode::Count if low.only_matching => {
*mode = SearchMode::CountMatches;
}
_ => {}
},
_ => {}
}
// Initialize shared state
let mut state = State::new()?;
// Sequential conversions (order matters!)
// `Patterns` runs first: it may remove the first positional argument (the
// pattern) and may mark stdin as consumed. `Paths` then drains whatever
// positionals remain and checks `state.stdin_consumed`.
let patterns = Patterns::from_low_args(&mut state, &mut low)?;
let paths = Paths::from_low_args(&mut state, &patterns, &mut low)?;
let binary = BinaryDetection::from_low_args(&state, &low);
let colors = take_color_specs(&mut state, &mut low);
let hyperlink_config = take_hyperlink_config(&mut state, &mut low)?;
let stats = stats(&low);
let types = types(&low)?;
let globs = globs(&state, &low)?;
let pre_globs = preprocessor_globs(&state, &low)?;
// Derived values...
// (The remaining derivations and the struct literal are elided in this
// excerpt; Section 11 shows several of them.)
Ok(HiArgs { /* ... */ })
}
}
Flag interaction examples:
// Invert match + count-matches doesn't make sense semantically
// Counting inverted matches → just count lines without matches
SearchMode::CountMatches if low.invert_match => SearchMode::Count
// Only-matching + count is effectively count-matches
// When only printing matched parts, counting them makes sense
SearchMode::Count if low.only_matching => SearchMode::CountMatches
Section 3: State Management
/// Facts about the process environment, gathered once by `State::new` and
/// passed to the individual `from_low_args` conversions.
///
/// `stdin_consumed` is the only field the conversions mutate: it is set once
/// patterns have been read from stdin.
#[derive(Debug)]
struct State {
/// Is stdout connected to a terminal?
is_terminal_stdout: bool,
/// Has stdin already been read (for patterns)?
stdin_consumed: bool,
/// Captured at startup, doesn't change
cwd: PathBuf,
}
impl State {
    /// Snapshots the environment at startup.
    ///
    /// Reads the current working directory (failing if it cannot be
    /// determined), logs it at debug level, and records whether stdout is a
    /// terminal. Stdin starts out unconsumed.
    fn new() -> anyhow::Result<State> {
        use std::io::IsTerminal;

        let cwd = current_dir()?;
        log::debug!("read CWD from environment: {}", cwd.display());

        let is_terminal_stdout = std::io::stdout().is_terminal();
        Ok(State { is_terminal_stdout, stdin_consumed: false, cwd })
    }
}
How state affects behavior:
// Terminal detection → default colors
// Only `Auto` is downgraded; an explicit user choice is passed through.
let color = match low.color {
ColorChoice::Auto if !state.is_terminal_stdout => ColorChoice::Never,
_ => low.color,
};
// Terminal detection → default line numbers
// NOTE: simplified for illustration. The full derivation in Section 11 also
// accounts for quiet mode, the search mode, `column` and `low.vimgrep`.
let line_number = low.line_number.unwrap_or_else(|| {
state.is_terminal_stdout && !paths.is_only_stdin()
});
// Stdin tracking → prevent double consumption
// `path` stands for whichever path argument is being examined; the real
// checks live in `Patterns::from_low_args` and `Paths::from_low_args`.
if path == Path::new("-") && state.stdin_consumed {
anyhow::bail!("stdin has already been consumed");
}
Section 4: Pattern Handling
/// The patterns to search for, as collected by `Patterns::from_low_args`.
///
/// Empty when the mode is not a search. When gathered from `-e`/`-f`, later
/// duplicates are dropped and the first-seen order is kept.
#[derive(Debug)]
struct Patterns {
patterns: Vec<String>,
}
impl Patterns {
    /// Collects the search patterns from `low`.
    ///
    /// - Non-search modes need no patterns, so the result is empty.
    /// - With no `-e`/`-f` sources, the first positional argument is taken
    ///   (and removed) as the sole pattern; it must be valid UTF-8.
    /// - Otherwise every `-e`/`-f` source is drained in order, keeping only
    ///   the first occurrence of each pattern. Reading `-f -` marks stdin as
    ///   consumed in `state` and fails if it was already consumed.
    fn from_low_args(
        state: &mut State,
        low: &mut LowArgs,
    ) -> anyhow::Result<Patterns> {
        let Mode::Search(_) = low.mode else {
            return Ok(Patterns { patterns: Vec::new() });
        };

        // Implicit pattern: the first positional argument.
        if low.patterns.is_empty() {
            if low.positional.is_empty() {
                anyhow::bail!("ripgrep requires at least one pattern");
            }
            return match low.positional.remove(0).into_string() {
                Ok(pat) => Ok(Patterns { patterns: vec![pat] }),
                Err(_) => Err(anyhow::anyhow!("pattern is not valid UTF-8")),
            };
        }

        // Explicit sources: dedupe while preserving first-seen order.
        let mut seen = HashSet::new();
        let mut unique = Vec::with_capacity(low.patterns.len());
        let mut keep_first = |pat: String| {
            if seen.contains(&pat) {
                return;
            }
            seen.insert(pat.clone());
            unique.push(pat);
        };
        for source in low.patterns.drain(..) {
            let path = match source {
                PatternSource::Regexp(pat) => {
                    keep_first(pat);
                    continue;
                }
                PatternSource::File(path) => path,
            };
            if path != Path::new("-") {
                grep::cli::patterns_from_path(&path)?
                    .into_iter()
                    .for_each(&mut keep_first);
                continue;
            }
            // `-f -`: stdin can only be read once.
            anyhow::ensure!(
                !state.stdin_consumed,
                "stdin has already been consumed"
            );
            grep::cli::patterns_from_stdin()?
                .into_iter()
                .for_each(&mut keep_first);
            state.stdin_consumed = true;
        }
        Ok(Patterns { patterns: unique })
    }
}
Pattern sources:
| Source | Example | Notes |
|---|---|---|
| Positional | `rg foo` | First arg when no -e/-f |
| -e flag | `rg -e foo -e bar` | Can repeat |
| -f flag | `rg -f patterns.txt` | One pattern per line |
| -f - | `rg -f -` | Read from stdin |
Section 5: Path Handling
/// The set of paths to search, as resolved by `Paths::from_low_args`.
///
/// `paths` always holds at least one entry: either the explicit positional
/// arguments or a single implicit path (stdin or the current directory).
#[derive(Debug)]
struct Paths {
paths: Vec<PathBuf>,
has_implicit_path: bool, // Did we guess the path?
is_one_file: bool, // Exactly one non-directory?
}
impl Paths {
    /// Resolves which paths to search.
    ///
    /// Any remaining positional arguments are used verbatim; naming `-` after
    /// stdin was already consumed for patterns is an error. With no
    /// positional arguments, stdin is searched when it is readable, still
    /// unconsumed, and the mode is a search; otherwise the current directory
    /// (`./`) is used.
    fn from_low_args(
        state: &mut State,
        _: &Patterns, // Ensures patterns processed first
        low: &mut LowArgs,
    ) -> anyhow::Result<Paths> {
        let stdin_marker = Path::new("-");

        let mut explicit = Vec::with_capacity(low.positional.len());
        for osarg in low.positional.drain(..) {
            let candidate = PathBuf::from(osarg);
            anyhow::ensure!(
                !(state.stdin_consumed && candidate.as_path() == stdin_marker),
                "attempted to search stdin after reading patterns from it"
            );
            explicit.push(candidate);
        }

        if !explicit.is_empty() {
            // "One file" means a single argument that is stdin or anything
            // that is not a directory.
            let is_one_file = match explicit.as_slice() {
                [only] => only.as_path() == stdin_marker || !only.is_dir(),
                _ => false,
            };
            return Ok(Paths {
                paths: explicit,
                has_implicit_path: false,
                is_one_file,
            });
        }

        // No explicit paths: guess between stdin and the current directory.
        let search_stdin = grep::cli::is_readable_stdin()
            && !state.stdin_consumed
            && matches!(low.mode, Mode::Search(_));
        let (implicit, is_one_file) = if search_stdin {
            (PathBuf::from("-"), true)
        } else {
            (PathBuf::from("./"), false)
        };
        Ok(Paths { paths: vec![implicit], has_implicit_path: true, is_one_file })
    }
}
Heuristic decision tree:
No explicit paths given?
├── Yes → stdin readable AND not consumed AND search mode?
│   ├── Yes → Search stdin
│   └── No → Search current directory
└── No (explicit paths given) → Use them directly
Section 6: Binary Detection
/// A pair of binary-detection policies, chosen by
/// `BinaryDetection::from_low_args`: one for explicitly named files and one
/// for files discovered implicitly.
#[derive(Debug)]
struct BinaryDetection {
explicit: grep::searcher::BinaryDetection, // For named files
implicit: grep::searcher::BinaryDetection, // For discovered files
}
impl BinaryDetection {
    /// Chooses the binary-detection policies from the low-level flags.
    ///
    /// - `BinaryMode::AsText` or `null_data`: detection is off for both kinds
    ///   of file.
    /// - Otherwise explicit files always use NUL conversion, while implicit
    ///   files use NUL conversion for `BinaryMode::SearchAndSuppress` and
    ///   quit-on-NUL for everything else.
    fn from_low_args(_: &State, low: &LowArgs) -> BinaryDetection {
        use grep::searcher::BinaryDetection as Detect;

        if low.null_data || matches!(low.binary, BinaryMode::AsText) {
            return BinaryDetection {
                explicit: Detect::none(),
                implicit: Detect::none(),
            };
        }
        let implicit = match low.binary {
            BinaryMode::SearchAndSuppress => Detect::convert(b'\x00'),
            _ => Detect::quit(b'\x00'),
        };
        BinaryDetection { explicit: Detect::convert(b'\x00'), implicit }
    }
}
Binary detection modes:
| Mode | Explicit files | Implicit files |
|---|---|---|
| Default | Convert NUL | Quit on NUL |
| `-a/--text` | None | None |
| `--binary` | Convert NUL | Convert NUL |
| `--null-data` | None | None |
Section 7: Building the Matcher
/// Builds the pattern matcher for the configured engine.
///
/// - `Default`: Rust regex only; a failure is passed through
///   `suggest_other_engine` before being returned.
/// - `PCRE2`: PCRE2 only.
/// - `Auto`: Rust regex first, then PCRE2; if both fail, the error reports
///   both messages.
pub(crate) fn matcher(&self) -> anyhow::Result<PatternMatcher> {
    match self.engine {
        EngineChoice::Default => self.matcher_rust().map_err(|err| {
            anyhow::anyhow!(suggest_other_engine(err.to_string()))
        }),
        EngineChoice::PCRE2 => Ok(self.matcher_pcre2()?),
        EngineChoice::Auto => self.matcher_rust().or_else(|rust_err| {
            self.matcher_pcre2().or_else(|pcre_err| {
                anyhow::bail!(
                    "regex could not be compiled with either engine.\n\n\
                     Rust regex error:\n{rust_err}\n\n\
                     PCRE2 error:\n{pcre_err}"
                )
            })
        }),
    }
}
/// Builds a matcher backed by the Rust regex engine from this configuration.
///
/// Returns an error if the patterns fail to compile.
fn matcher_rust(&self) -> anyhow::Result<PatternMatcher> {
let mut builder = grep::regex::RegexMatcherBuilder::new();
builder
.multi_line(true)
.unicode(!self.no_unicode)
.octal(false)
.fixed_strings(self.fixed_strings);
// Case sensitivity
match self.case {
CaseMode::Sensitive => builder.case_insensitive(false),
CaseMode::Insensitive => builder.case_insensitive(true),
CaseMode::Smart => builder.case_smart(true),
};
// Word/line boundaries
if let Some(ref boundary) = self.boundary {
match *boundary {
BoundaryMode::Line => builder.whole_line(true),
BoundaryMode::Word => builder.word(true),
};
}
// Line terminator handling
// In multiline mode no line terminator is configured on the matcher.
if self.multiline {
builder.dot_matches_new_line(self.multiline_dotall);
} else {
builder.line_terminator(Some(b'\n'));
// Presumably this second call replaces the `\n` terminator set just
// above when --null-data is active — confirm against the builder docs.
if self.null_data {
builder.line_terminator(Some(b'\x00'));
}
}
// Binary detection interaction
if !self.binary.is_none() {
builder.ban_byte(Some(b'\x00'));
}
// All patterns are compiled together into a single matcher.
let m = builder.build_many(&self.patterns.patterns)?;
Ok(PatternMatcher::RustRegex(m))
}
Error suggestion helpers:
/// Returns `msg` extended with a hint to try `--pcre2` when the message
/// mentions a feature only PCRE2 supports ("backreferences" or
/// "look-around"); returns `None` otherwise.
fn suggest_pcre2(msg: &str) -> Option<String> {
    let mentions_pcre2_feature = ["backreferences", "look-around"]
        .iter()
        .any(|&feature| msg.contains(feature));
    mentions_pcre2_feature.then(|| {
        format!("{msg}\n\nConsider enabling PCRE2 with the --pcre2 flag.")
    })
}
/// Appends a hint to enable multiline mode (`-U`) when `msg` contains both
/// "the literal" and "not allowed"; otherwise returns `msg` unchanged.
fn suggest_multiline(msg: String) -> String {
    let literal_rejected = ["the literal", "not allowed"]
        .iter()
        .all(|&marker| msg.contains(marker));
    if !literal_rejected {
        return msg;
    }
    format!("{msg}\n\nConsider enabling multiline mode with -U.")
}
Section 8: Building the Searcher
/// Builds a `grep::searcher::Searcher` from this configuration.
///
/// The line terminator is CRLF when `crlf` is set, NUL when `null_data` is
/// set, and `\n` otherwise (`crlf` takes priority). Context and encoding
/// settings are then applied on top of the basic search options.
pub(crate) fn searcher(&self) -> anyhow::Result<grep::searcher::Searcher> {
    // Line terminator configuration
    let line_term = if self.crlf {
        grep::matcher::LineTerminator::crlf()
    } else if self.null_data {
        grep::matcher::LineTerminator::byte(b'\x00')
    } else {
        grep::matcher::LineTerminator::byte(b'\n')
    };
    let mut builder = grep::searcher::SearcherBuilder::new();
    builder
        .line_terminator(line_term)
        .line_number(self.line_number)
        .invert_match(self.invert_match)
        .multi_line(self.multiline)
        .memory_map(self.mmap_choice.clone())
        .max_matches(self.max_count)
        .stop_on_nonmatch(self.stop_on_nonmatch);
    // Context configuration
    match self.context {
        // Wrapped in a block with a trailing `;` so this arm has type `()`
        // like the `Limited` arm; the setter itself returns the builder.
        ContextMode::Passthru => {
            builder.passthru(true);
        }
        ContextMode::Limited(ref limited) => {
            let (before, after) = limited.get();
            builder.before_context(before);
            builder.after_context(after);
        }
    }
    // Encoding configuration
    match self.encoding {
        EncodingMode::Auto => {} // Default BOM sniffing
        EncodingMode::Some(ref enc) => {
            builder.encoding(Some(enc.clone()));
        }
        EncodingMode::Disabled => {
            builder.bom_sniffing(false);
        }
    }
    Ok(builder.build())
}
Memory map decision:
let mmap_choice = {
// NOTE(review): `MmapChoice::auto` is `unsafe`, so this block should carry a
// `// SAFETY:` justification. Presumably the hazard is the mapped file being
// modified or truncated by another process during the search — confirm
// against the grep-searcher documentation.
let maybe = unsafe { grep::searcher::MmapChoice::auto() };
let never = grep::searcher::MmapChoice::never();
match low.mmap {
MmapMode::Auto => {
// Heuristic: mmap for ≤10 regular files
// `is_file()` is false for directories and for paths that cannot be
// inspected, so either one disables memory maps.
if paths.paths.len() <= 10
&& paths.paths.iter().all(|p| p.is_file())
{
maybe
} else {
never
}
}
MmapMode::AlwaysTryMmap => maybe,
MmapMode::Never => never,
}
};
Section 9: Building the Printer
/// Builds the printer matching `search_mode` and the quiet setting.
///
/// In quiet mode every search mode maps to a summary printer. Otherwise
/// `JSON` and `Standard` get their dedicated printers and the remaining
/// modes get a summary printer of the corresponding kind.
pub(crate) fn printer<W: termcolor::WriteColor>(
    &self,
    search_mode: SearchMode,
    wtr: W,
) -> Printer<W> {
    let summary_kind = match (self.quiet, search_mode) {
        // Quiet: only whether a match (or non-match) exists matters.
        (true, SearchMode::FilesWithoutMatch) => SummaryKind::QuietWithoutMatch,
        (
            true,
            SearchMode::FilesWithMatches
            | SearchMode::Count
            | SearchMode::CountMatches
            | SearchMode::JSON
            | SearchMode::Standard,
        ) => SummaryKind::QuietWithMatch,
        // Not quiet: the two non-summary printers return immediately.
        (false, SearchMode::JSON) => {
            return Printer::JSON(self.printer_json(wtr));
        }
        (false, SearchMode::Standard) => {
            return Printer::Standard(self.printer_standard(wtr));
        }
        (false, SearchMode::FilesWithMatches) => SummaryKind::PathWithMatch,
        (false, SearchMode::FilesWithoutMatch) => SummaryKind::PathWithoutMatch,
        (false, SearchMode::Count) => SummaryKind::Count,
        (false, SearchMode::CountMatches) => SummaryKind::CountMatches,
    };
    Printer::Summary(self.printer_summary(wtr, summary_kind))
}
/// Builds the standard (line-oriented) printer writing to `wtr`, copying the
/// display-related settings from this configuration.
fn printer_standard<W: termcolor::WriteColor>(
&self,
wtr: W,
) -> grep::printer::Standard<W> {
let mut builder = grep::printer::StandardBuilder::new();
builder
.byte_offset(self.byte_offset)
.color_specs(self.colors.clone())
.column(self.column)
.heading(self.heading)
.hyperlink(self.hyperlink_config.clone())
.max_columns(self.max_columns)
.max_columns_preview(self.max_columns_preview)
.only_matching(self.only_matching)
.path(self.with_filename)
.replacement(self.replace.clone().map(|r| r.into()))
// Only whether stats were requested is forwarded, not the stats value.
.stats(self.stats.is_some())
.trim_ascii(self.trim);
// Single-threaded: printer owns file separator
// With more than one thread the separator is not configured here.
if self.threads == 1 {
builder.separator_search(self.file_separator.clone());
}
builder.build(wtr)
}
Printer types:
| Mode | Printer | Output |
|---|---|---|
| Standard | `Standard<W>` | Matching lines with context |
| `-l` | `Summary<W>` | Filenames only |
| `-c` | `Summary<W>` | Counts per file |
| `--json` | `JSON<W>` | Structured JSON lines |
Section 10: Building the Walker
/// Builds the directory walker for all configured paths, applying the
/// filtering, ignore-rule and sorting settings from this configuration.
pub(crate) fn walk_builder(&self) -> anyhow::Result<ignore::WalkBuilder> {
// Indexing is safe as long as `Paths::from_low_args` built `self.paths`: it
// always yields at least one path (explicit arguments or one implicit path).
let mut builder = ignore::WalkBuilder::new(&self.paths.paths[0]);
// Add additional paths
for path in self.paths.paths.iter().skip(1) {
builder.add(path);
}
// Add custom ignore files
// A file that fails to load is reported via `ignore_message!` and skipped
// rather than aborting the walk.
if !self.no_ignore_files {
for path in self.ignore_file.iter() {
if let Some(err) = builder.add_ignore(path) {
ignore_message!("{err}");
}
}
}
builder
.max_depth(self.max_depth)
.follow_links(self.follow)
.max_filesize(self.max_filesize)
.threads(self.threads)
.same_file_system(self.one_file_system)
.overrides(self.globs.clone())
.types(self.types.clone())
// Ignore rule configuration
// The `no_*` flags are negated because the builder takes "enabled" values.
.hidden(!self.hidden)
.parents(!self.no_ignore_parent)
.ignore(!self.no_ignore_dot)
.git_global(!self.no_ignore_vcs && !self.no_ignore_global)
.git_ignore(!self.no_ignore_vcs)
.git_exclude(!self.no_ignore_vcs && !self.no_ignore_exclude)
.require_git(!self.no_require_git)
.ignore_case_insensitive(self.ignore_file_case_insensitive);
// Add .rgignore support
if !self.no_ignore_dot {
builder.add_custom_ignore_filename(".rgignore");
}
// Path sorting during traversal (ascending only)
// Other sort kinds and reverse order are not handled by the walker here.
if let Some(ref sort) = self.sort {
if !sort.reverse && matches!(sort.kind, SortModeKind::Path) {
builder.sort_by_file_name(|a, b| a.cmp(b));
}
}
Ok(builder)
}
Ignore file precedence (highest to lowest):
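- Command-line `--ignore-file`
- `.rgignore`
- `.ignore`
- `.gitignore`
- Global git ignores

(NOTE(review): the `ignore` crate documents files added via `WalkBuilder::add_ignore` as having lower precedence than all other ignore sources, so the position of `--ignore-file` at the top of this list should be confirmed.)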
Section 11: Derived Configuration
// Thread count derivation
// Note the priority: sorting or a single file forces 1 thread even when the
// user passed an explicit thread count.
let threads = if low.sort.is_some() || paths.is_one_file {
1 // Sorting or single file → no parallelism benefit
} else if let Some(threads) = low.threads {
threads // User specified
} else {
// Falls back to 1 if the available parallelism cannot be determined.
std::thread::available_parallelism()
.map_or(1, |n| n.get())
.min(12) // Cap at 12
};
// Line number derivation
// An explicit user setting wins; the closure only computes the default.
let line_number = low.line_number.unwrap_or_else(|| {
if low.quiet { return false; }
let Mode::Search(ref mode) = low.mode else { return false };
match *mode {
SearchMode::FilesWithMatches
| SearchMode::FilesWithoutMatch
| SearchMode::Count
| SearchMode::CountMatches => false,
SearchMode::JSON => true,
SearchMode::Standard => {
// `column` is a previously derived value; its derivation is not shown
// in this excerpt.
(state.is_terminal_stdout && !paths.is_only_stdin())
|| column
|| low.vimgrep
}
}
});
// Heading derivation
let heading = match low.heading {
None => !low.vimgrep && state.is_terminal_stdout,
Some(false) => false,
Some(true) => !low.vimgrep, // vimgrep overrides
};
// Filename display derivation
// Default: show filenames for vimgrep output or when more than one file may
// be searched.
let with_filename = low.with_filename
.unwrap_or_else(|| low.vimgrep || !paths.is_one_file);
Section 12: Helper Functions
Type definitions:
/// Builds the file-type matcher: the default type definitions with the
/// user's type changes applied in the order they were given.
///
/// Returns an error if a type definition is invalid or the final set of
/// types fails to build.
fn types(low: &LowArgs) -> anyhow::Result<ignore::types::Types> {
    let mut builder = ignore::types::TypesBuilder::new();
    builder.add_defaults(); // rust, python, c, etc.
    for tychange in low.type_changes.iter() {
        // Every arm is a block ending in `;` so that all arms have type `()`:
        // `clear`/`select`/`negate` return the builder, whereas `add_def(..)?`
        // evaluates to `()`, and mixing the two does not type-check.
        match *tychange {
            TypeChange::Clear { ref name } => {
                builder.clear(name);
            }
            TypeChange::Add { ref def } => {
                builder.add_def(def)?;
            }
            TypeChange::Select { ref name } => {
                builder.select(name);
            }
            TypeChange::Negate { ref name } => {
                builder.negate(name);
            }
        }
    }
    Ok(builder.build()?)
}
Glob overrides:
/// Builds the glob override set from `--glob` and `--iglob` patterns,
/// anchored at the working directory captured in `state`.
///
/// `--glob` patterns are case-insensitive only when `glob_case_insensitive`
/// is set; `--iglob` patterns are always added with case insensitivity on.
fn globs(
    state: &State,
    low: &LowArgs,
) -> anyhow::Result<ignore::overrides::Override> {
    let mut overrides = ignore::overrides::OverrideBuilder::new(&state.cwd);

    // Regular globs, optionally made case-insensitive as a whole.
    if low.glob_case_insensitive {
        overrides.case_insensitive(true).unwrap();
    }
    for pattern in &low.globs {
        overrides.add(pattern)?;
    }

    // From here on, every added glob is case-insensitive (--iglob).
    overrides.case_insensitive(true).unwrap();
    for pattern in &low.iglobs {
        overrides.add(pattern)?;
    }

    Ok(overrides.build()?)
}
Hostname resolution:
/// Determines the hostname, preferring the output of a user-specified binary.
///
/// With no binary given, or when the binary cannot be resolved, spawned or
/// read, or prints only whitespace, this falls back to
/// `platform_hostname()`. Failures are logged at debug level.
fn hostname(bin: Option<&Path>) -> Option<String> {
    // Try user-specified binary first
    let Some(bin) = bin else { return platform_hostname() };
    let bin = match grep::cli::resolve_binary(bin) {
        Ok(bin) => bin,
        Err(err) => {
            log::debug!("hostname binary failed: {err}");
            return platform_hostname();
        }
    };
    let mut cmd = std::process::Command::new(&bin);
    cmd.stdin(std::process::Stdio::null());
    let rdr = match grep::cli::CommandReader::new(&mut cmd) {
        Ok(rdr) => rdr,
        Err(err) => {
            log::debug!("failed to spawn hostname binary: {err}");
            return platform_hostname();
        }
    };
    // A read failure falls back to the platform hostname like every other
    // failure path, instead of giving up with `None`.
    let out = match std::io::read_to_string(rdr) {
        Ok(out) => out,
        Err(err) => {
            log::debug!("failed to read output of hostname binary: {err}");
            return platform_hostname();
        }
    };
    let hostname = out.trim();
    if hostname.is_empty() {
        platform_hostname()
    } else {
        Some(hostname.to_string())
    }
}
Quick Reference: HiArgs Methods
| Method | Returns | Purpose |
|---|---|---|
| `matcher()` | `PatternMatcher` | Compiled regex engine |
| `searcher()` | `Searcher` | File reading configuration |
| `printer()` | `Printer<W>` | Output formatter |
| `search_worker()` | `SearchWorker<W>` | Coordinates all three |
| `walk_builder()` | `WalkBuilder` | Directory traversal |
| `haystack_builder()` | `HaystackBuilder` | Path → Haystack converter |
| `buffer_writer()` | `BufferWriter` | Thread-safe output |
| `stdout()` | `StandardStream` | Colored stdout writer |
| `sort()` | `Iterator` | Optional result sorting |
| `threads()` | `usize` | Parallelism level |
| `mode()` | `Mode` | What command to run |
| `stats()` | `Option<Stats>` | Statistics tracking |
| `types()` | `&Types` | File type matcher |