ripgrep crates/searcher/src/line_buffer.rs: Code Companion¶
Reference code for the Efficient Line Reading lecture. Sections correspond to the lecture document.
Section 1: The Builder Pattern in Action¶
/// The configuration of a buffer. This contains options that are fixed once
/// a buffer has been constructed.
#[derive(Clone, Copy, Debug)] // All fields are primitives/small enums - cheap to copy
struct Config {
capacity: usize,
lineterm: u8,
buffer_alloc: BufferAllocation,
binary: BinaryDetection,
}
impl Default for Config {
fn default() -> Config {
Config {
capacity: DEFAULT_BUFFER_CAPACITY, // 64 KB
lineterm: b'\n',
buffer_alloc: BufferAllocation::default(),
binary: BinaryDetection::default(),
}
}
}
/// A builder for constructing line buffers.
#[derive(Clone, Debug, Default)]
pub(crate) struct LineBufferBuilder {
config: Config,
}
impl LineBufferBuilder {
pub(crate) fn new() -> LineBufferBuilder {
LineBufferBuilder { config: Config::default() }
}
/// Create a new line buffer from this builder's configuration.
pub(crate) fn build(&self) -> LineBuffer {
LineBuffer {
config: self.config, // Copies config, doesn't move - builder reusable
buf: vec![0; self.config.capacity],
pos: 0,
last_lineterm: 0,
end: 0,
absolute_byte_offset: 0,
binary_byte_offset: None,
}
}
/// Each setter returns &mut Self for method chaining
pub(crate) fn capacity(
&mut self,
capacity: usize,
) -> &mut LineBufferBuilder {
self.config.capacity = capacity;
self
}
pub(crate) fn line_terminator(
&mut self,
lineterm: u8,
) -> &mut LineBufferBuilder {
self.config.lineterm = lineterm;
self
}
}
The build() method takes &self rather than self, allowing multiple buffers to be created from the same builder configuration.
Section 2: Memory Allocation Strategies¶
/// The default buffer capacity that we use for the line buffer.
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 64 * (1 << 10); // 64 KB
/// Controls additional memory allocation beyond initial capacity
#[derive(Clone, Copy, Debug)]
pub(crate) enum BufferAllocation {
/// Grow without limit to fit lines (default)
Eager,
/// Limit *additional* memory to this many bytes; error if exceeded
Error(usize),
}
impl Default for BufferAllocation {
fn default() -> BufferAllocation {
BufferAllocation::Eager
}
}
/// Create a new error to be used when a configured allocation limit has been
/// reached.
pub(crate) fn alloc_error(limit: usize) -> io::Error {
let msg = format!("configured allocation limit ({}) exceeded", limit);
io::Error::new(io::ErrorKind::Other, msg)
}
// In ensure_capacity():
fn ensure_capacity(&mut self) -> Result<(), io::Error> {
if !self.free_buffer().is_empty() {
return Ok(());
}
let len = std::cmp::max(1, self.buf.len());
let additional = match self.config.buffer_alloc {
BufferAllocation::Eager => len * 2, // Double the buffer
BufferAllocation::Error(limit) => {
let used = self.buf.len() - self.config.capacity; // Already allocated beyond initial
let n = std::cmp::min(len * 2, limit - used);
if n == 0 {
return Err(alloc_error(self.config.capacity + limit));
}
n
}
};
self.buf.resize(newlen, 0);
Ok(())
}
The Error(0) case guarantees zero additional allocation—the buffer will never grow beyond its initial capacity, providing predictable memory usage.
Section 3: Binary Detection Heuristics¶
/// The behavior of binary detection in the line buffer.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum BinaryDetection {
/// No detection - treat everything as text (default)
None,
/// Stop reading when this byte is found (typically b'\x00')
Quit(u8),
/// Replace this byte with line terminator, continue reading
Convert(u8),
}
impl BinaryDetection {
/// Returns true if detection demands early EOF on binary data
fn is_quit(&self) -> bool {
match *self {
BinaryDetection::Quit(_) => true,
_ => false,
}
}
}
// Inside the fill() method - binary detection logic:
match self.config.binary {
BinaryDetection::None => {} // nothing to do
BinaryDetection::Quit(byte) => {
if let Some(i) = newbytes.find_byte(byte) {
self.end = oldend + i; // Truncate at binary byte
self.last_lineterm = self.end;
self.binary_byte_offset =
Some(self.absolute_byte_offset + self.end as u64);
// Report whether any data remains
return Ok(self.pos < self.end);
}
}
BinaryDetection::Convert(byte) => {
if let Some(i) =
replace_bytes(newbytes, byte, self.config.lineterm)
{
// Record only the first binary offset
if self.binary_byte_offset.is_none() {
self.binary_byte_offset = Some(
self.absolute_byte_offset + (oldend + i) as u64,
);
}
}
}
}
The binary_byte_offset is an absolute position in the input stream, suitable for user-facing error messages that report where binary data was found.
Section 4: The Buffer State Machine¶
/// A line buffer manages a (typically fixed) buffer for holding lines.
#[derive(Clone, Debug)]
pub(crate) struct LineBuffer {
config: Config,
/// The primary buffer with which to hold data.
buf: Vec<u8>,
/// Current position - start of unconsumed data (valid index into buf)
pos: usize,
/// End of complete lines - where buffer() slice ends
last_lineterm: usize,
/// End of all valid data (>= last_lineterm, bytes after are partial line)
end: usize,
/// Logical offset from start of input stream (not a memory address)
absolute_byte_offset: u64,
/// Absolute offset where binary data was first detected
binary_byte_offset: Option<u64>,
}
impl LineBuffer {
/// Return the contents of this buffer - only complete lines
fn buffer(&self) -> &[u8] {
&self.buf[self.pos..self.last_lineterm]
}
/// Return free space for reading new data
fn free_buffer(&mut self) -> &mut [u8] {
&mut self.buf[self.end..]
}
/// Consume bytes - advances position and absolute offset
fn consume(&mut self, amt: usize) {
assert!(amt <= self.buffer().len());
self.pos += amt;
self.absolute_byte_offset += amt as u64;
}
/// Reset for reuse with a new reader
fn clear(&mut self) {
self.pos = 0;
self.last_lineterm = 0;
self.end = 0;
self.absolute_byte_offset = 0;
self.binary_byte_offset = None;
}
}
The three-position design (pos, last_lineterm, end) separates consumed data, complete lines ready for processing, and incomplete trailing data that needs more input.
Section 5: The Fill and Roll Cycle¶
fn fill<R: io::Read>(&mut self, mut rdr: R) -> Result<bool, io::Error> {
// Early exit if binary detection already triggered EOF
if self.config.binary.is_quit() && self.binary_byte_offset.is_some() {
return Ok(!self.buffer().is_empty());
}
self.roll(); // Move unconsumed data to front
assert_eq!(self.pos, 0); // Sanity check: roll always resets pos
loop {
self.ensure_capacity()?;
let readlen = rdr.read(self.free_buffer().as_bytes_mut())?;
if readlen == 0 {
// EOF: expose any remaining data as the final "line"
self.last_lineterm = self.end;
return Ok(!self.buffer().is_empty());
}
let oldend = self.end;
self.end += readlen;
let newbytes = &mut self.buf[oldend..self.end];
// [Binary detection happens here - see Section 3]
// Look for line terminator in newly read bytes
if let Some(i) = newbytes.rfind_byte(self.config.lineterm) {
self.last_lineterm = oldend + i + 1; // Include the terminator
return Ok(true);
}
// No complete line yet - keep reading
}
}
/// Roll unconsumed bytes to front of buffer, freeing space at end
fn roll(&mut self) {
if self.pos == self.end {
// Buffer fully consumed - just reset
self.pos = 0;
self.last_lineterm = 0;
self.end = 0;
return;
}
let roll_len = self.end - self.pos;
self.buf.copy_within(self.pos..self.end, 0); // Efficient in-place move
self.pos = 0;
self.last_lineterm = roll_len;
self.end = roll_len;
}
The roll() operation uses copy_within for efficient in-place byte movement, avoiding allocation. After rolling, last_lineterm equals end because the partial line is now at the buffer's end.
Quick Reference¶
Buffer Position Diagram¶
buf: [consumed | complete lines | partial line | free space]
^ ^ ^ ^
0 pos last_lineterm end len
After roll():
buf: [complete lines + partial | free space ]
^ ^
pos=0 last_lineterm=end len
Key Methods¶
| Method | Returns | Purpose |
|---|---|---|
buffer() |
&[u8] |
Complete lines ready for processing |
free_buffer() |
&mut [u8] |
Space available for reading |
consume(amt) |
() |
Mark bytes as processed |
fill(&mut rdr) |
Result<bool, io::Error> |
Read until complete line or EOF |
roll() |
() |
Move unconsumed data to buffer front |
Binary Detection Summary¶
| Variant | On Binary Byte | Continues Reading | Records Offset |
|---|---|---|---|
None |
No action | Yes | No |
Quit(byte) |
Acts as EOF | No | Yes |
Convert(byte) |
Replace with \n |
Yes | Yes (first only) |