Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(runtime): stream processing #22

Merged
merged 3 commits into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ license = "MIT OR Apache-2.0"
edition = "2021"

[dependencies]
bytesize = "1.3"
clap = { version = ">=4.0, <4.5", features = ["derive"] }
codespan-reporting = "0.11.1"
content_inspector = "0.2.4"
Expand Down
42 changes: 33 additions & 9 deletions src/cli.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
//! Command line arguments parser.

use std::io::BufReader;
use std::path::PathBuf;
use std::str::FromStr;
use std::{fmt, fs, io};

use bytesize::ByteSize;
use clap::{Parser, Subcommand, ValueEnum};
use shadow_rs::formatcp;
use thisctx::IntoError;

use crate::icon::Substitution;
use crate::input::InputReader;
use crate::{error, shadow};

// Placeholder names for argument values. `V_PATH` and `V_SIZE` are passed to
// clap as `value_name`; the remaining `V_*` constants are presumably used the
// same way elsewhere in this file (not visible here — confirm).
const V_PATH: &str = "PATH";
const V_SOURCE: &str = "SOURCE";
const V_SUBSTITUTION: &str = "SUBSTITUTION";
const V_FORMAT: &str = "FORMAT";
const V_SIZE: &str = "SIZE";
// Default value of the `size_limit` option, parsed into a `ByteSize`.
const DEFAULT_SIZE: &str = "16MB";
// Contents of the `index-rev` file, embedded at compile time.
const INDEX_REV: &str = include_str!("index-rev");
// Long version text: the package version, then a `cheat-sheet: <INDEX_REV>` line.
const CLAP_LONG_VERSION: &str = formatcp!("{}\ncheat-sheet: {}", shadow::PKG_VERSION, INDEX_REV);

Expand Down Expand Up @@ -94,6 +99,9 @@ pub enum Command {
/// Do not skip binary files.
#[arg(long)]
include_binary: bool,
/// Set the file size limit (0 to disable it).
#[arg(long, value_name= V_SIZE, default_value = DEFAULT_SIZE)]
size_limit: ByteSize,
/// Path(s) of files to check.
#[arg(value_name = V_PATH)]
source: Vec<IoPath>,
Expand All @@ -115,6 +123,9 @@ pub enum Command {
/// Do not skip binary files.
#[arg(long)]
include_binary: bool,
/// Set the file size limit (0 to disable it).
#[arg(long, value_name= V_SIZE, default_value = DEFAULT_SIZE)]
size_limit: ByteSize,
/// Path tuple(s) of files to read from and write to.
///
/// Each tuple is an input path followed by an optional output path,
Expand Down Expand Up @@ -184,18 +195,31 @@ impl fmt::Display for IoPath {
}

impl IoPath {
pub fn read_all(&self) -> io::Result<Vec<u8>> {
let mut buf = Vec::new();
match self {
IoPath::Stdio => _ = io::Read::read_to_end(&mut io::stdin(), &mut buf)?,
IoPath::Path(path) => _ = io::Read::read_to_end(&mut fs::File::open(path)?, &mut buf)?,
};
Ok(buf)
/// Queries the filesystem metadata of this input.
///
/// Returns `Ok(None)` when the input is not backed by a path (i.e. stdio).
///
/// # Errors
///
/// Propagates any error reported by [`fs::metadata`].
pub fn metadata(&self) -> io::Result<Option<fs::Metadata>> {
    match self {
        IoPath::Path(path) => fs::metadata(path).map(Some),
        IoPath::Stdio => Ok(None),
    }
}

/// Returns the size in bytes reported by [`Self::metadata`], or `Ok(None)`
/// when no metadata is available for this input.
///
/// # Errors
///
/// Propagates any error returned by [`Self::metadata`].
pub fn file_size(&self) -> io::Result<Option<u64>> {
    let metadata = self.metadata()?;
    Ok(metadata.map(|meta| meta.len()))
}

/// Builds a buffered reader over stdin or over the file at the stored path.
///
/// # Errors
///
/// Fails if the file cannot be opened.
fn get_reader(&self) -> io::Result<Box<dyn io::BufRead>> {
    let reader: Box<dyn io::BufRead> = match self {
        IoPath::Stdio => Box::new(BufReader::new(io::stdin())),
        IoPath::Path(path) => {
            let file = fs::File::open(path)?;
            Box::new(BufReader::new(file))
        }
    };
    Ok(reader)
}

/// Opens this input and wraps it in an [`InputReader`] for line-by-line
/// processing.
///
/// # Errors
///
/// Fails if the underlying reader cannot be created.
pub fn open(&self) -> io::Result<InputReader> {
    let reader = self.get_reader()?;
    Ok(InputReader::new(reader))
}

pub fn read_to_string(&self) -> io::Result<String> {
self.read_all()
.map(|s| String::from_utf8_lossy(&s).as_ref().to_owned())
self.get_reader().and_then(io::read_to_string)
}

pub fn write_str(&self, content: &str) -> io::Result<()> {
Expand Down
11 changes: 9 additions & 2 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use thisctx::WithContext;
use thiserror::Error;

use crate::icon::Icon;
use crate::input::InputLine;
use crate::runtime::Severity;

pub type Result<T, E = Error> = std::result::Result<T, E>;
Expand Down Expand Up @@ -38,6 +39,12 @@ pub enum Error {
#[source]
inquire::InquireError,
),
#[error("Invalid UTF-8 input")]
Utf8(
#[from]
#[source]
std::str::Utf8Error,
),
#[error("Invalid input")]
InvalidInput,
#[error("Invalid codepoint")]
Expand All @@ -50,7 +57,7 @@ pub enum Error {

#[derive(Debug, Error)]
pub(crate) struct ObsoleteIcon<'a> {
pub source_code: &'a str,
pub source_code: &'a InputLine<'a>,
pub icon: &'a Icon,
pub span: (usize, usize),
pub candidates: &'a [&'a Icon],
Expand All @@ -64,7 +71,7 @@ impl fmt::Display for ObsoleteIcon<'_> {

impl Diagnostic for ObsoleteIcon<'_> {
fn source_code(&self) -> Option<&dyn miette::SourceCode> {
Some(&self.source_code)
Some(self.source_code)
}

fn severity(&self) -> Option<miette::Severity> {
Expand Down
175 changes: 175 additions & 0 deletions src/input.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
use std::collections::VecDeque;
use std::io::BufRead;
use std::{io, iter};

use content_inspector::ContentType;
use miette::{MietteSpanContents, SourceCode, SourceSpan, SpanContents};

/// Number of already-consumed lines kept in the buffer ahead of the current
/// line; also the index of the current line inside `line_sizes`.
const LINES_BEFORE: usize = 1;
/// Number of lines read ahead of the current line so they can be shown as
/// trailing context.
const LINES_AFTER: usize = 3;

/// A line-oriented reader that keeps a small window of lines buffered around
/// the current line (`LINES_BEFORE` before it, up to `LINES_AFTER` after it).
pub struct InputReader<R = Box<dyn BufRead>> {
/// The underlying source; lines are pulled from it with `read_until(b'\n')`.
reader: R,
/// Raw bytes of every line currently in the window, stored back to back.
buffer: Vec<u8>,
/// The size in bytes of each line currently held in the buffer, oldest
/// first. Leading `0` entries are padding for lines that do not exist
/// before the start of the input.
line_sizes: VecDeque<usize>,
/// The absolute line number of the current line (set to `usize::MAX` once
/// the end of input has been reached).
line_count: usize,
/// The absolute position of the current line.
offset: usize,
/// The position of the current line relative to the buffer beginning.
rel_offset: usize,
}

impl<R: BufRead> InputReader<R> {
    /// Wraps `reader` with an empty window; no input is consumed until the
    /// first call to [`Self::next_line`].
    pub fn new(reader: R) -> Self {
        Self {
            reader,
            buffer: Vec::new(),
            line_sizes: VecDeque::new(),
            line_count: 0,
            offset: 0,
            rel_offset: 0,
        }
    }

    /// Advances to the next line and returns a view of it together with the
    /// buffered context lines, or `Ok(None)` when the input is exhausted.
    ///
    /// # Errors
    ///
    /// Propagates any I/O error raised while reading from the underlying
    /// reader.
    pub fn next_line(&mut self) -> io::Result<Option<InputLine>> {
        match self.line_sizes.pop_front() {
            Some(dropped) => {
                // Slide the window: forget the oldest line ...
                debug_assert!(self.line_sizes.len() >= LINES_BEFORE);
                self.buffer.drain(..dropped);
                self.rel_offset -= dropped;

                // ... and look one more line ahead.
                self.read_line()?;
            }
            None => {
                // First call: stand in for the missing preceding lines with
                // zero-sized entries,
                self.line_sizes.reserve(LINES_BEFORE + 1 + LINES_AFTER);
                self.line_sizes.extend(iter::repeat(0).take(LINES_BEFORE));

                // then load the current line plus its trailing context.
                for _ in 0..=LINES_AFTER {
                    self.read_line()?;
                }
            }
        }

        let Some(&size) = self.line_sizes.get(LINES_BEFORE) else {
            // No current line left in the window: end of input.
            self.line_count = usize::MAX;
            return Ok(None);
        };

        let line = InputLine {
            buffer: &self.buffer,
            line_sizes: &self.line_sizes,
            line_count: self.line_count,
            offset: self.offset,
            rel_offset: self.rel_offset,
            size,
        };
        // Move the cursor past the line that is being handed out.
        self.line_count += 1;
        self.offset += size;
        self.rel_offset += size;

        Ok(Some(line))
    }

    /// Appends one line (including its `\n`, if any) to the buffer and
    /// records its size. Returns the number of bytes read; `0` means end of
    /// input and records nothing.
    fn read_line(&mut self) -> io::Result<usize> {
        // TODO: limit line size
        let read = self.reader.read_until(b'\n', &mut self.buffer)?;
        if read > 0 {
            self.line_sizes.push_back(read);
        }
        Ok(read)
    }
}

/// A borrowed view of one input line plus the context lines buffered around
/// it.
#[derive(Debug)]
pub struct InputLine<'a> {
/// The bytes of all buffered lines (context lines included).
buffer: &'a [u8],
/// The size in bytes of each buffered line, in buffer order.
line_sizes: &'a VecDeque<usize>,
/// The absolute line number of this line.
line_count: usize,
/// The absolute byte position of the start of this line.
offset: usize,
/// The byte position of the start of this line within `buffer`.
rel_offset: usize,
/// The size in bytes of this line.
size: usize,
}

impl<'a> InputLine<'a> {
    /// Returns the raw bytes of this line only, without any context lines.
    pub fn contents(&self) -> &'a [u8] {
        let start = self.rel_offset;
        let end = start + self.size;
        &self.buffer[start..end]
    }

    /// Converts a byte index relative to the start of this line into an
    /// absolute offset.
    pub fn offset_of(&self, i: usize) -> usize {
        i + self.offset
    }

    /// Classifies the content by inspecting the whole buffered window (this
    /// line together with its context lines), not just this line.
    pub fn content_type(&self) -> ContentType {
        let window: &[u8] = self.buffer;
        content_inspector::inspect(window)
    }
}

impl SourceCode for InputLine<'_> {
    /// Returns the buffered bytes covering `span`, optionally widened by
    /// context lines.
    ///
    /// `span` is expressed in absolute offsets and must start inside this
    /// line (checked in debug builds). The context that can be returned is
    /// limited to what is buffered: at most `LINES_BEFORE` preceding lines
    /// and the lines recorded after this one in `line_sizes`.
    ///
    /// * `lines_before == 0`: the result starts exactly at the span start;
    ///   otherwise it starts at the beginning of the earliest included line.
    /// * `lines_after == 0`: the result ends exactly at the span end;
    ///   otherwise it ends after the last included trailing line.
    fn read_span<'a>(
        &'a self,
        span: &SourceSpan,
        lines_before: usize,
        lines_after: usize,
    ) -> Result<Box<dyn SpanContents<'a> + 'a>, miette::MietteError> {
        debug_assert!((self.offset..self.offset + self.size).contains(&span.offset()));

        // `start` indexes into the buffer, `offset` is the matching absolute
        // position, `line`/`column` locate the first returned byte.
        let (start, offset, line, column) = if lines_before == 0 {
            let offset = span.offset();
            let column = offset - self.offset;
            (self.rel_offset + column, offset, self.line_count, column)
        } else {
            // Count preceding lines and bytes, nearest line first. Zero-sized
            // entries are padding before the start of the input, so stop there.
            let (lines, bytes) = self
                .line_sizes
                .range(0..LINES_BEFORE)
                .copied()
                .rev()
                .take(lines_before)
                .take_while(|&n| n > 0)
                .fold((0, 0), |(lines, bytes), n| (lines + 1, bytes + n));

            (
                self.rel_offset - bytes,
                self.offset - bytes,
                self.line_count - lines,
                0,
            )
        };

        let (end, line_count) = if lines_after == 0 {
            // End exactly where the span ends. This is derived from the line
            // start rather than from `start`, because `start` points at an
            // earlier line whenever leading context was requested.
            let span_end = self.rel_offset + (span.offset() - self.offset) + span.len();
            (span_end, self.line_count)
        } else {
            // Count this line plus up to `lines_after` subsequent lines.
            let (lines, bytes) = self
                .line_sizes
                .range(LINES_BEFORE..)
                .copied()
                .take(lines_after + 1)
                .take_while(|&n| n > 0)
                .fold((0, 0), |(lines, bytes), n| (lines + 1, bytes + n));

            (self.rel_offset + bytes, self.line_count + lines)
        };

        let data = &self.buffer[start..end];
        let span = SourceSpan::from((offset, end - start));

        Ok(Box::new(MietteSpanContents::new(
            data, span, line, column, line_count,
        )))
    }
}
5 changes: 5 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ mod autocomplete;
mod cli;
mod error;
mod icon;
mod input;
mod parser;
mod prompt;
mod runtime;
Expand Down Expand Up @@ -108,12 +109,14 @@ fn main_impl() -> error::Result<()> {
source,
recursive,
include_binary,
size_limit,
} => {
let rt = rt.build();
let mut context = CheckerContext {
format,
writer: Box::new(std::io::stdout()),
include_binary,
size_limit: size_limit.as_u64(),
..Default::default()
};
for source in walk(source.into_iter().map(|p| Source(p, None)), recursive) {
Expand All @@ -131,6 +134,7 @@ fn main_impl() -> error::Result<()> {
select_first,
recursive,
include_binary,
size_limit,
source,
} => {
if yes {
Expand All @@ -141,6 +145,7 @@ fn main_impl() -> error::Result<()> {
write,
select_first,
include_binary,
size_limit: size_limit.as_u64(),
..Default::default()
};
let mut buffer = String::new();
Expand Down
Loading
Loading