Skip to content

Commit

Permalink
Merge branch 'master' into select-columns
Browse files Browse the repository at this point in the history
  • Loading branch information
njaard authored Mar 15, 2024
2 parents e57050b + a71ab8d commit b70eef7
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 172 deletions.
36 changes: 7 additions & 29 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
---
on: [pull_request]
on:
pull_request:
push:
branches:
- master

name: Rust
jobs:
Expand All @@ -8,10 +11,9 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
os: [ubuntu-latest, macos-latest]
include:
- os: ubuntu-latest
- os: windows-latest
- os: macos-latest

steps:
Expand Down Expand Up @@ -51,35 +53,11 @@ jobs:
override: true
- name: Run cargo test
run: |
cd heed
cargo clean
cargo check --all-features
examples:
name: Run the sonnerie examples
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
include:
- os: ubuntu-latest
- os: macos-latest

steps:
- uses: actions/checkout@v2
with:
submodules: recursive
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Run the examples
run: |
cargo run --example 2>&1 | grep -E '^ ' | xargs -n1 cargo run --example
fmt:
name: Ensure the heed project is formatted
name: rust-fmt
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
Expand Down
191 changes: 78 additions & 113 deletions escape_string/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,34 +36,28 @@ use std::borrow::Cow;
///
/// Returns a tuple of the first "word" up until the first unescaped whitespace character,
/// and then everything after the whitespace characters.
pub fn split_one_bytes<'a>(bytes: &'a [u8])
-> Option<(Cow<'a, [u8]>, &'a [u8])>
{
pub fn split_one_bytes<'a>(bytes: &'a [u8]) -> Option<(Cow<'a, [u8]>, &'a [u8])> {
let mut start = 0usize;
while let Some(b) = bytes.get(start)
{
if b.is_ascii_whitespace()
{ start+=1; }
else
{ break; }
while let Some(b) = bytes.get(start) {
if b.is_ascii_whitespace() {
start += 1;
} else {
break;
}
}

let mut owned: Option<Vec<u8>> = None;

let mut position = start;

while position < bytes.len()
{
if bytes[position] == b'\\'
{
if !owned.is_some()
{
while position < bytes.len() {
if bytes[position] == b'\\' {
if !owned.is_some() {
owned = Some(bytes[start..position].to_owned());
}
let b = owned.as_mut().unwrap();
position += 1;
match bytes.get(position)
{
match bytes.get(position) {
None => return None,
Some(b'a') => b.push(b'\x07'),
Some(b'b') => b.push(b'\x08'),
Expand All @@ -76,38 +70,32 @@ pub fn split_one_bytes<'a>(bytes: &'a [u8])
Some(b'\\') => b.push(b'\\'),
Some(a) => b.push(*a),
}
position+=1;
}
else if bytes[position].is_ascii_whitespace()
{
position += 1;
} else if bytes[position].is_ascii_whitespace() {
break;
}
else
{
if let Some(o) = owned.as_mut()
{ o.push( bytes[position] ); }
} else {
if let Some(o) = owned.as_mut() {
o.push(bytes[position]);
}
position += 1;
}
}

let mut after = position;
while let Some(b) = bytes.get(after)
{
if b.is_ascii_whitespace()
{ after+=1; }
else
{ break; }
while let Some(b) = bytes.get(after) {
if b.is_ascii_whitespace() {
after += 1;
} else {
break;
}
}

let after = &bytes[after..];

if let Some(owned) = owned
{
Some( (Cow::Owned(owned), after) )
}
else
{
Some( (Cow::Borrowed(&bytes[start..position]), after) )
if let Some(owned) = owned {
Some((Cow::Owned(owned), after))
} else {
Some((Cow::Borrowed(&bytes[start..position]), after))
}
}

Expand All @@ -123,118 +111,95 @@ pub fn split_one_bytes<'a>(bytes: &'a [u8])
/// Returns None if there was an escape character and then nothing
///
/// Does not look at the following text at all.
pub fn split_one<'a>(text: &'a str)
-> Option<(Cow<'a, str>, &'a str)>
{
if let Some((one, remainder)) = split_one_bytes(text.as_bytes())
{
pub fn split_one<'a>(text: &'a str) -> Option<(Cow<'a, str>, &'a str)> {
if let Some((one, remainder)) = split_one_bytes(text.as_bytes()) {
let one_text;
match one
{
Cow::Borrowed(b) =>
one_text = unsafe { Cow::Borrowed( std::str::from_utf8_unchecked(b) ) },
Cow::Owned(b) =>
one_text = unsafe { Cow::Owned( String::from_utf8_unchecked(b) ) },
match one {
Cow::Borrowed(b) => {
one_text = unsafe { Cow::Borrowed(std::str::from_utf8_unchecked(b)) }
}
Cow::Owned(b) => one_text = unsafe { Cow::Owned(String::from_utf8_unchecked(b)) },
}
let remainder = unsafe { std::str::from_utf8_unchecked(remainder) };
Some((one_text, remainder))
}
else
{
} else {
None
}
}

/// Produce all the words as a vector
pub fn split<'a>(mut text: &'a str)
-> Option<Vec<Cow<'a, str>>>
{
let mut res = vec!();
pub fn split<'a>(mut text: &'a str) -> Option<Vec<Cow<'a, str>>> {
let mut res = vec![];

while !text.is_empty()
{
while !text.is_empty() {
let s = split_one(text);
if s.is_none() { return None; }
if s.is_none() {
return None;
}
let s = s.unwrap();
res.push( s.0 );
res.push(s.0);
text = s.1;
}
Some(res)
}

/// Converts text so that all the special characters are escaped with a backslash.
///
/// The escaped characters are bell (`\a`), backspace (`\b`), tab (`\t`), newline (`\n`),
/// vertical tab (`\v`), form feed (`\f`), carriage return (`\r`), space (`\ `) and the
/// backslash itself (`\\`). Every other character is copied through unchanged.
///
/// Returns `Cow::Borrowed(text)` when nothing needed escaping, so the common case does
/// not allocate; otherwise returns a newly built `Cow::Owned` string.
pub fn escape<'a>(text: &'a str) -> Cow<'a, str> {
    // Maps a character that must be escaped to the character written after the backslash.
    fn escape_code(c: char) -> Option<char> {
        match c {
            '\x07' => Some('a'),
            '\x08' => Some('b'),
            '\t' => Some('t'),
            '\n' => Some('n'),
            '\x0b' => Some('v'),
            '\x0c' => Some('f'),
            '\r' => Some('r'),
            ' ' => Some(' '),
            '\\' => Some('\\'),
            _ => None,
        }
    }

    // Stays `None` until the first special character is found.
    let mut escaped: Option<String> = None;

    for (pos, c) in text.char_indices() {
        if let Some(code) = escape_code(c) {
            // On the first special character, start the output with the untouched prefix.
            // `pos` comes from `char_indices`, so it is always a valid slice boundary.
            let out = escaped.get_or_insert_with(|| String::from(&text[..pos]));
            out.push('\\');
            out.push(code);
        } else if let Some(out) = escaped.as_mut() {
            out.push(c);
        }
    }

    match escaped {
        Some(owned) => Cow::Owned(owned),
        None => Cow::Borrowed(text),
    }
}

#[cfg(test)]
mod tests
{
use ::split_one;
use ::split;
use ::escape;
mod tests {
use escape;
use split;
use split_one;

fn check(text: &str, one: &str, two: &str)
{
fn check(text: &str, one: &str, two: &str) {
let a = split_one(text);
let a = a.unwrap();
assert_eq!(a.0, one);
assert_eq!(a.1, two);
}

#[test]
fn failure()
{
fn failure() {
assert_eq!(split_one("abc\\"), None);
}

#[test]
fn fine()
{
fn fine() {
check("abc\\\\", "abc\\", "");
check("1525824000000 520893", "1525824000000", "520893");
check("abc\\\\ def", "abc\\", "def");
Expand All @@ -246,23 +211,23 @@ mod tests
check(" ", "", "");
}
#[test]
fn splitting()
{
assert_eq!(format!("{:?}",split("abc\\\\")), "Some([\"abc\\\\\"])");
assert_eq!(format!("{:?}",split("abc def")), "Some([\"abc\", \"def\"])");
assert_eq!(format!("{:?}",split("abc\\ def")), "Some([\"abc def\"])");
fn splitting() {
assert_eq!(format!("{:?}", split("abc\\\\")), "Some([\"abc\\\\\"])");
assert_eq!(
format!("{:?}", split("abc def")),
"Some([\"abc\", \"def\"])"
);
assert_eq!(format!("{:?}", split("abc\\ def")), "Some([\"abc def\"])");
}

#[test]
fn escaping()
{
fn escaping() {
assert_eq!(escape("abc\ndef"), "abc\\ndef");
assert_eq!(escape("abc\n def"), "abc\\n\\ def");
}

#[test]
fn round_trip()
{
fn round_trip() {
check(&escape("ads\nasd"), "ads\nasd", "");
}
}
Loading

0 comments on commit b70eef7

Please sign in to comment.