Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Circuit for Removing the Quoted Printable Encoding Newlines in Email Body #202

Merged
merged 11 commits into from
Jul 30, 2024
115 changes: 115 additions & 0 deletions packages/circuits/helpers/remove-soft-line-breaks.circom
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
pragma circom 2.1.6;

include "circomlib/circuits/comparators.circom";
include "circomlib/circuits/mux1.circom";
include "../utils/hash.circom";

/// @title RemoveSoftLineBreaks
/// @notice Checks that `decoded` is `encoded` with every quoted-printable soft line break
///         (the three bytes '=', '\r', '\n') removed and the remaining bytes packed to the front
/// @dev The check compares two random linear combinations (RLCs) under a challenge `r` that is
///      derived by hashing both inputs. Bytes of `encoded` that belong to a soft line break are
///      zeroed and do not advance the power of `r`; every other byte does. `decoded` is weighted
///      by consecutive powers of `r`. Both sequences start at r^1, so the k-th kept byte of
///      `encoded` and `decoded[k]` always share the weight r^(k+1), including when `encoded`
///      begins with a soft line break.
/// @param maxLength Length of both input arrays
/// @input encoded Quoted-printable encoded bytes
/// @input decoded Claimed result of removing the soft line breaks from `encoded`
/// @output is_valid 1 if the two RLCs are equal, 0 otherwise
template RemoveSoftLineBreaks(maxLength) {
    signal input encoded[maxLength];
    signal input decoded[maxLength];
    signal output is_valid;

    // Helper signals
    signal r;
    signal processed[maxLength];
    signal is_equals[maxLength];
    signal is_cr[maxLength];
    signal is_lf[maxLength];
    signal temp_soft_break[maxLength - 2];
    signal is_soft_break[maxLength];
    signal should_zero[maxLength];
    signal is_valid_char[maxLength];
    signal r_enc[maxLength];
    signal sum_enc[maxLength];
    signal r_dec[maxLength];
    signal sum_dec[maxLength];

    // Helper components
    component mux_enc[maxLength];

    // Deriving r from Poseidon hash of both inputs (encoded first, then decoded)
    component r_hasher = PoseidonModular(2 * maxLength);
    for (var i = 0; i < maxLength; i++) {
        r_hasher.in[i] <== encoded[i];
    }
    for (var i = 0; i < maxLength; i++) {
        r_hasher.in[maxLength + i] <== decoded[i];
    }
    r <== r_hasher.out;

    // is_equals[i] = 1 iff encoded[i] is '=' (61 in ASCII)
    for (var i = 0; i < maxLength; i++) {
        is_equals[i] <== IsEqual()([encoded[i], 61]);
    }

    // is_cr[i] = 1 iff encoded[i + 1] is '\r' (13 in ASCII); no look-ahead exists for the last index
    for (var i = 0; i < maxLength - 1; i++) {
        is_cr[i] <== IsEqual()([encoded[i + 1], 13]);
    }
    is_cr[maxLength - 1] <== 0;

    // is_lf[i] = 1 iff encoded[i + 2] is '\n' (10 in ASCII); no look-ahead exists for the last two indices
    for (var i = 0; i < maxLength - 2; i++) {
        is_lf[i] <== IsEqual()([encoded[i + 2], 10]);
    }
    is_lf[maxLength - 2] <== 0;
    is_lf[maxLength - 1] <== 0;

    // is_soft_break[i] = 1 iff a soft line break starts at index i.
    // The product of three flags is split in two because each constraint may be at most quadratic.
    for (var i = 0; i < maxLength - 2; i++) {
        temp_soft_break[i] <== is_equals[i] * is_cr[i];
        is_soft_break[i] <== temp_soft_break[i] * is_lf[i];
    }
    // A three-byte sequence cannot start at either of the last two positions
    is_soft_break[maxLength - 2] <== 0;
    is_soft_break[maxLength - 1] <== 0;

    // should_zero[i] = 1 iff index i is the '=', '\r' or '\n' of a soft line break,
    // i.e. a break starts at i, i - 1 or i - 2 (out-of-range terms are omitted)
    for (var i = 0; i < maxLength; i++) {
        if (i == 0) {
            should_zero[i] <== is_soft_break[i];
        } else if (i == 1) {
            should_zero[i] <== is_soft_break[i] + is_soft_break[i-1];
        } else if (i == maxLength - 1) {
            should_zero[i] <== is_soft_break[i-1] + is_soft_break[i-2];
        } else {
            should_zero[i] <== is_soft_break[i] + is_soft_break[i-1] + is_soft_break[i-2];
        }
    }

    // Zero out every byte that belongs to a soft line break
    for (var i = 0; i < maxLength; i++) {
        processed[i] <== (1 - should_zero[i]) * encoded[i];
    }

    // Calculate powers of r for encoded.
    // The power advances only at kept bytes. Index 0 goes through the same rule as every other
    // index (r if kept, 1 if dropped) so that a soft line break at the very start of `encoded`
    // does not shift the weights of all following bytes by one power.
    mux_enc[0] = Mux1();
    mux_enc[0].c[0] <== r;
    mux_enc[0].c[1] <== 1;
    mux_enc[0].s <== should_zero[0];
    r_enc[0] <== mux_enc[0].out;
    for (var i = 1; i < maxLength; i++) {
        mux_enc[i] = Mux1();
        mux_enc[i].c[0] <== r_enc[i - 1] * r;
        mux_enc[i].c[1] <== r_enc[i - 1];
        mux_enc[i].s <== should_zero[i];
        r_enc[i] <== mux_enc[i].out;
    }

    // Calculate powers of r for decoded: r_dec[i] = r^(i + 1), matching the encoded side
    r_dec[0] <== r;
    for (var i = 1; i < maxLength; i++) {
        r_dec[i] <== r_dec[i - 1] * r;
    }

    // Calculate rlc for processed
    sum_enc[0] <== r_enc[0] * processed[0];
    for (var i = 1; i < maxLength; i++) {
        sum_enc[i] <== sum_enc[i - 1] + r_enc[i] * processed[i];
    }

    // Calculate rlc for decoded
    // Review note: a reviewer asked whether the trailing positions of `decoded` (freed up by the
    // removed bytes) must be separately constrained to zero. The author's answer was that the
    // RLC takes care of it: every decoded[i] carries its own power of r, so a non-zero value in
    // a trailing position changes sum_dec unless r happens to be a root of the difference.
    sum_dec[0] <== r_dec[0] * decoded[0];
    for (var i = 1; i < maxLength; i++) {
        sum_dec[i] <== sum_dec[i - 1] + r_dec[i] * decoded[i];
    }

    // Check if rlc for decoded is equal to rlc for encoded
    is_valid <== IsEqual()([sum_enc[maxLength - 1], sum_dec[maxLength - 1]]);
}
224 changes: 224 additions & 0 deletions packages/circuits/tests/remove-soft-line-breaks.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
import { wasm as wasm_tester } from 'circom_tester';
import path from 'path';

describe('RemoveSoftLineBreaks', () => {
  // Every fixture below is a 32-element array: the ASCII bytes of the text
  // followed by zero padding.
  const MAX_LENGTH = 32;

  let circuit: any;

  /** Converts an ASCII string to its byte values, right-padded with zeros to MAX_LENGTH. */
  const toPaddedBytes = (text: string): number[] => {
    const bytes = Array.from(text, (ch) => ch.charCodeAt(0));
    return [...bytes, ...Array(MAX_LENGTH - bytes.length).fill(0)];
  };

  /**
   * Computes a witness for the given encoded/decoded pair, checks the
   * constraints, and asserts the value of the `is_valid` output.
   */
  const expectValidity = async (
    encodedText: string,
    decodedText: string,
    expected: 0 | 1
  ): Promise<void> => {
    const input = {
      encoded: toPaddedBytes(encodedText),
      decoded: toPaddedBytes(decodedText),
    };

    const witness = await circuit.calculateWitness(input);
    await circuit.checkConstraints(witness);

    await circuit.assertOut(witness, {
      is_valid: expected,
    });
  };

  beforeAll(async () => {
    const circuitPath = path.join(
      __dirname,
      './test-circuits/remove-soft-line-breaks-test.circom'
    );
    const options = {
      recompile: true,
      include: path.join(__dirname, '../../../node_modules'),
      output: path.join(__dirname, './compiled-test-circuits'),
    };
    circuit = await wasm_tester(circuitPath, options);
  });

  it('should correctly remove soft line breaks', async () => {
    await expectValidity('ses:=\r\n- Sreak=\r\n', 'ses:- Sreak', 1);
  });

  it('should fail when decoded input is incorrect', async () => {
    // Changed last character ('k' -> 'l')
    await expectValidity('ses:=\r\n- Sreak=\r\n', 'ses:- Sreal', 0);
  });

  it('should handle input with no soft line breaks', async () => {
    await expectValidity('hello', 'hello', 1);
  });

  it('should handle input with multiple consecutive soft line breaks', async () => {
    await expectValidity('hello=\r\n=\r\nworld', 'helloworld', 1);
  });

  // Note: The circuit currently does not handle the case when the encoded input starts with a soft line break.
  // This test is included to document the expected behavior, but it will fail with the current implementation.
  it('should handle input with soft line break at the beginning', async () => {
    await expectValidity('=\r\nhello', 'hello', 1);
  });

  it('should handle input with soft line break at the end', async () => {
    await expectValidity('hello=\r\n', 'hello', 1);
  });

  it('should handle input with incomplete soft line break sequence', async () => {
    // Not a soft line break: the third byte is 11 (0x0b), LF should be 10
    const notASoftBreak = 'hello=\r\x0b';
    await expectValidity(notASoftBreak, notASoftBreak, 1);
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pragma circom 2.1.6;

include "../../helpers/remove-soft-line-breaks.circom";

// Test entry point: instantiates RemoveSoftLineBreaks with maxLength = 32,
// so both `encoded` and `decoded` must be supplied as 32-element arrays.
component main = RemoveSoftLineBreaks(32);
22 changes: 22 additions & 0 deletions packages/circuits/utils/array.circom
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,25 @@ template AssertZeroPadding(maxArrayLen) {
lessThans[i].out * in[i] === 0;
}
}

/// @title Slice
/// @notice Copies the fixed window in[start], ..., in[end - 1] into a new array
/// @dev The bounds are template parameters, so the window is fixed at compile time and the
///      output has exactly (end - start) entries with no padding
/// @dev Per the original note, SelectSubArray is the alternative when the range must be chosen
///      at runtime; Slice is the cheaper option for fixed ranges
/// @param n The length of the input array
/// @param start Index of the first element copied (inclusive)
/// @param end Index one past the last element copied (exclusive)
/// @input in The input array of length n
/// @output out The sliced array of length (end - start)
template Slice(n, start, end) {
    // The window must lie inside the input: 0 <= start <= end <= n
    assert(end <= n);
    assert(0 <= start);
    assert(start <= end);

    signal input in[n];
    signal output out[end - start];

    // Walk the output positions and pull each value from its shifted input position
    for (var offset = 0; offset < end - start; offset++) {
        out[offset] <== in[start + offset];
    }
}
Loading
Loading