Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Circuit for Removing the Quoted Printable Encoding Newlines in Email Body #202

Merged
merged 11 commits into from
Jul 30, 2024
19 changes: 18 additions & 1 deletion packages/circuits/email-verifier.circom
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ include "./lib/sha.circom";
include "./utils/array.circom";
include "./utils/regex.circom";
include "./utils/hash.circom";
include "./helpers/remove-soft-line-breaks.circom";


/// @title EmailVerifier
Expand All @@ -20,6 +21,7 @@ include "./utils/hash.circom";
/// @param n Number of bits per chunk the RSA key is split into. Recommended to be 121.
/// @param k Number of chunks the RSA key is split into. Recommended to be 17.
/// @param ignoreBodyHashCheck Set 1 to skip body hash check in case data to prove/extract is only in the headers.
/// @param removeSoftLineBreaks Set 1 to remove soft line breaks from the email body.
/// @input emailHeader[maxHeadersLength] Email headers that are signed (ones in `DKIM-Signature` header) as ASCII int[], padded as per SHA-256 block size.
/// @input emailHeaderLength Length of the email header including the SHA-256 padding.
/// @input pubkey[k] RSA public key split into k chunks of n bits each.
Expand All @@ -28,8 +30,10 @@ include "./utils/hash.circom";
/// @input emailBodyLength Length of the email body including the SHA-256 padding.
/// @input bodyHashIndex Index of the body hash `bh` in the emailHeader.
/// @input precomputedSHA[32] Precomputed SHA-256 hash of the email body till the bodyHashIndex.
/// @input decodedEmailBodyIn[maxBodyLength] Decoded email body without soft line breaks.
/// @output pubkeyHash Poseidon hash of the pubkey - Poseidon(n/2)(n/2 chunks of pubkey with k*2 bits per chunk).
template EmailVerifier(maxHeadersLength, maxBodyLength, n, k, ignoreBodyHashCheck) {
/// @output decodedEmailBodyOut[maxBodyLength] Decoded email body with soft line breaks removed.
template EmailVerifier(maxHeadersLength, maxBodyLength, n, k, ignoreBodyHashCheck, removeSoftLineBreaks) {
shreyas-londhe marked this conversation as resolved.
Show resolved Hide resolved
assert(maxHeadersLength % 64 == 0);
assert(maxBodyLength % 64 == 0);
assert(n * k > 2048); // to support 2048 bit RSA
Expand Down Expand Up @@ -122,6 +126,19 @@ template EmailVerifier(maxHeadersLength, maxBodyLength, n, k, ignoreBodyHashChec
}
computedBodyHashInts[i].out === headerBodyHash[i];
}

if (removeSoftLineBreaks == 1) {
signal input decodedEmailBodyIn[maxBodyLength];
signal output decodedEmailBodyOut[maxBodyLength];
component qpEncodingChecker = RemoveSoftLineBreaks(maxBodyLength);

qpEncodingChecker.encoded <== emailBody;
qpEncodingChecker.decoded <== decodedEmailBodyIn;

qpEncodingChecker.isValid === 1;

decodedEmailBodyOut <== qpEncodingChecker.decoded;
}
}


Expand Down
121 changes: 121 additions & 0 deletions packages/circuits/helpers/remove-soft-line-breaks.circom
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
pragma circom 2.1.6;

include "circomlib/circuits/comparators.circom";
include "circomlib/circuits/mux1.circom";
include "../utils/hash.circom";

/// @title RemoveSoftLineBreaks
/// @notice This template verifies the removal of soft line breaks from an encoded input string
/// @dev Soft line breaks are defined as "=\r\n" sequences in the encoded input
/// @dev The comparison is a random linear combination (RLC). Every character that is kept from
///      `encoded` is weighted by r^(number of kept characters up to and including it), every
///      `decoded[i]` is weighted by r^(i + 1), and the two sums are compared. Characters that
///      belong to a soft line break are multiplied by 0 and do not advance the power of r.
/// @dev `r` is the output of PoseidonModular over `encoded` followed by `decoded`.
/// @param maxLength The maximum length of the input strings. Must be at least 2.
/// @input encoded An array of ASCII values representing the input string with potential soft line breaks
/// @input decoded An array of ASCII values representing the expected output after removing soft line breaks
/// @output isValid A signal that is 1 if the decoded input correctly represents the encoded input with soft line breaks removed, 0 otherwise
template RemoveSoftLineBreaks(maxLength) {
    // The look-ahead logic below indexes `maxLength - 2`, so smaller sizes are not supported.
    assert(maxLength >= 2);

    signal input encoded[maxLength];
    signal input decoded[maxLength];
    signal output isValid;

    // Helper signals
    signal r;
    signal processed[maxLength];
    signal isEquals[maxLength];
    signal isCr[maxLength];
    signal isLf[maxLength];
    signal tempSoftBreak[maxLength - 2];
    signal isSoftBreak[maxLength];
    signal shouldZero[maxLength];
    signal rEnc[maxLength];
    signal sumEnc[maxLength];
    signal rDec[maxLength];
    signal sumDec[maxLength];

    // Helper components
    component muxEnc[maxLength];

    // Deriving r from Poseidon hash of both strings (encoded first, then decoded)
    component rHasher = PoseidonModular(2 * maxLength);
    for (var i = 0; i < maxLength; i++) {
        rHasher.in[i] <== encoded[i];
    }
    for (var i = 0; i < maxLength; i++) {
        rHasher.in[maxLength + i] <== decoded[i];
    }
    r <== rHasher.out;

    // Check for '=' (61 in ASCII) at position i
    for (var i = 0; i < maxLength; i++) {
        isEquals[i] <== IsEqual()([encoded[i], 61]);
    }

    // Check for '\r' (13 in ASCII) one position ahead: isCr[i] describes encoded[i + 1]
    for (var i = 0; i < maxLength - 1; i++) {
        isCr[i] <== IsEqual()([encoded[i + 1], 13]);
    }
    isCr[maxLength - 1] <== 0;

    // Check for '\n' (10 in ASCII) two positions ahead: isLf[i] describes encoded[i + 2]
    for (var i = 0; i < maxLength - 2; i++) {
        isLf[i] <== IsEqual()([encoded[i + 2], 10]);
    }
    isLf[maxLength - 2] <== 0;
    isLf[maxLength - 1] <== 0;

    // Identify soft line breaks: isSoftBreak[i] is 1 when "=\r\n" starts at position i.
    // The product of three signals is split in two steps to keep each constraint quadratic.
    for (var i = 0; i < maxLength - 2; i++) {
        tempSoftBreak[i] <== isEquals[i] * isCr[i];
        isSoftBreak[i] <== tempSoftBreak[i] * isLf[i];
    }
    // Handle the last two characters: a three character sequence cannot start there
    isSoftBreak[maxLength - 2] <== 0;
    isSoftBreak[maxLength - 1] <== 0;

    // Determine which characters should be zeroed: position i is removed when a soft
    // line break starts at i, i - 1 or i - 2. Two soft breaks cannot start within two
    // positions of each other (that would need one character to be both '=' and '\r',
    // or '=' and '\n'), so each sum is 0 or 1.
    for (var i = 0; i < maxLength; i++) {
        if (i == 0) {
            shouldZero[i] <== isSoftBreak[i];
        } else if (i == 1) {
            shouldZero[i] <== isSoftBreak[i] + isSoftBreak[i-1];
        } else if (i == maxLength - 1) {
            shouldZero[i] <== isSoftBreak[i-1] + isSoftBreak[i-2];
        } else {
            shouldZero[i] <== isSoftBreak[i] + isSoftBreak[i-1] + isSoftBreak[i-2];
        }
    }

    // Process the encoded input: removed characters become 0, all others pass through
    for (var i = 0; i < maxLength; i++) {
        processed[i] <== (1 - shouldZero[i]) * encoded[i];
    }

    // Calculate powers of r for encoded.
    // Index 0 goes through the same selection as every other index: the power only
    // advances on a kept character. Hard-wiring rEnc[0] to 1 would treat position 0
    // as kept even when the input starts with a soft line break, which shifts every
    // later weight by one power of r and makes the correct decoding fail the check.
    muxEnc[0] = Mux1();
    muxEnc[0].c[0] <== r;
    muxEnc[0].c[1] <== 1;
    muxEnc[0].s <== shouldZero[0];
    rEnc[0] <== muxEnc[0].out;
    for (var i = 1; i < maxLength; i++) {
        muxEnc[i] = Mux1();
        muxEnc[i].c[0] <== rEnc[i - 1] * r;
        muxEnc[i].c[1] <== rEnc[i - 1];
        muxEnc[i].s <== shouldZero[i];
        rEnc[i] <== muxEnc[i].out;
    }

    // Calculate powers of r for decoded: rDec[i] = r^(i + 1), matching the weight
    // assigned to the (i + 1)-th kept character on the encoded side
    rDec[0] <== r;
    for (var i = 1; i < maxLength; i++) {
        rDec[i] <== rDec[i - 1] * r;
    }

    // Calculate rlc for processed
    sumEnc[0] <== rEnc[0] * processed[0];
    for (var i = 1; i < maxLength; i++) {
        sumEnc[i] <== sumEnc[i - 1] + rEnc[i] * processed[i];
    }

    // Calculate rlc for decoded
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thinking of we need to verify the k characters it got replaced by were zero? Tho I think the random computation should cover but but not sure

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the RLC takes care of that

    sumDec[0] <== rDec[0] * decoded[0];
    for (var i = 1; i < maxLength; i++) {
        sumDec[i] <== sumDec[i - 1] + rDec[i] * decoded[i];
    }

    // Check if rlc for decoded is equal to rlc for encoded
    isValid <== IsEqual()([sumEnc[maxLength - 1], sumDec[maxLength - 1]]);
}
42 changes: 42 additions & 0 deletions packages/circuits/tests/email-verifier.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,45 @@ describe("EmailVerifier : Without body check", () => {
await circuit.checkConstraints(witness);
});
});

/**
 * Exercises the EmailVerifier test circuit that is compiled with soft line
 * break removal: inputs are generated from a DKIM-verified sample email with
 * `removeSoftLineBreaks` enabled, then a witness is calculated and checked
 * against the circuit constraints.
 */
describe('EmailVerifier : With soft line breaks', () => {
  // Allow up to 10 minutes for this suite.
  jest.setTimeout(10 * 60 * 1000);

  let circuit: any;
  let dkimResult: DKIMVerificationResult;

  beforeAll(async () => {
    const emailPath = path.join(__dirname, './test-emails/lorem_ipsum.eml');
    const circuitPath = path.join(
      __dirname,
      './test-circuits/email-verifier-with-soft-line-breaks-test.circom'
    );
    const compileOptions = {
      recompile: true,
      include: path.join(__dirname, '../../../node_modules'),
      output: path.join(__dirname, './compiled-test-circuits'),
    };

    // Verify the sample email first; the DKIM result feeds the input generator.
    const rawEmail = fs.readFileSync(emailPath, 'utf8');
    dkimResult = await verifyDKIMSignature(rawEmail);

    circuit = await wasm_tester(circuitPath, compileOptions);
  });

  it('should verify email when removeSoftLineBreaks is true', async () => {
    const emailVerifierInputs = generateEmailVerifierInputsFromDKIMResult(dkimResult, {
      maxHeadersLength: 640,
      maxBodyLength: 1408,
      ignoreBodyHashCheck: false,
      removeSoftLineBreaks: true,
    });

    // The witness must exist and satisfy every constraint of the circuit.
    const witness = await circuit.calculateWitness(emailVerifierInputs);
    await circuit.checkConstraints(witness);
  });
});
Loading
Loading