From 9be9fda5f9918216d415e5047b2a510bf9631bdd Mon Sep 17 00:00:00 2001
From: Scott Steele <scott.steele@distilnetworks.com>
Date: Wed, 7 Dec 2016 20:50:44 -0500
Subject: [PATCH] Verify character class still non-empty after converting to
 byte class
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As long as `[^\x00-\xff]` is treated as a full Unicode character class,
it is not empty; for instance, `≥` would still be matched.

However, when `CharClass::to_byte_class` is called on it (as is done
when using `regex::bytes::Regex::new` rather than `regex::Regex::new`),
it _is_ now empty, since it excludes all possible bytes.

This commit adds a test asserting that `regex::bytes::Regex::new`
returns `Err` for this case (in accordance with
https://github.com/rust-lang-nursery/regex/issues/106) and adds an
`is_empty` check to the result of calling `CharClass::to_byte_class`,
which allows the test to pass.
---
 src/parser.rs | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/parser.rs b/src/parser.rs
index 1d10572..ed9f33b 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -596,7 +596,17 @@ impl Parser {
         Ok(Build::Expr(if self.flags.unicode {
             Expr::Class(class)
         } else {
-            Expr::ClassBytes(class.to_byte_class())
+            let byte_class = class.to_byte_class();
+
+            // If `class` was non-empty only due to multibyte characters, the
+            // corresponding byte class will now be empty.
+            //
+            // See https://github.com/rust-lang-nursery/regex/issues/303
+            if byte_class.is_empty() {
+                return Err(self.err(ErrorKind::EmptyClass));
+            }
+
+            Expr::ClassBytes(byte_class)
         }))
     }