From fa02bad9cea67e003da1fd4b30c48b4f4b726109 Mon Sep 17 00:00:00 2001
From: Marcel Greter
Date: Sat, 22 Aug 2015 05:26:58 +0200
Subject: [PATCH 1/2] Add unicode sequence parsing

---
 src/context.cpp  |  1 -
 src/prelexer.cpp | 34 ++++++++++++++++++++++++++++++++++
 src/prelexer.hpp |  5 +++++
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/src/context.cpp b/src/context.cpp
index 1602d13cd..25aa68667 100644
--- a/src/context.cpp
+++ b/src/context.cpp
@@ -32,7 +32,6 @@
 #include "sass2scss.h"
 #include "prelexer.hpp"
 #include "emitter.hpp"
-#include "debugger.hpp"
 
 namespace Sass {
   using namespace Constants;
diff --git a/src/prelexer.cpp b/src/prelexer.cpp
index 1834bf21a..86cc335d9 100644
--- a/src/prelexer.cpp
+++ b/src/prelexer.cpp
@@ -78,6 +78,7 @@ namespace Sass {
     const char* identifier_alpha(const char* src)
     {
       return alternatives<
+               unicode_seq,
                alpha,
                unicode,
                exactly<'-'>,
@@ -90,6 +91,7 @@ namespace Sass {
     const char* identifier_alnum(const char* src)
     {
       return alternatives<
+               unicode_seq,
                alnum,
                unicode,
                exactly<'-'>,
@@ -153,6 +155,7 @@ namespace Sass {
             re_linebreak
           >,
           escape_seq,
+          unicode_seq,
           // skip interpolants
           interpolant,
           // skip non delimiters
@@ -176,6 +179,7 @@ namespace Sass {
             re_linebreak
           >,
           escape_seq,
+          unicode_seq,
           // skip interpolants
           interpolant,
           // skip non delimiters
@@ -832,6 +836,20 @@ namespace Sass {
       return (p == 0) ? t.end : 0;
     }
 
+    const char* unicode_seq(const char* src) {
+      return sequence <
+        alternatives <
+          exactly< 'U' >,
+          exactly< 'u' >
+        >,
+        exactly< '+' >,
+        padded_token <
+          6, xdigit,
+          exactly < '?' >
+        >
+      >(src);
+    }
+
     const char* static_component(const char* src) {
       return alternatives< identifier,
                            static_string,
@@ -926,5 +944,21 @@ namespace Sass {
       return sequence< number, optional_spaces, exactly<'/'>, optional_spaces, number >(src);
     }
 
+    template <size_t size, prelexer mx, prelexer pad>
+    const char* padded_token(const char* src)
+    {
+      size_t got = 0;
+      const char* pos = src;
+      while (got < size) {
+        if (!mx(pos)) break;
+        ++ pos; ++ got;
+      }
+      while (got < size) {
+        if (!pad(pos)) break;
+        ++ pos; ++ got;
+      }
+      return got ? pos : 0;
+    }
+
   }
 }
diff --git a/src/prelexer.hpp b/src/prelexer.hpp
index 1fd037cd4..de4a89481 100644
--- a/src/prelexer.hpp
+++ b/src/prelexer.hpp
@@ -227,6 +227,8 @@ namespace Sass {
     const char* kwd_charset_directive(const char* src);
     const char* kwd_extend(const char* src);
 
+    const char* unicode_seq(const char* src);
+
     const char* kwd_if_directive(const char* src);
     const char* kwd_else_directive(const char* src);
     const char* elseif_directive(const char* src);
@@ -388,6 +390,9 @@ namespace Sass {
       return counter;
     }
 
+    template <size_t size, prelexer mx, prelexer pad>
+    const char* padded_token(const char* src);
+
   }
 }
 

From 4ccdb250157fe0b1e146ef50d86a3c2769fbfaf4 Mon Sep 17 00:00:00 2001
From: Marcel Greter
Date: Sat, 22 Aug 2015 06:01:48 +0200
Subject: [PATCH 2/2] Improve escaped sequence parsing

---
 src/prelexer.cpp | 24 +++++++++++++++++++++++-
 src/prelexer.hpp |  3 +++
 src/util.cpp     |  2 ++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/src/prelexer.cpp b/src/prelexer.cpp
index 86cc335d9..fe999b5ca 100644
--- a/src/prelexer.cpp
+++ b/src/prelexer.cpp
@@ -70,7 +70,15 @@ namespace Sass {
     {
       return sequence<
         exactly<'\\'>,
-        any_char
+        alternatives <
+          minmax_range<
+            3, 3, xdigit
+          >,
+          any_char
+        >,
+        optional <
+          exactly <' '>
+        >
       >(src);
     }
 
@@ -960,5 +968,19 @@ namespace Sass {
       return got ? pos : 0;
     }
 
+    template <size_t min, size_t max, prelexer mx>
+    const char* minmax_range(const char* src)
+    {
+      size_t got = 0;
+      const char* pos = src;
+      while (got < max) {
+        if (!mx(pos)) break;
+        ++ pos; ++ got;
+      }
+      if (got < min) return 0;
+      if (got > max) return 0; // defensive: the loop above already caps got at max
+      return pos;
+    }
+
   }
 }
diff --git a/src/prelexer.hpp b/src/prelexer.hpp
index de4a89481..867a89e53 100644
--- a/src/prelexer.hpp
+++ b/src/prelexer.hpp
@@ -393,6 +393,9 @@ namespace Sass {
     template <size_t size, prelexer mx, prelexer pad>
     const char* padded_token(const char* src);
 
+    template <size_t min, size_t max, prelexer mx>
+    const char* minmax_range(const char* src);
+
   }
 }
 
diff --git a/src/util.cpp b/src/util.cpp
index a0ba1b541..9532462e8 100644
--- a/src/util.cpp
+++ b/src/util.cpp
@@ -363,6 +363,8 @@ namespace Sass {
           // ToDo: Maybe we could do this without creating a substring
           uint32_t cp = strtol(s.substr (i + 1, len - 1).c_str(), nullptr, 16);
 
+          if (s[i + len] == ' ') ++ len;
+
           // assert invalid code points
           if (cp == 0) cp = 0xFFFD;
           // replace bell character