Skip to content

Commit

Permalink
Unicode upgrades.
Browse files Browse the repository at this point in the history
Also closes #10.
  • Loading branch information
katzdm committed Jun 12, 2024
1 parent d1cd625 commit 133a61c
Show file tree
Hide file tree
Showing 9 changed files with 252 additions and 119 deletions.
2 changes: 2 additions & 0 deletions clang/include/clang/Lex/Lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,8 @@ class Lexer : public PreprocessorLexer {
/// Check if this is the first time we're lexing the input file.
bool isFirstTimeLexingFile() const { return IsFirstTimeLexingFile; }

/// Checks that \p In spells a valid identifier, accepting both UTF-8 encoded
/// characters and universal-character-names. Returns false if \p In is empty,
/// is not well-formed, or contains a character that is not permitted at its
/// position. After a successful rewrite, \p In holds the UTF-8 spelling of the
/// identifier with each universal-character-name replaced by the code point
/// that it names.
bool validateAndRewriteIdentifier(std::string &In);

private:
//===--------------------------------------------------------------------===//
// Internal implementation interfaces.
Expand Down
79 changes: 79 additions & 0 deletions clang/lib/Lex/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4689,3 +4689,82 @@ bool Lexer::LexDependencyDirectiveTokenWhileSkipping(Token &Result) {
convertDependencyDirectiveToken(DDTok, Result);
return false;
}

bool Lexer::validateAndRewriteIdentifier(std::string &In) {
SmallVector<llvm::UTF32, 30> RewriteUTF32;

static const llvm::sys::UnicodeCharRange DigitRanges[] = {
{0x0030, 0x0039}
};
static llvm::sys::UnicodeCharRange NondigitRanges[] = {
{0x0041, 0x005A}, {0x005F, 0x005F}, {0x0061, 0x007A}
};
static const llvm::sys::UnicodeCharSet DigitChars(DigitRanges);
static const llvm::sys::UnicodeCharSet NondigitChars(NondigitRanges);
static const llvm::sys::UnicodeCharSet XIDStartChars(XIDStartRanges);
static const llvm::sys::UnicodeCharSet XIDContinueChars(XIDContinueRanges);

if (In.size() == 0)
return false;

const auto *Cursor = &In[0];
const auto *End = Cursor + In.size();

// Validate leading character.
if (*Cursor == '\\') {
const char *SlashLoc = Cursor++;
std::optional<uint32_t> UCN = tryReadUCN(Cursor, SlashLoc, nullptr);
if (!UCN || !XIDStartChars.contains(UCN.value()))
return false;
RewriteUTF32.push_back(UCN.value());
} else {
llvm::UTF32 CodePoint;

if (llvm::conversionOK != llvm::convertUTF8Sequence(
reinterpret_cast<const llvm::UTF8 **>(&Cursor),
reinterpret_cast<const llvm::UTF8 *>(End), &CodePoint,
llvm::ConversionFlags::strictConversion))
return false;

if (!NondigitChars.contains(CodePoint) &&
!XIDStartChars.contains(CodePoint))
return false;
RewriteUTF32.push_back(CodePoint);
}

// Validate remaining characters.
while (Cursor < End) {
if (*Cursor == '\\') {
const char *SlashLoc = Cursor++;
std::optional<uint32_t> UCN = tryReadUCN(Cursor, SlashLoc, nullptr);
if (!UCN || !(XIDStartChars.contains(UCN.value()) ||
XIDContinueChars.contains(UCN.value())))
return false;
RewriteUTF32.push_back(UCN.value());
} else {
llvm::UTF32 CodePoint;

if (llvm::conversionOK != llvm::convertUTF8Sequence(
reinterpret_cast<const llvm::UTF8 **>(&Cursor),
reinterpret_cast<const llvm::UTF8 *>(End), &CodePoint,
llvm::ConversionFlags::strictConversion))
return false;

if (!DigitChars.contains(CodePoint) &&
!NondigitChars.contains(CodePoint) &&
!XIDStartChars.contains(CodePoint) &&
!XIDContinueChars.contains(CodePoint))
return false;
RewriteUTF32.push_back(CodePoint);
}
}
assert(Cursor == End);

std::string Rewrite;
Rewrite.reserve(RewriteUTF32.size() * 4);
if (!llvm::convertUTF32ToUTF8String(RewriteUTF32, Rewrite))
return true;
In = Rewrite;

return true;
}
135 changes: 61 additions & 74 deletions clang/lib/Sema/Metafunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "clang/AST/RecordLayout.h"
#include "clang/AST/Reflection.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Metafunction.h"
#include "clang/Sema/ParsedTemplate.h"
Expand Down Expand Up @@ -392,7 +393,7 @@ static constexpr Metafunction Metafunctions[] = {
{ Metafunction::MFRK_bool, 1, 1, is_special_member },
{ Metafunction::MFRK_metaInfo, 2, 2, reflect_result },
{ Metafunction::MFRK_metaInfo, 5, 5, reflect_invoke },
{ Metafunction::MFRK_metaInfo, 9, 9, data_member_spec },
{ Metafunction::MFRK_metaInfo, 10, 10, data_member_spec },
{ Metafunction::MFRK_metaInfo, 3, 3, define_class },
{ Metafunction::MFRK_sizeT, 1, 1, offset_of },
{ Metafunction::MFRK_sizeT, 1, 1, size_of },
Expand Down Expand Up @@ -527,6 +528,7 @@ static void getTypeName(std::string &Result, ASTContext &C, QualType QT,
bool BasicOnly) {
PrintingPolicy PP = C.getPrintingPolicy();
PP.SuppressTagKeyword = true;
PP.SuppressScope = true;

encodeName(Result, QT.getAsString(PP), BasicOnly);
}
Expand Down Expand Up @@ -1779,27 +1781,20 @@ bool can_substitute(APValue &Result, Sema &S, EvalFn Evaluator,

for (uint64_t k = 0; k < nArgs; ++k) {
llvm::APInt Idx(S.Context.getTypeSize(S.Context.getSizeType()), k, false);
Expr *IdxExpr = IntegerLiteral::Create(S.Context, Idx,
S.Context.getSizeType(),
Args[1]->getExprLoc());

ArraySubscriptExpr *SubscriptExpr =
new (S.Context) ArraySubscriptExpr(Args[1], IdxExpr,
S.Context.MetaInfoTy,
VK_LValue, OK_Ordinary,
Range.getBegin());

ImplicitCastExpr *RVExpr = ImplicitCastExpr::Create(S.Context,
S.Context.MetaInfoTy,
CK_LValueToRValue,
SubscriptExpr,
nullptr, VK_PRValue,
FPOptionsOverride());
if (RVExpr->isValueDependent() || RVExpr->isTypeDependent())
Expr *Synthesized = IntegerLiteral::Create(S.Context, Idx,
S.Context.getSizeType(),
Args[1]->getExprLoc());

Synthesized = new (S.Context) ArraySubscriptExpr(Args[1], Synthesized,
S.Context.MetaInfoTy,
VK_LValue, OK_Ordinary,
Range.getBegin());
if (Synthesized->isValueDependent() || Synthesized->isTypeDependent())
return true;

APValue Unwrapped;
if (!Evaluator(Unwrapped, RVExpr, true) || !Unwrapped.isReflection() ||
if (!Evaluator(Unwrapped, Synthesized, true) ||
!Unwrapped.isReflection() ||
!CanActAsTemplateArg(Unwrapped.getReflection()))
return true;

Expand Down Expand Up @@ -1852,27 +1847,20 @@ bool substitute(APValue &Result, Sema &S, EvalFn Evaluator, QualType ResultTy,

for (uint64_t k = 0; k < nArgs; ++k) {
llvm::APInt Idx(S.Context.getTypeSize(S.Context.getSizeType()), k, false);
Expr *IdxExpr = IntegerLiteral::Create(S.Context, Idx,
S.Context.getSizeType(),
Args[1]->getExprLoc());

ArraySubscriptExpr *SubscriptExpr =
new (S.Context) ArraySubscriptExpr(Args[1], IdxExpr,
S.Context.MetaInfoTy,
VK_LValue, OK_Ordinary,
Range.getBegin());

ImplicitCastExpr *RVExpr = ImplicitCastExpr::Create(S.Context,
S.Context.MetaInfoTy,
CK_LValueToRValue,
SubscriptExpr,
nullptr, VK_PRValue,
FPOptionsOverride());
if (RVExpr->isValueDependent() || RVExpr->isTypeDependent())
Expr *Synthesized = IntegerLiteral::Create(S.Context, Idx,
S.Context.getSizeType(),
Args[1]->getExprLoc());

Synthesized = new (S.Context) ArraySubscriptExpr(Args[1], Synthesized,
S.Context.MetaInfoTy,
VK_LValue, OK_Ordinary,
Range.getBegin());
if (Synthesized->isValueDependent() || Synthesized->isTypeDependent())
return true;

APValue Unwrapped;
if (!Evaluator(Unwrapped, RVExpr, true) || !Unwrapped.isReflection() ||
if (!Evaluator(Unwrapped, Synthesized, true) ||
!Unwrapped.isReflection() ||
!CanActAsTemplateArg(Unwrapped.getReflection()))
return true;

Expand Down Expand Up @@ -3363,34 +3351,41 @@ bool data_member_spec(APValue &Result, Sema &S, EvalFn Evaluator,
size_t nameLen = Scratch.getInt().getExtValue();
Name.emplace(nameLen, '\0');

// Evaluate the character type.
if (!Evaluator(Scratch, Args[ArgIdx++], true))
return true;
QualType CharTy = Scratch.getReflectedType();

// Evaluate the data contents.
for (uint64_t k = 0; k < nameLen; ++k) {
llvm::APInt Idx(S.Context.getTypeSize(S.Context.getSizeType()), k, false);
Expr *IdxExpr = IntegerLiteral::Create(S.Context, Idx,
S.Context.getSizeType(),
Args[ArgIdx]->getExprLoc());

ArraySubscriptExpr *SubscriptExpr =
new (S.Context) ArraySubscriptExpr(Args[ArgIdx], IdxExpr,
S.Context.CharTy,
VK_LValue, OK_Ordinary,
Range.getBegin());

ImplicitCastExpr *RVExpr = ImplicitCastExpr::Create(S.Context,
S.Context.CharTy,
CK_LValueToRValue,
SubscriptExpr,
nullptr, VK_PRValue,
FPOptionsOverride());
if (RVExpr->isValueDependent() || RVExpr->isTypeDependent())
Expr *Synthesized = IntegerLiteral::Create(S.Context, Idx,
S.Context.getSizeType(),
Args[ArgIdx]->getExprLoc());

Synthesized = new (S.Context) ArraySubscriptExpr(Args[ArgIdx],
Synthesized, CharTy,
VK_LValue, OK_Ordinary,
Range.getBegin());
if (Synthesized->isValueDependent() || Synthesized->isTypeDependent())
return true;

if (!Evaluator(Scratch, RVExpr, true))
if (!Evaluator(Scratch, Synthesized, true))
return true;

(*Name)[k] = static_cast<char>(Scratch.getInt().getExtValue());
}
ArgIdx++;
} else {
ArgIdx += 2;
ArgIdx += 3;
}

// Validate the name as an identifier.
if (Name) {
Lexer Lex(Range.getBegin(), S.getLangOpts(), Name->data(), Name->data(),
Name->data() + Name->size(), false);
if (!Lex.validateAndRewriteIdentifier(*Name))
return true;
}

// Evaluate whether an alignment was provided.
Expand Down Expand Up @@ -3581,28 +3576,20 @@ bool define_class(APValue &Result, Sema &S, EvalFn Evaluator, QualType ResultTy,
for (size_t k = 0; k < NumMembers; ++k) {
// Extract the reflection from the list of member specs.
llvm::APInt Idx(S.Context.getTypeSize(S.Context.getSizeType()), k, false);
Expr *IdxExpr = IntegerLiteral::Create(S.Context, Idx,
S.Context.getSizeType(),
Args[2]->getExprLoc());

ArraySubscriptExpr *SubscriptExpr =
new (S.Context) ArraySubscriptExpr(Args[2], IdxExpr,
S.Context.MetaInfoTy,
VK_LValue, OK_Ordinary,
Range.getBegin());

ImplicitCastExpr *RVExpr = ImplicitCastExpr::Create(S.Context,
S.Context.MetaInfoTy,
CK_LValueToRValue,
SubscriptExpr,
nullptr, VK_PRValue,
FPOptionsOverride());
if (RVExpr->isValueDependent() || RVExpr->isTypeDependent()) {
Expr *Synthesized = IntegerLiteral::Create(S.Context, Idx,
S.Context.getSizeType(),
Args[2]->getExprLoc());

Synthesized = new (S.Context) ArraySubscriptExpr(Args[2], Synthesized,
S.Context.MetaInfoTy,
VK_LValue, OK_Ordinary,
Range.getBegin());
if (Synthesized->isValueDependent() || Synthesized->isTypeDependent()) {
RestoreDC();
return true;
}

if (!Evaluator(Scratch, RVExpr, true) ||
if (!Evaluator(Scratch, Synthesized, true) ||
Scratch.getReflection().getKind() !=
ReflectionValue::RK_data_member_spec) {
RestoreDC();
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Sema/SemaTemplate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8843,6 +8843,8 @@ static ExprResult
BuildExpressionFromReflection(Sema &S, const ReflectionValue &R,
SourceLocation Loc) {
switch (R.getKind()) {
case ReflectionValue::RK_null:
return CXXReflectExpr::Create(S.Context, Loc, Loc);
case ReflectionValue::RK_type:
return CXXReflectExpr::Create(S.Context, Loc, Loc, R.getAsType());
case ReflectionValue::RK_expr_result:
Expand Down
35 changes: 28 additions & 7 deletions libcxx/include/experimental/meta
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ consteval auto has_default_argument(info) -> bool;
#include <span>
#include <string_view>
#include <type_traits>
#include <variant>
#include <vector>

#if __has_feature(reflection)
Expand Down Expand Up @@ -942,7 +943,17 @@ consteval auto reflect_invoke(info target, R1 &&targs, R2 &&args) -> info {

// Representation of a data member which may be passed to 'data_member_spec'.
struct data_member_options_t {
optional<string_view> name = nullopt;
// Wrapper for the name of a data member, stored as either a 'u8string' or a
// 'string' depending on the type of the argument it was constructed from.
struct name_type {
// The stored name; the active alternative records which string type was used.
std::variant<u8string, string> impl;

// Selected when a 'u8string' can be constructed from 'T'; stores the name as
// the 'u8string' alternative.
template <typename T> requires constructible_from<u8string, T>
consteval name_type(T &&in) : impl(std::in_place_type<u8string>, in) {}

// Selected when a 'string' can be constructed from 'T'; stores the name as
// the 'string' alternative.
template <typename T> requires constructible_from<string, T>
consteval name_type(T &&in) : impl(std::in_place_type<string>, in) {}
};

optional<name_type> name = nullopt;
bool is_static = false;
optional<int> alignment = nullopt;
optional<int> width = nullopt;
Expand All @@ -952,16 +963,26 @@ struct data_member_options_t {
// be used with 'define_class' to define a record type.
consteval auto data_member_spec(info member_type,
data_member_options_t options = {}) -> info {
string_view name = options.name.value_or("");
auto name = options.name.value_or(u8"").impl;
bool is_static = options.is_static;
int alignment = options.alignment.value_or(0);
int width = options.width.value_or(0);

return __metafunction(detail::__metafn_data_member_spec,
member_type, is_static,
options.name.has_value(), name.size(), name.data(),
options.alignment.has_value(), alignment,
options.width.has_value(), width);
if (holds_alternative<u8string>(name)) {
const u8string &s = get<u8string>(name);
return __metafunction(detail::__metafn_data_member_spec,
member_type, is_static, options.name.has_value(),
s.size(), ^const char8_t *, s.data(),
options.alignment.has_value(), alignment,
options.width.has_value(), width);
} else {
const string &s = get<string>(name);
return __metafunction(detail::__metafn_data_member_spec,
member_type, is_static, options.name.has_value(),
s.size(), ^const char *, s.data(),
options.alignment.has_value(), alignment,
options.width.has_value(), width);
}
}

// Completes the definition of the record type reflected by 'class_type' with
Expand Down
Loading

0 comments on commit 133a61c

Please sign in to comment.