Skip to content

Commit

Permalink
Merge pull request #66 from MtFmT-Lib/str_utf_8
Browse files Browse the repository at this point in the history
UTF-8: 字符串操作支持
  • Loading branch information
XiangYyang authored Jun 21, 2023
2 parents a72a1b3 + aa9d4a4 commit f10cbad
Show file tree
Hide file tree
Showing 27 changed files with 964 additions and 188 deletions.
30 changes: 26 additions & 4 deletions inc/mm_cfg.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,17 +181,33 @@
* @brief 标记不可达的分支
*
*/
#define mstr_unreachable() assert_param(0)
#define mstr_unreachable() assert_param(0)
/**
* @brief 边界检查
*
*/
#define mstr_bounding_check(expr) assert_param(!!(expr))
#elif defined(_MSTR_RUNTIME_ASSERT)
#include <assert.h>
#define mstr_unreachable() assert(0)
#define mstr_unreachable() assert(0)
#define mstr_bounding_check(expr) assert(!!(expr))
#else
#define mstr_unreachable() ((void)0U)
#define mstr_unreachable() ((void)0U)
#define mstr_bounding_check(expr) ((void)0U)
#endif
#else
#define mstr_unreachable() ((void)0U)
#define mstr_unreachable() ((void)0U)
#define mstr_bounding_check(expr) ((void)0U)
#endif // _MSTR_RUNTIME_CTRLFLOW_MARKER

#if !defined(_MSTR_USE_UTF_8)
/**
* @brief Specifies whether UTF-8 support is enabled
*
* @note Only defined here (as 1) when it has not already been defined
*/
#define _MSTR_USE_UTF_8 1
#endif // _MSTR_USE_UTF_8

//
// 导出函数修辞
//
Expand Down Expand Up @@ -245,6 +261,12 @@
*/
#define MSTRCFG_USE_STD_IO 0x08

/**
* @brief Flag bit marking that UTF-8 is in use
*
*/
#define MSTRCFG_USE_UTF_8 0x10

/**
* @brief 取得库版本信息
*
Expand Down
4 changes: 4 additions & 0 deletions inc/mm_result.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ typedef enum tagMStrResult
MStr_Err_HeapTooSmall = -128,
// ERR: 未实现
MStr_Err_NoImplemention,
// ERR: 编码错误
MStr_Err_UnicodeEncodingError,
// ERR: 源字符串不完整
MStr_Err_UnicodeEncodingNotCompleted,
// ERR: buff太短
MStr_Err_BufferTooSmall,
// ERR: 格式化: 索引太多
Expand Down
118 changes: 108 additions & 10 deletions inc/mm_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,75 @@ typedef struct tagMString
*/
char stack_region[MSTR_STACK_REGION_SIZE];

/**
* @brief 字符串的字节长度
*
*/
usize_t count;

/**
* @brief 字符串长度
*
*/
usize_t length;

/**
* @brief 已经分配了的内存大小, cap_size >= length + 1
* @brief 已经分配了的内存大小, cap_size >= count + 1
*
*/
usize_t cap_size;
} MString;

/**
* @brief String iterator (read-only: the position pointer is const char*)
*
*/
typedef struct tagMStringIter
{
/**
* @brief Current position
*
*/
const char* it;

/**
* @brief End position
*
*/
const char* it_end;

/**
* @brief Remaining length
*
* NOTE(review): the unit (bytes vs. characters) is not stated here - confirm
*/
usize_t rem_length;
} MStringIter;

/**
* @brief String iterator (mutable: the position pointer is a non-const char*)
*
*/
typedef struct tagMStringIterMut
{
/**
* @brief Current position
*
*/
char* it;

/**
* @brief End position
*
*/
const char* it_end;

/**
* @brief Remaining length
*
* NOTE(review): the unit (bytes vs. characters) is not stated here - confirm
*/
usize_t rem_length;
} MStringIterMut;

/**
* @brief 创建字符串
*
Expand All @@ -59,7 +115,7 @@ typedef struct tagMString
MSTR_EXPORT_API(mstr_result_t)
mstr_create(MString* str, const char* content);

// 创建空字符串
//! 创建空字符串
#define mstr_create_empty(s) (mstr_create((s), "\0"))

/**
Expand All @@ -81,11 +137,13 @@ MSTR_EXPORT_API(mstr_result_t)
mstr_copy_create(MString* str, const MString* other);

/**
* @brief 清空字符串
* @brief 取得第idx位置的字符ch
*
* @param[inout] str: 需要清空的字符串
* @attention 边界检查失败会造成assert false, 函数失败会返回0
* 而不是返回mstr_result
*/
MSTR_EXPORT_API(void) mstr_clear(MString* str);
MSTR_EXPORT_API(mstr_codepoint_t)
mstr_char_at(const MString* str, usize_t idx);

/**
* @brief 拼接字符串
Expand Down Expand Up @@ -127,21 +185,29 @@ mstr_concat_cstr_slice(
* @brief 向字符串尾部插入一个字符
*
* @param[inout] str: 字符串
* @param[in] ch: 需要放入的字符
* @param[in] ch: 需要放入的字符(unicode point)
* @return mstr_result_t: 结果
*/
MSTR_EXPORT_API(mstr_result_t) mstr_append(MString* str, char ch);
MSTR_EXPORT_API(mstr_result_t)
mstr_append(MString* str, mstr_codepoint_t ch);

/**
* @brief 向字符串尾部重复插入一个字符
*
* @param[inout] str: 字符串
* @param[in] ch: 需要放入的字符
* @param[in] ch: 需要放入的字符(unicode point)
* @param[in] cnt: 重复次数
* @return mstr_result_t: 结果
*/
MSTR_EXPORT_API(mstr_result_t)
mstr_repeat_append(MString* str, char ch, usize_t cnt);
mstr_repeat_append(MString* str, mstr_codepoint_t ch, usize_t cnt);

/**
* @brief Clear the string
*
* @param[inout] str: the string to be cleared
*/
MSTR_EXPORT_API(void) mstr_clear(MString* str);

/**
* @brief 翻转字符串
Expand All @@ -150,7 +216,7 @@ mstr_repeat_append(MString* str, char ch, usize_t cnt);
*
* @return None (the function returns void); the reversal always succeeds
*/
MSTR_EXPORT_API(mstr_result_t) mstr_reverse_self(MString* str);
MSTR_EXPORT_API(void) mstr_reverse_self(MString* str);

/**
* @brief 转换为cstr
Expand All @@ -170,6 +236,38 @@ MSTR_EXPORT_API(const char*) mstr_as_cstr(MString* str);
*/
MSTR_EXPORT_API(bool_t) mstr_equal(const MString* a, const MString* b);

/**
* @brief Obtain an iterator
*
* @param[out] it: receives the iterator
* @param[in] str: the source string
*
*/
MSTR_EXPORT_API(void) mstr_iter(MStringIter* it, const MString* str);

/**
* @brief Obtain a mutable iterator
*
* @param[out] it: receives the iterator
* @param[in] str: the source string
*/
MSTR_EXPORT_API(void) mstr_iter_mut(MStringIterMut* it, MString* str);

/**
* @brief Check whether an iterator has reached the end
*
* @note Expands to a comparison of the `it` and `it_end` members; the
*       argument is an iterator object (not a pointer) and is evaluated twice
*/
#define mstr_iter_is_end(it) ((it).it == (it).it_end)

/**
* @brief Determine the length of the character introduced by the lead
*        character `lead`, with the lead character itself included
*
* @note When UTF-8 support is enabled, this determines the length of the
*       UTF-8 encoded character and returns 0 on error; otherwise the
*       function always returns 1
*
*/
MSTR_EXPORT_API(usize_t) mstr_char_length(char lead);

/**
* @brief 释放一个字符串所占的内存
*
Expand Down
9 changes: 8 additions & 1 deletion inc/mm_string.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ class string final

public:
using char_t = char;
using value_t = char_t;
using pointer = value_t*;
using const_pointer = const value_t*;
using reference = value_t&;
using const_reference = const value_t&;
using size_type = size_t;
using difference_type = ptrdiff_t;

/**
* @brief 创建空的字符串
Expand Down Expand Up @@ -268,7 +275,7 @@ class string final
*/
std::string as_std_string()
{
return std::string(this_obj.buff, this_obj.length);
return std::string(this_obj.buff, this_obj.count);
}

/**
Expand Down
12 changes: 12 additions & 0 deletions inc/mm_typedef.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,18 @@ enum
False = false,
};

/**
* @brief Character
*
*/
typedef char mstr_char_t;

/**
* @brief Unicode code point
*
*/
typedef uint32_t mstr_codepoint_t;

/**
* @brief RTC时间
*
Expand Down
3 changes: 3 additions & 0 deletions src/mm_cfg.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ MSTR_EXPORT_API(uint32_t) mstr_configure(void)
#if _MSTR_USE_STD_IO
configure |= MSTRCFG_USE_STD_IO;
#endif // _MSTR_USE_STD_IO
#if _MSTR_USE_UTF_8
configure |= MSTRCFG_USE_UTF_8;
#endif // _MSTR_USE_UTF_8
// 使用的编译器信息
configure |= MSTR_BUILD_CC << 12;
// ret
Expand Down
2 changes: 1 addition & 1 deletion src/mm_fmt.c
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ static mstr_result_t copy_to_output(
return mstr_concat(out_str, src_str);
}
// 计算宽度够不够
src_len = src_str->length;
src_len = src_str->count;
need_width = (usize_t)fmt_spec->width;
if (src_len >= need_width) {
// 宽度太宽, 不管对齐了
Expand Down
12 changes: 9 additions & 3 deletions src/mm_into.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,9 @@ static mstr_result_t utoa_impl_10base(MString* str, uint32_t value)
}
}
// 翻转转换结果
MSTR_AND_THEN(result, mstr_reverse_self(str));
if (MSTR_SUCC(result)) {
mstr_reverse_self(str);
}
}
return result;
}
Expand Down Expand Up @@ -209,7 +211,9 @@ static mstr_result_t utoa_impl_2base(
}
}
// 翻转转换结果
MSTR_AND_THEN(result, mstr_reverse_self(str));
if (MSTR_SUCC(result)) {
mstr_reverse_self(str);
}
}
return result;
}
Expand Down Expand Up @@ -315,7 +319,9 @@ static mstr_result_t uqtoa_helper_dpart(
}
}
// 翻转结果
MSTR_AND_THEN(result, mstr_reverse_self(&buff));
if (MSTR_SUCC(result)) {
mstr_reverse_self(&buff);
}
}
// copy到输出
MSTR_AND_THEN(result, mstr_concat(str, &buff));
Expand Down
2 changes: 1 addition & 1 deletion src/mm_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ mstr_iovformat(
// 写到输出
MSTR_AND_THEN(
res,
io->io_write(io->capture, (const byte_t*)buff.buff, buff.length)
io->io_write(io->capture, (const byte_t*)buff.buff, buff.count)
);
// 释放
if (MSTR_SUCC(res_create)) {
Expand Down
Loading

0 comments on commit f10cbad

Please sign in to comment.