diff --git a/sakura/sakura.vcxproj b/sakura/sakura.vcxproj index cf905c0f9f..a89b337a14 100644 --- a/sakura/sakura.vcxproj +++ b/sakura/sakura.vcxproj @@ -303,6 +303,7 @@ + @@ -413,6 +414,7 @@ + @@ -647,6 +649,7 @@ + @@ -776,6 +779,7 @@ + diff --git a/sakura/sakura.vcxproj.filters b/sakura/sakura.vcxproj.filters index 0b21690eb5..be2661ada1 100644 --- a/sakura/sakura.vcxproj.filters +++ b/sakura/sakura.vcxproj.filters @@ -119,6 +119,9 @@ {930f3f82-ab3f-49e3-af4a-d4f9c2d51f46} + + {e4629f85-3be8-4dda-80db-1be310929433} + @@ -1085,6 +1088,12 @@ Cpp Source Files\mem + + Cpp Source Files\extmodule + + + Cpp Source Files\charset\icu4c + @@ -2252,6 +2261,12 @@ Cpp Source Files\dlg + + Cpp Source Files\extmodule + + + Cpp Source Files\charset\icu4c + diff --git a/sakura_core/Makefile b/sakura_core/Makefile index 5ea88e13f6..55d68a600b 100644 --- a/sakura_core/Makefile +++ b/sakura_core/Makefile @@ -115,6 +115,7 @@ charset/CUnicode.o \ charset/CUnicodeBe.o \ charset/CUtf7.o \ charset/CUtf8.o \ +charset/icu4c/CharsetDetector.o \ cmd/CViewCommander.o \ cmd/CViewCommander_Bookmark.o \ cmd/CViewCommander_Clipboard.o \ @@ -228,6 +229,7 @@ extmodule/CBregexp.o \ extmodule/CBregexpDll2.o \ extmodule/CDllHandler.o \ extmodule/CHtmlHelp.o \ +extmodule/CIcu4cI18n.o \ extmodule/CMigemo.o \ extmodule/CUxTheme.o \ func/CFuncKeyWnd.o \ diff --git a/sakura_core/charset/CCodeMediator.cpp b/sakura_core/charset/CCodeMediator.cpp index ebfa41b87f..c978dd2b51 100644 --- a/sakura_core/charset/CCodeMediator.cpp +++ b/sakura_core/charset/CCodeMediator.cpp @@ -1,6 +1,7 @@ /*! 
@file */ #include "StdAfx.h" #include "charset/CCodeMediator.h" +#include "charset/icu4c/CharsetDetector.h" #include "charset/CESI.h" #include "io/CBinaryStream.h" @@ -23,6 +24,13 @@ ECodeType CCodeMediator::CheckKanjiCode(const char* buff, size_t size) noexcept return m_sEncodingConfig.m_eDefaultCodetype; } + // ICU4CのDLL群が利用できる場合、ICU4Cによる判定を試みる + CharsetDetector csd; + if (csd.IsAvailable()) { + auto code = csd.Detect(std::string_view(buff, size)); + if (code != CODE_ERROR) return code; + } + CESI cesi(m_sEncodingConfig); return cesi.CheckKanjiCode(buff, size); } diff --git a/sakura_core/charset/icu4c/CharsetDetector.cpp b/sakura_core/charset/icu4c/CharsetDetector.cpp new file mode 100644 index 0000000000..0979e4b91f --- /dev/null +++ b/sakura_core/charset/icu4c/CharsetDetector.cpp @@ -0,0 +1,77 @@ +/*! @file */ +/* + Copyright (C) 2018-2019 Sakura Editor Organization + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; + you must not claim that you wrote the original software. + If you use this software in a product, an acknowledgment + in the product documentation would be appreciated but is + not required. + + 2. Altered source versions must be plainly marked as such, + and must not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. 
+*/
+#include "StdAfx.h"
+#include "CharsetDetector.h"
+
+#include <cstdint>
+
+/*!
+ * @brief Construct the detector and attempt to load the ICU4C i18n DLL.
+ *
+ * The outcome of InitDll() is not inspected here; availability is reported
+ * through IsAvailable().
+ */
+CharsetDetector::CharsetDetector() noexcept
+    : _icuin()
+    , _csd(nullptr)
+{
+    _icuin.InitDll();
+}
+
+/*!
+ * @brief Close the ICU detector handle if one was opened.
+ */
+CharsetDetector::~CharsetDetector() noexcept
+{
+    // The close entry point is only resolved when the DLL is available.
+    if (_csd != nullptr && _icuin.IsAvailable()) {
+        _icuin.ucsdet_close(_csd);
+        _csd = nullptr;
+    }
+}
+
+/*!
+ * @brief Guess the character set of a byte sequence using ICU4C.
+ *
+ * @param [in] bytes raw bytes to examine
+ * @return the matching ECodeType, or CODE_ERROR when the DLL is unavailable,
+ *         an ICU call reports a status other than U_ZERO_ERROR, no match or
+ *         name is returned, or the detected name is not in the table below.
+ */
+ECodeType CharsetDetector::Detect(const std::string_view& bytes)
+{
+    // Without the DLL the entry points are unresolved and must not be called.
+    if (!_icuin.IsAvailable()) {
+        return CODE_ERROR;
+    }
+
+    UErrorCode status = U_ZERO_ERROR;
+
+    // Open the detector once and reuse it; reopening on every call would
+    // overwrite (and leak) the handle obtained by the previous call.
+    if (_csd == nullptr) {
+        _csd = _icuin.ucsdet_open(&status);
+        if (status != U_ZERO_ERROR || _csd == nullptr) {
+            return CODE_ERROR;
+        }
+    }
+
+    // ucsdet_setText takes an int32_t length; clamp so that a huge buffer
+    // cannot wrap to a negative value.
+    const size_t maxLen = static_cast<size_t>(INT32_MAX);
+    const int32_t len = static_cast<int32_t>(bytes.length() < maxLen ? bytes.length() : maxLen);
+    _icuin.ucsdet_setText(_csd, bytes.data(), len, &status);
+    if (status != U_ZERO_ERROR) {
+        return CODE_ERROR;
+    }
+
+    const UCharsetMatch* match = _icuin.ucsdet_detect(_csd, &status);
+    if (status != U_ZERO_ERROR || match == nullptr) {
+        return CODE_ERROR;
+    }
+
+    // Check the raw pointer before wrapping it: building a std::string_view
+    // from a null pointer is undefined behavior.
+    const char* rawName = _icuin.ucsdet_getName(match, &status);
+    if (status != U_ZERO_ERROR || rawName == nullptr) {
+        return CODE_ERROR;
+    }
+    const std::string_view name(rawName);
+
+    // Convert the charset name to the Sakura Editor internal code.
+    if (name == "UTF-8") return CODE_UTF8;
+    // ICU spells this name "Shift_JIS"; the upper-case form is kept for compatibility.
+    if (name == "Shift_JIS" || name == "SHIFT_JIS") return CODE_SJIS;
+    if (name == "UTF-16BE") return CODE_UNICODEBE;
+    if (name == "UTF-16LE") return CODE_UNICODE;
+    if (name == "EUC-JP") return CODE_EUC;
+    if (name == "ISO-2022-JP") return CODE_JIS;
+    if (name == "UTF-7") return CODE_UTF7;
+    if (name == "ISO-8859-1") return CODE_LATIN1;
+
+    return CODE_ERROR;
+}
diff --git a/sakura_core/charset/icu4c/CharsetDetector.h b/sakura_core/charset/icu4c/CharsetDetector.h
new file mode 100644
index 0000000000..e43915a4d0
--- /dev/null
+++ b/sakura_core/charset/icu4c/CharsetDetector.h
@@ -0,0 +1,48 @@
+/*! @file */
+/*
+    Copyright (C) 2018-2019 Sakura Editor Organization
+
+    This software is provided 'as-is', without any express or implied
+    warranty. In no event will the authors be held liable for any damages
+    arising from the use of this software.
+
+    Permission is granted to anyone to use this software for any purpose,
+    including commercial applications, and to alter it and redistribute it
+    freely, subject to the following restrictions:
+
+        1.
The origin of this software must not be misrepresented;
+           you must not claim that you wrote the original software.
+           If you use this software in a product, an acknowledgment
+           in the product documentation would be appreciated but is
+           not required.
+
+        2. Altered source versions must be plainly marked as such,
+           and must not be misrepresented as being the original software.
+
+        3. This notice may not be removed or altered from any source
+           distribution.
+*/
+#pragma once
+
+#include <string_view>
+
+#include "extmodule/CIcu4cI18n.h"
+
+/*!
+ * @brief Character encoding detection class.
+ *
+ * Wraps the ICU4C charset detector reached through CIcu4cI18n.
+ */
+class CharsetDetector final
+{
+    CIcu4cI18n _icuin;          //!< wrapper around the ICU4C i18n DLL
+    UCharsetDetector* _csd;     //!< ICU detector handle; closed by the destructor
+
+public:
+    CharsetDetector() noexcept;
+    ~CharsetDetector() noexcept;
+
+    // The class holds a raw detector handle that its destructor closes, so a
+    // copy would close the same handle twice. Copying is therefore disabled.
+    CharsetDetector(const CharsetDetector&) = delete;
+    CharsetDetector& operator=(const CharsetDetector&) = delete;
+
+    /*!
+     * @brief Report whether the ICU4C DLL wrapper is available.
+     * @return the value of CIcu4cI18n::IsAvailable()
+     */
+    bool IsAvailable() const noexcept {
+        return _icuin.IsAvailable();
+    }
+
+    /*!
+     * @brief Detect the character set of a byte sequence.
+     * @param [in] bytes raw bytes to examine
+     * @return the detected ECodeType, or CODE_ERROR when detection fails
+     */
+    ECodeType Detect(const std::string_view& bytes);
+};
diff --git a/sakura_core/extmodule/CIcu4cI18n.cpp b/sakura_core/extmodule/CIcu4cI18n.cpp
new file mode 100644
index 0000000000..8dd3add3ec
--- /dev/null
+++ b/sakura_core/extmodule/CIcu4cI18n.cpp
@@ -0,0 +1,69 @@
+/*! @file */
+/*
+    Copyright (C) 2018-2019 Sakura Editor Organization
+
+    This software is provided 'as-is', without any express or implied
+    warranty. In no event will the authors be held liable for any damages
+    arising from the use of this software.
+
+    Permission is granted to anyone to use this software for any purpose,
+    including commercial applications, and to alter it and redistribute it
+    freely, subject to the following restrictions:
+
+        1. The origin of this software must not be misrepresented;
+           you must not claim that you wrote the original software.
+           If you use this software in a product, an acknowledgment
+           in the product documentation would be appreciated but is
+           not required.
+
+        2. Altered source versions must be plainly marked as such,
+           and must not be misrepresented as being the original software.
+
+        3. This notice may not be removed or altered from any source
+           distribution.
+*/
+#include "StdAfx.h"
+#include "CIcu4cI18n.h"
+
+/*!
+ * @brief Construct the wrapper with every entry point unresolved.
+ *
+ * All five function pointers start as nullptr; InitDllImp() fills them in.
+ */
+CIcu4cI18n::CIcu4cI18n() noexcept
+    : _ucsdet_open(nullptr)
+    , _ucsdet_setText(nullptr)
+    , _ucsdet_detect(nullptr)
+    , _ucsdet_getName(nullptr)
+    , _ucsdet_close(nullptr)
+{
+}
+
+/*!
+ * @brief Destructor; performs no work of its own.
+ */
+CIcu4cI18n::~CIcu4cI18n() noexcept
+{
+}
+
+/*!
+ * @brief Return the name of the DLL.
+ *
+ * @param [in] index ignored; the same name is returned for every index
+ */
+LPCWSTR CIcu4cI18n::GetDllNameImp(int index)
+{
+    (void)index;    // intentionally unused
+    return L"icuin66.dll";  // the version is fixed
+}
+
+/*!
+    Initialize the DLL.
+
+    Obtains the function addresses and stores them in the members.
+
+    @retval true    success
+    @retval false   failed to obtain an address
+*/
+bool CIcu4cI18n::InitDllImp()
+{
+    // List of function names inside the DLL (the version suffix is fixed).
+    const ImportTable table[] = {
+        { &_ucsdet_open,    "ucsdet_open_66" },
+        { &_ucsdet_setText, "ucsdet_setText_66" },
+        { &_ucsdet_detect,  "ucsdet_detect_66" },
+        { &_ucsdet_getName, "ucsdet_getName_66" },
+        { &_ucsdet_close,   "ucsdet_close_66" },
+        { NULL, 0 }
+    };
+    return RegisterEntries(table);
+}
diff --git a/sakura_core/extmodule/CIcu4cI18n.h b/sakura_core/extmodule/CIcu4cI18n.h
new file mode 100644
index 0000000000..b64daec993
--- /dev/null
+++ b/sakura_core/extmodule/CIcu4cI18n.h
@@ -0,0 +1,81 @@
+/*! @file */
+/*
+    Copyright (C) 2018-2019 Sakura Editor Organization
+
+    This software is provided 'as-is', without any express or implied
+    warranty. In no event will the authors be held liable for any damages
+    arising from the use of this software.
+
+    Permission is granted to anyone to use this software for any purpose,
+    including commercial applications, and to alter it and redistribute it
+    freely, subject to the following restrictions:
+
+        1. The origin of this software must not be misrepresented;
+           you must not claim that you wrote the original software.
+           If you use this software in a product, an acknowledgment
+           in the product documentation would be appreciated but is
+           not required.
+
+        2. Altered source versions must be plainly marked as such,
+           and must not be misrepresented as being the original software.
+
+        3.
This notice may not be removed or altered from any source
+           distribution.
+*/
+#pragma once
+
+#include <cstdint>
+
+#include "CDllHandler.h"
+
+// ICU4C type definitions (opaque handles, only ever used through pointers)
+class UCharsetDetector;
+class UCharsetMatch;
+
+// NOTE(review): only the success value is mirrored here; presumably ICU
+// reports other status values as well — confirm against the ICU headers.
+typedef enum UErrorCode {
+    U_ZERO_ERROR = 0,   /**< No error, no warning. */
+} UErrorCode;
+
+/*!
+ * Class that wraps the ICU4C i18n library (icuin.dll).
+ *
+ * The forwarding members below call the resolved entry points directly, so
+ * they must only be used after the entry points have been registered by
+ * InitDllImp(); until then every pointer is nullptr.
+ */
+class CIcu4cI18n final : public CDllImp
+{
+    // DLL function type definitions
+    typedef UCharsetDetector* (_cdecl *ucsdet_open_t)(UErrorCode *status);
+    typedef void (_cdecl *ucsdet_setText_t)(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status);
+    typedef const UCharsetMatch * (_cdecl *ucsdet_detect_t)(UCharsetDetector *ucsd, UErrorCode *status);
+    typedef const char* (_cdecl *ucsdet_getName_t)(const UCharsetMatch *ucsm, UErrorCode *status);
+    typedef void (_cdecl *ucsdet_close_t)(UCharsetDetector *ucsd);
+
+    // Member definitions. Default initializers guarantee that no pointer is
+    // ever left indeterminate, whatever the constructor lists.
+    ucsdet_open_t       _ucsdet_open = nullptr;
+    ucsdet_setText_t    _ucsdet_setText = nullptr;
+    ucsdet_detect_t     _ucsdet_detect = nullptr;
+    ucsdet_getName_t    _ucsdet_getName = nullptr;
+    ucsdet_close_t      _ucsdet_close = nullptr;
+
+public:
+    CIcu4cI18n() noexcept;
+    virtual ~CIcu4cI18n() noexcept;
+
+protected:
+    // CDllImp interface
+    LPCWSTR GetDllNameImp(int nIndex) override;
+    bool InitDllImp() override;
+
+public:
+    //! Forward to the resolved ucsdet_open entry point.
+    inline UCharsetDetector* ucsdet_open(UErrorCode *status) const {
+        return _ucsdet_open(status);
+    }
+    //! Forward to the resolved ucsdet_setText entry point.
+    inline void ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status) const {
+        return _ucsdet_setText(ucsd, textIn, len, status);
+    }
+    //! Forward to the resolved ucsdet_detect entry point.
+    inline const UCharsetMatch* ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status) const {
+        return _ucsdet_detect(ucsd, status);
+    }
+    //! Forward to the resolved ucsdet_getName entry point.
+    inline const char* ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status) const {
+        return _ucsdet_getName(ucsm, status);
+    }
+    //! Forward to the resolved ucsdet_close entry point.
+    inline void ucsdet_close(UCharsetDetector *ucsd) const {
+        return _ucsdet_close(ucsd);
+    }
+};