diff --git a/sakura/sakura.vcxproj b/sakura/sakura.vcxproj
index cf905c0f9f..a89b337a14 100644
--- a/sakura/sakura.vcxproj
+++ b/sakura/sakura.vcxproj
@@ -303,6 +303,7 @@
+
@@ -413,6 +414,7 @@
+
@@ -647,6 +649,7 @@
+
@@ -776,6 +779,7 @@
+
diff --git a/sakura/sakura.vcxproj.filters b/sakura/sakura.vcxproj.filters
index 0b21690eb5..be2661ada1 100644
--- a/sakura/sakura.vcxproj.filters
+++ b/sakura/sakura.vcxproj.filters
@@ -119,6 +119,9 @@
{930f3f82-ab3f-49e3-af4a-d4f9c2d51f46}
+
+ {e4629f85-3be8-4dda-80db-1be310929433}
+
@@ -1085,6 +1088,12 @@
Cpp Source Files\mem
+
+ Cpp Source Files\extmodule
+
+
+ Cpp Source Files\charset\icu4c
+
@@ -2252,6 +2261,12 @@
Cpp Source Files\dlg
+
+ Cpp Source Files\extmodule
+
+
+ Cpp Source Files\charset\icu4c
+
diff --git a/sakura_core/Makefile b/sakura_core/Makefile
index 5ea88e13f6..55d68a600b 100644
--- a/sakura_core/Makefile
+++ b/sakura_core/Makefile
@@ -115,6 +115,7 @@ charset/CUnicode.o \
charset/CUnicodeBe.o \
charset/CUtf7.o \
charset/CUtf8.o \
+charset/icu4c/CharsetDetector.o \
cmd/CViewCommander.o \
cmd/CViewCommander_Bookmark.o \
cmd/CViewCommander_Clipboard.o \
@@ -228,6 +229,7 @@ extmodule/CBregexp.o \
extmodule/CBregexpDll2.o \
extmodule/CDllHandler.o \
extmodule/CHtmlHelp.o \
+extmodule/CIcu4cI18n.o \
extmodule/CMigemo.o \
extmodule/CUxTheme.o \
func/CFuncKeyWnd.o \
diff --git a/sakura_core/charset/CCodeMediator.cpp b/sakura_core/charset/CCodeMediator.cpp
index ebfa41b87f..c978dd2b51 100644
--- a/sakura_core/charset/CCodeMediator.cpp
+++ b/sakura_core/charset/CCodeMediator.cpp
@@ -1,6 +1,7 @@
/*! @file */
#include "StdAfx.h"
#include "charset/CCodeMediator.h"
+#include "charset/icu4c/CharsetDetector.h"
#include "charset/CESI.h"
#include "io/CBinaryStream.h"
@@ -23,6 +24,13 @@ ECodeType CCodeMediator::CheckKanjiCode(const char* buff, size_t size) noexcept
return m_sEncodingConfig.m_eDefaultCodetype;
}
+ // ICU4CのDLL群が利用できる場合、ICU4Cによる判定を試みる
+ CharsetDetector csd;
+ if (csd.IsAvailable()) {
+ auto code = csd.Detect(std::string_view(buff, size));
+ if (code != CODE_ERROR) return code;
+ }
+
CESI cesi(m_sEncodingConfig);
return cesi.CheckKanjiCode(buff, size);
}
diff --git a/sakura_core/charset/icu4c/CharsetDetector.cpp b/sakura_core/charset/icu4c/CharsetDetector.cpp
new file mode 100644
index 0000000000..0979e4b91f
--- /dev/null
+++ b/sakura_core/charset/icu4c/CharsetDetector.cpp
@@ -0,0 +1,77 @@
+/*! @file */
+/*
+ Copyright (C) 2018-2019 Sakura Editor Organization
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented;
+ you must not claim that you wrote the original software.
+ If you use this software in a product, an acknowledgment
+ in the product documentation would be appreciated but is
+ not required.
+
+ 2. Altered source versions must be plainly marked as such,
+ and must not be misrepresented as being the original software.
+
+ 3. This notice may not be removed or altered from any source
+ distribution.
+*/
+#include "StdAfx.h"
+#include "CharsetDetector.h"
+
+/*! @brief Builds the detector with no open handle and tries to load the ICU DLL. */
+CharsetDetector::CharsetDetector() noexcept : _icuin(), _csd(nullptr)
+{
+	// Any result of InitDll() is ignored here; IsAvailable() exposes the outcome.
+	_icuin.InitDll();
+}
+
+/*! @brief Closes the detector handle, but only when the ICU DLL is available. */
+CharsetDetector::~CharsetDetector() noexcept
+{
+	// ucsdet_close() forwards to a function pointer that starts out as nullptr.
+	if (_icuin.IsAvailable()) _icuin.ucsdet_close(_csd);
+}
+
+/*!
+ * @brief Guesses the encoding of a byte sequence through the ICU charset detector.
+ * @param [in] bytes raw bytes to inspect
+ * @return the matching ECodeType; CODE_ERROR when the DLL is unavailable, ICU
+ *         reports a non-zero status or no match, or the charset is not mapped below
+ */
+ECodeType CharsetDetector::Detect(const std::string_view& bytes)
+{
+	// Bail out if the DLL is not loaded or the size does not fit setText's int32_t length.
+	if (!_icuin.IsAvailable() || bytes.length() > 0x7FFFFFFFu) return CODE_ERROR;
+
+	// Close the handle left by an earlier call so that repeated calls do not leak it.
+	if (_csd != nullptr) { _icuin.ucsdet_close(_csd); _csd = nullptr; }
+	UErrorCode status = U_ZERO_ERROR;
+	_csd = _icuin.ucsdet_open(&status);
+	if (status != U_ZERO_ERROR || _csd == nullptr) return CODE_ERROR;
+	_icuin.ucsdet_setText(_csd, bytes.data(), static_cast<int32_t>(bytes.length()), &status);
+	if (status != U_ZERO_ERROR) return CODE_ERROR;
+	const auto csm = _icuin.ucsdet_detect(_csd, &status);
+	if (status != U_ZERO_ERROR || csm == nullptr) return CODE_ERROR;
+	// Keep the raw pointer first: constructing a string_view from nullptr is undefined.
+	const char* rawName = _icuin.ucsdet_getName(csm, &status);
+	if (status != U_ZERO_ERROR || rawName == nullptr) return CODE_ERROR;
+	const std::string_view name(rawName);
+
+	// Map the charset name to the editor's internal code.
+	if (name == "UTF-8") return CODE_UTF8;
+	if (name == "Shift_JIS" || name == "SHIFT_JIS") return CODE_SJIS; // ICU reports "Shift_JIS"
+	if (name == "UTF-16BE") return CODE_UNICODEBE;
+	if (name == "UTF-16LE") return CODE_UNICODE;
+	if (name == "EUC-JP") return CODE_EUC;
+	if (name == "ISO-2022-JP") return CODE_JIS;
+	if (name == "UTF-7") return CODE_UTF7;
+	if (name == "ISO-8859-1") return CODE_LATIN1;
+	return CODE_ERROR;
+}
diff --git a/sakura_core/charset/icu4c/CharsetDetector.h b/sakura_core/charset/icu4c/CharsetDetector.h
new file mode 100644
index 0000000000..e43915a4d0
--- /dev/null
+++ b/sakura_core/charset/icu4c/CharsetDetector.h
@@ -0,0 +1,48 @@
+/*! @file */
+/*
+ Copyright (C) 2018-2019 Sakura Editor Organization
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented;
+ you must not claim that you wrote the original software.
+ If you use this software in a product, an acknowledgment
+ in the product documentation would be appreciated but is
+ not required.
+
+ 2. Altered source versions must be plainly marked as such,
+ and must not be misrepresented as being the original software.
+
+ 3. This notice may not be removed or altered from any source
+ distribution.
+*/
+#pragma once
+
+#include <string_view>
+
+#include "extmodule/CIcu4cI18n.h"
+
+/*!
+ * @brief Charset detection class backed by the ICU4C i18n DLL wrapper.
+ */
+class CharsetDetector final
+{
+	CIcu4cI18n _icuin;        //!< wrapper around the imported ICU functions
+	UCharsetDetector* _csd;   //!< handle from ucsdet_open(), closed in the destructor
+
+public:
+	CharsetDetector() noexcept;
+	~CharsetDetector() noexcept;
+	// Not copyable: a copy would make two objects close the same _csd handle.
+	CharsetDetector(const CharsetDetector&) = delete;
+	CharsetDetector& operator=(const CharsetDetector&) = delete;
+
+	bool IsAvailable() const noexcept { return _icuin.IsAvailable(); }
+	ECodeType Detect(const std::string_view& bytes);
+};
diff --git a/sakura_core/extmodule/CIcu4cI18n.cpp b/sakura_core/extmodule/CIcu4cI18n.cpp
new file mode 100644
index 0000000000..8dd3add3ec
--- /dev/null
+++ b/sakura_core/extmodule/CIcu4cI18n.cpp
@@ -0,0 +1,69 @@
+/*! @file */
+/*
+ Copyright (C) 2018-2019 Sakura Editor Organization
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented;
+ you must not claim that you wrote the original software.
+ If you use this software in a product, an acknowledgment
+ in the product documentation would be appreciated but is
+ not required.
+
+ 2. Altered source versions must be plainly marked as such,
+ and must not be misrepresented as being the original software.
+
+ 3. This notice may not be removed or altered from any source
+ distribution.
+*/
+#include "StdAfx.h"
+#include "CIcu4cI18n.h"
+
+/*! @brief Starts with every imported function pointer cleared. */
+CIcu4cI18n::CIcu4cI18n() noexcept
+	: _ucsdet_open(nullptr), _ucsdet_setText(nullptr), _ucsdet_detect(nullptr)
+	, _ucsdet_getName(nullptr) // previously missing, which left the pointer uninitialised
+	, _ucsdet_close(nullptr)
+{
+}
+
+// Destructor: this class adds only function-pointer members, so it has
+// nothing of its own to release.
+CIcu4cI18n::~CIcu4cI18n() noexcept = default;
+
+/*!
+ * @brief Returns the file name of the DLL to load.
+ * @param [in] index ignored; the same name is returned for every index
+ */
+LPCWSTR CIcu4cI18n::GetDllNameImp([[maybe_unused]] int index)
+{
+	return L"icuin66.dll"; // the version number (66) is fixed
+}
+
+/*!
+	@brief DLL initialisation: fetches the function addresses into the members.
+
+	The lookup itself is delegated to RegisterEntries(), which receives a table
+	ending in a null entry.
+	@retval true  success
+	@retval false address lookup failed
+*/
+bool CIcu4cI18n::InitDllImp()
+{
+	// Names of the functions inside the DLL; the version suffix (_66) is fixed.
+	const ImportTable entries[] = {
+		{ &_ucsdet_open,    "ucsdet_open_66" },
+		{ &_ucsdet_setText, "ucsdet_setText_66" },
+		{ &_ucsdet_detect,  "ucsdet_detect_66" },
+		{ &_ucsdet_getName, "ucsdet_getName_66" },
+		{ &_ucsdet_close,   "ucsdet_close_66" },
+		{ nullptr, nullptr } // terminator
+	};
+	return RegisterEntries(entries);
+}
diff --git a/sakura_core/extmodule/CIcu4cI18n.h b/sakura_core/extmodule/CIcu4cI18n.h
new file mode 100644
index 0000000000..b64daec993
--- /dev/null
+++ b/sakura_core/extmodule/CIcu4cI18n.h
@@ -0,0 +1,81 @@
+/*! @file */
+/*
+ Copyright (C) 2018-2019 Sakura Editor Organization
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented;
+ you must not claim that you wrote the original software.
+ If you use this software in a product, an acknowledgment
+ in the product documentation would be appreciated but is
+ not required.
+
+ 2. Altered source versions must be plainly marked as such,
+ and must not be misrepresented as being the original software.
+
+ 3. This notice may not be removed or altered from any source
+ distribution.
+*/
+#pragma once
+
+#include "CDllHandler.h"
+
+// ICU4C type declarations, written out locally (no ICU header is included here).
+class UCharsetDetector; // used only through pointers
+class UCharsetMatch;    // used only through pointers
+
+// The imported functions write arbitrary ICU status values through UErrorCode*,
+// so the underlying type is fixed to int; only the "no error" value is named.
+enum UErrorCode : int { U_ZERO_ERROR = 0 /**< No error, no warning. */ };
+
+/*!
+ * @brief Wrapper class for the ICU4C i18n library (icuin.dll).
+ */
+class CIcu4cI18n final : public CDllImp
+{
+	// Signatures of the imported DLL functions
+	using ucsdet_open_t    = UCharsetDetector* (_cdecl *)(UErrorCode *status);
+	using ucsdet_setText_t = void (_cdecl *)(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status);
+	using ucsdet_detect_t  = const UCharsetMatch* (_cdecl *)(UCharsetDetector *ucsd, UErrorCode *status);
+	using ucsdet_getName_t = const char* (_cdecl *)(const UCharsetMatch *ucsm, UErrorCode *status);
+	using ucsdet_close_t   = void (_cdecl *)(UCharsetDetector *ucsd);
+
+	// Function pointers whose addresses InitDllImp() passes to RegisterEntries()
+	ucsdet_open_t    _ucsdet_open;
+	ucsdet_setText_t _ucsdet_setText;
+	ucsdet_detect_t  _ucsdet_detect;
+	ucsdet_getName_t _ucsdet_getName;
+	ucsdet_close_t   _ucsdet_close;
+
+public:
+	CIcu4cI18n() noexcept;
+	virtual ~CIcu4cI18n() noexcept;
+
+protected:
+	// CDllImp interface
+	LPCWSTR GetDllNameImp(int nIndex) override;
+	bool InitDllImp() override;
+
+public:
+	// Thin forwarders: each hands its arguments unchanged to the imported function.
+	// Forwards to the imported ucsdet_open.
+	UCharsetDetector* ucsdet_open(UErrorCode *status) const { return _ucsdet_open(status); }
+
+	// Forwards to the imported ucsdet_setText.
+	void ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status) const { _ucsdet_setText(ucsd, textIn, len, status); }
+
+	// Forwards to the imported ucsdet_detect.
+	const UCharsetMatch* ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status) const { return _ucsdet_detect(ucsd, status); }
+
+	// Forwards to the imported ucsdet_getName.
+	const char* ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status) const { return _ucsdet_getName(ucsm, status); }
+
+	// Forwards to the imported ucsdet_close.
+	void ucsdet_close(UCharsetDetector *ucsd) const { _ucsdet_close(ucsd); }
+};