-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathHTMLEncodingResolver.h
58 lines (46 loc) · 1.99 KB
/
HTMLEncodingResolver.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
/************************************************************************
**
** Copyright (C) 2016-2024 Kevin B. Hendricks, Stratford Ontario Canada
** Copyright (C) 2009-2011 Strahinja Markovic <strahinja.markovic@gmail.com>
**
** This file is part of PageEdit.
**
** PageEdit is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** PageEdit is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with PageEdit. If not, see <http://www.gnu.org/licenses/>.
**
*************************************************************************/
#pragma once
#ifndef HTMLEncodingResolver_H
#define HTMLEncodingResolver_H
#include <QStringDecoder>
class QString;
/**
 * Collection of static helpers for loading an HTML file whose text
 * encoding is not known in advance.
 */
class HTMLEncodingResolver
{
public:
    /**
     * Loads an HTML file and hands its contents back as Unicode text.
     *
     * The file is read, its encoding is detected, and the text is
     * converted to Unicode before being returned.
     *
     * @param fullfilepath  Full path to the HTML file.
     * @return The file's text converted to Unicode.
     */
    static QString ReadHTMLFile(const QString &fullfilepath);

private:
    /**
     * Examines a raw HTML byte stream and tries to work out its encoding.
     *
     * This exists because Qt's own HTML encoding detection (referred to
     * historically as QTextCodec::codecForHtml()) leaves a *lot* to be
     * desired.
     *
     * NOTE(review): the earlier comment said the default codec for the
     * current locale is returned when no encoding is detected; that wording
     * predates the QStringDecoder return type, so confirm the fallback
     * against the implementation.
     *
     * @param raw_text  The undecoded bytes of the HTML document.
     * @return A decoder for the encoding that was determined.
     */
    static QStringDecoder GetDecoderForHTML(const QByteArray &raw_text);

    /**
     * Walks the entire byte array and checks whether it forms a valid
     * UTF-8 sequence. When it does, the data is probably a UTF-8 string.
     *
     * @param string  The bytes to examine.
     * @return Whether the bytes were found to be valid UTF-8.
     */
    static bool IsValidUtf8(const QByteArray &string);

    /**
     * Takes a byte array and returns a byte array.
     *
     * NOTE(review): judging by the name, this presumably rewrites a
     * code-page / encoding label into a form the decoder lookup accepts --
     * confirm against the implementation.
     */
    static QByteArray FixupCodePageMapping(const QByteArray &ba);
};
#endif // HTMLEncodingResolver_H