-
Notifications
You must be signed in to change notification settings - Fork 1
/
misc.php
145 lines (142 loc) · 6.73 KB
/
misc.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
<?php
/**
* Converts a Windows-1252 encoded string to a UTF-8 encoded string
*
* @static
* @param string $string Windows-1252 encoded string
* @return string UTF-8 encoded string
*/
public static function windows_1252_to_utf8($string)
{
static $convert_table = array("\x80" => "\xE2\x82\xAC", "\x81" => "\xEF\xBF\xBD", "\x82" => "\xE2\x80\x9A", "\x83" => "\xC6\x92", "\x84" => "\xE2\x80\x9E", "\x85" => "\xE2\x80\xA6", "\x86" => "\xE2\x80\xA0", "\x87" => "\xE2\x80\xA1", "\x88" => "\xCB\x86", "\x89" => "\xE2\x80\xB0", "\x8A" => "\xC5\xA0", "\x8B" => "\xE2\x80\xB9", "\x8C" => "\xC5\x92", "\x8D" => "\xEF\xBF\xBD", "\x8E" => "\xC5\xBD", "\x8F" => "\xEF\xBF\xBD", "\x90" => "\xEF\xBF\xBD", "\x91" => "\xE2\x80\x98", "\x92" => "\xE2\x80\x99", "\x93" => "\xE2\x80\x9C", "\x94" => "\xE2\x80\x9D", "\x95" => "\xE2\x80\xA2", "\x96" => "\xE2\x80\x93", "\x97" => "\xE2\x80\x94", "\x98" => "\xCB\x9C", "\x99" => "\xE2\x84\xA2", "\x9A" => "\xC5\xA1", "\x9B" => "\xE2\x80\xBA", "\x9C" => "\xC5\x93", "\x9D" => "\xEF\xBF\xBD", "\x9E" => "\xC5\xBE", "\x9F" => "\xC5\xB8", "\xA0" => "\xC2\xA0", "\xA1" => "\xC2\xA1", "\xA2" => "\xC2\xA2", "\xA3" => "\xC2\xA3", "\xA4" => "\xC2\xA4", "\xA5" => "\xC2\xA5", "\xA6" => "\xC2\xA6", "\xA7" => "\xC2\xA7", "\xA8" => "\xC2\xA8", "\xA9" => "\xC2\xA9", "\xAA" => "\xC2\xAA", "\xAB" => "\xC2\xAB", "\xAC" => "\xC2\xAC", "\xAD" => "\xC2\xAD", "\xAE" => "\xC2\xAE", "\xAF" => "\xC2\xAF", "\xB0" => "\xC2\xB0", "\xB1" => "\xC2\xB1", "\xB2" => "\xC2\xB2", "\xB3" => "\xC2\xB3", "\xB4" => "\xC2\xB4", "\xB5" => "\xC2\xB5", "\xB6" => "\xC2\xB6", "\xB7" => "\xC2\xB7", "\xB8" => "\xC2\xB8", "\xB9" => "\xC2\xB9", "\xBA" => "\xC2\xBA", "\xBB" => "\xC2\xBB", "\xBC" => "\xC2\xBC", "\xBD" => "\xC2\xBD", "\xBE" => "\xC2\xBE", "\xBF" => "\xC2\xBF", "\xC0" => "\xC3\x80", "\xC1" => "\xC3\x81", "\xC2" => "\xC3\x82", "\xC3" => "\xC3\x83", "\xC4" => "\xC3\x84", "\xC5" => "\xC3\x85", "\xC6" => "\xC3\x86", "\xC7" => "\xC3\x87", "\xC8" => "\xC3\x88", "\xC9" => "\xC3\x89", "\xCA" => "\xC3\x8A", "\xCB" => "\xC3\x8B", "\xCC" => "\xC3\x8C", "\xCD" => "\xC3\x8D", "\xCE" => "\xC3\x8E", "\xCF" => "\xC3\x8F", "\xD0" => "\xC3\x90", "\xD1" => "\xC3\x91", "\xD2" => "\xC3\x92", "\xD3" => "\xC3\x93", "\xD4" => "\xC3\x94", "\xD5" => "\xC3\x95", "\xD6" => "\xC3\x96", "\xD7" => "\xC3\x97", "\xD8" => "\xC3\x98", "\xD9" => "\xC3\x99", "\xDA" => "\xC3\x9A", "\xDB" => "\xC3\x9B", "\xDC" => "\xC3\x9C", "\xDD" => "\xC3\x9D", "\xDE" => "\xC3\x9E", "\xDF" => "\xC3\x9F", "\xE0" => "\xC3\xA0", "\xE1" => "\xC3\xA1", "\xE2" => "\xC3\xA2", "\xE3" => "\xC3\xA3", "\xE4" => "\xC3\xA4", "\xE5" => "\xC3\xA5", "\xE6" => "\xC3\xA6", "\xE7" => "\xC3\xA7", "\xE8" => "\xC3\xA8", "\xE9" => "\xC3\xA9", "\xEA" => "\xC3\xAA", "\xEB" => "\xC3\xAB", "\xEC" => "\xC3\xAC", "\xED" => "\xC3\xAD", "\xEE" => "\xC3\xAE", "\xEF" => "\xC3\xAF", "\xF0" => "\xC3\xB0", "\xF1" => "\xC3\xB1", "\xF2" => "\xC3\xB2", "\xF3" => "\xC3\xB3", "\xF4" => "\xC3\xB4", "\xF5" => "\xC3\xB5", "\xF6" => "\xC3\xB6", "\xF7" => "\xC3\xB7", "\xF8" => "\xC3\xB8", "\xF9" => "\xC3\xB9", "\xFA" => "\xC3\xBA", "\xFB" => "\xC3\xBB", "\xFC" => "\xC3\xBC", "\xFD" => "\xC3\xBD", "\xFE" => "\xC3\xBE", "\xFF" => "\xC3\xBF");
return strtr($string, $convert_table);
}
/**
* Change a string from one encoding to another
*
* @param string $data Raw data in $input encoding
* @param string $input Encoding of $data
* @param string $output Encoding you want
* @return string|boolean False if we can't convert it
*/
public static function change_encoding($data, $input, $output)
{
$input = SimplePie_Misc::encoding($input);
$output = SimplePie_Misc::encoding($output);
// We fail to fail on non US-ASCII bytes
if ($input === 'US-ASCII')
{
static $non_ascii_octects = '';
if (!$non_ascii_octects)
{
for ($i = 0x80; $i <= 0xFF; $i++)
{
$non_ascii_octects .= chr($i);
}
}
$data = substr($data, 0, strcspn($data, $non_ascii_octects));
}
// This is first, as behaviour of this is completely predictable
if ($input === 'windows-1252' && $output === 'UTF-8')
{
return SimplePie_Misc::windows_1252_to_utf8($data);
}
// This is second, as behaviour of this varies only with PHP version (the middle part of this expression checks the encoding is supported).
elseif (function_exists('mb_convert_encoding') && ($return = SimplePie_Misc::change_encoding_mbstring($data, $input, $output)))
{
return $return;
}
// This is third, as behaviour of this varies with OS userland and PHP version
elseif (function_exists('iconv') && ($return = SimplePie_Misc::change_encoding_iconv($data, $input, $output)))
{
return $return;
}
// This is last, as behaviour of this varies with OS userland and PHP version
elseif (class_exists('\UConverter') && ($return = SimplePie_Misc::change_encoding_uconverter($data, $input, $output)))
{
return $return;
}
// If we can't do anything, just fail
return false;
}
protected static function change_encoding_mbstring($data, $input, $output)
{
if ($input === 'windows-949')
{
$input = 'EUC-KR';
}
if ($output === 'windows-949')
{
$output = 'EUC-KR';
}
if ($input === 'Windows-31J')
{
$input = 'SJIS';
}
if ($output === 'Windows-31J')
{
$output = 'SJIS';
}
// Check that the encoding is supported
if (@mb_convert_encoding("\x80", 'UTF-16BE', $input) === "\x00\x80")
{
return false;
}
if (!in_array($input, mb_list_encodings()))
{
return false;
}
// Let's do some conversion
if ($return = @mb_convert_encoding($data, $output, $input))
{
return $return;
}
return false;
}
protected static function change_encoding_iconv($data, $input, $output)
{
return @iconv($input, $output, $data);
}
/**
* @param string $data
* @param string $input
* @param string $output
* @return string|false
*/
protected static function change_encoding_uconverter($data, $input, $output)
{
return @\UConverter::transcode($data, $output, $input);
}
/**
* Converts a unicode codepoint to a UTF-8 character
*
* @static
* @param int $codepoint Unicode codepoint
* @return string UTF-8 character
*/
public static function codepoint_to_utf8($codepoint)
{
$codepoint = (int) $codepoint;
if ($codepoint < 0)
{
return false;
}
else if ($codepoint <= 0x7f)
{
return chr($codepoint);
}
else if ($codepoint <= 0x7ff)
{
return chr(0xc0 | ($codepoint >> 6)) . chr(0x80 | ($codepoint & 0x3f));
}
else if ($codepoint <= 0xffff)
{
return chr(0xe0 | ($codepoint >> 12)) . chr(0x80 | (($codepoint >> 6) & 0x3f)) . chr(0x80 | ($codepoint & 0x3f));
}
else if ($codepoint <= 0x10ffff)
{
return chr(0xf0 | ($codepoint >> 18)) . chr(0x80 | (($codepoint >> 12) & 0x3f)) . chr(0x80 | (($codepoint >> 6) & 0x3f)) . chr(0x80 | ($codepoint & 0x3f));
}
// U+FFFD REPLACEMENT CHARACTER
return "\xEF\xBF\xBD";
}