-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathucs2.go
70 lines (63 loc) · 1.69 KB
/
ucs2.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
// SPDX-License-Identifier: MIT
//
// Copyright © 2018 Kent Gibson <warthog618@gmail.com>.
// Package ucs2 provides conversions between big-endian UCS-2 byte arrays and runes.
package ucs2
import (
"encoding/binary"
"errors"
"fmt"
"unicode/utf16"
)
// Decode converts a byte array of big-endian UCS2 characters into an array of
// runes.
//
// Each UCS2 character occupies two bytes, so the length of src must be even;
// an odd length returns ErrInvalidLength. Empty input returns nil, nil.
//
// A valid surrogate pair is combined into a single rune. A surrogate that does
// not form a valid pair with the code unit that follows it decodes to U+FFFD,
// and the following code unit is then decoded in its own right rather than
// being discarded.
//
// If the final code unit is a surrogate, the runes decoded up to that point
// are returned together with an ErrDanglingSurrogate containing the remaining
// undecoded bytes.
func Decode(src []byte) ([]rune, error) {
	if len(src) == 0 {
		return nil, nil
	}
	if len(src)&0x01 == 0x01 {
		return nil, ErrInvalidLength
	}
	dst := make([]rune, 0, len(src)/2)
	for ri := 0; ri < len(src); ri += 2 {
		r := rune(binary.BigEndian.Uint16(src[ri:]))
		if utf16.IsSurrogate(r) {
			if ri+2 >= len(src) {
				// No second code unit is available to complete the pair.
				return dst, ErrDanglingSurrogate(src[ri:])
			}
			r2 := rune(binary.BigEndian.Uint16(src[ri+2:]))
			r = utf16.DecodeRune(r, r2)
			// DecodeRune reports an invalid pair as U+FFFD; a valid pair always
			// decodes above U+FFFF. Only consume the second code unit when it
			// really was the low half of the pair, so that a stray surrogate
			// does not swallow the character that follows it.
			if r != '\uFFFD' {
				ri += 2
			}
		}
		dst = append(dst, r)
	}
	return dst, nil
}
// Encode converts an array of runes into an array of bytes in which each
// consecutive pair of bytes holds one big-endian UCS2 character.
//
// Runes are first converted with utf16.Encode, so a single rune may occupy
// more than one 16-bit character in the output. Empty input returns nil.
func Encode(src []rune) []byte {
	if len(src) == 0 {
		return nil
	}
	units := utf16.Encode(src)
	out := make([]byte, 2*len(units))
	for i, unit := range units {
		// Unit i occupies bytes 2i and 2i+1 of the output.
		binary.BigEndian.PutUint16(out[2*i:], unit)
	}
	return out
}
// ErrDanglingSurrogate is the error reported when the byte array being decoded
// ends with only one half of a surrogate pair. The value holds the bytes that
// could not be decoded.
type ErrDanglingSurrogate []byte

// Error implements the error interface, rendering the dangling bytes in Go
// syntax.
func (d ErrDanglingSurrogate) Error() string {
	raw := []byte(d)
	return fmt.Sprintf("ucs2: dangling surrogate: %#v", raw)
}
// ErrInvalidLength indicates the binary provided has an invalid (odd) length.
var ErrInvalidLength = errors.New("ucs2: length must be even")