This repository has been archived by the owner on Jan 17, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 32
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Helpers for null-terminated Utf8 (#3)
- Loading branch information
1 parent
d6daaec
commit 9c3cb7b
Showing
5 changed files
with
152 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
.dart_tool | ||
.packages | ||
pubspec.lock |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file | ||
// for details. All rights reserved. Use of this source code is governed by a | ||
// BSD-style license that can be found in the LICENSE file. | ||
|
||
export 'src/utf8.dart'; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file | ||
// for details. All rights reserved. Use of this source code is governed by a | ||
// BSD-style license that can be found in the LICENSE file. | ||
|
||
import 'dart:convert'; | ||
import 'dart:ffi'; | ||
import 'dart:typed_data'; | ||
|
||
// Upper bounds used as the element count when viewing native memory whose
// real length is not known yet (see the `asExternalTypedData` call in
// `Utf8.strlen`). Judging by the names these are the largest Smi values on
// 32-bit and 64-bit platforms respectively -- TODO confirm against the VM.
const int _maxSmi32 = (1 << 30) - 1;
const int _maxSmi64 = (1 << 62) - 1;

// Selects the bound matching the platform's pointer width: the 64-bit value
// when `IntPtr` occupies 8 bytes, the 32-bit value otherwise.
final int _maxSize = sizeOf<IntPtr>() == 8 ? _maxSmi64 : _maxSmi32;
|
||
/// [Utf8] implements conversion between Dart strings and null-terminated | ||
/// Utf8-encoded "char*" strings in C. | ||
/// | ||
/// [Utf8] is represented as a struct so that `Pointer<Utf8>` can be used in | ||
/// native function signatures. | ||
// | ||
// TODO(https://github.com/dart-lang/ffi/issues/4): No need to use | ||
// 'asExternalTypedData' when Pointer operations are performant. | ||
class Utf8 extends Struct<Utf8> {
  /// Returns the length of a null-terminated string -- the number of (one-byte)
  /// characters before the first null byte.
  ///
  /// The terminating null byte itself is not counted. [string] must point to
  /// memory that contains a null byte; otherwise the scan keeps reading past
  /// the end of the string.
  static int strlen(Pointer<Utf8> string) {
    final Pointer<Uint8> array = string.cast<Uint8>();
    // The real length is unknown at this point, so the memory is viewed with
    // the largest count available (_maxSize) and scanned for the terminator.
    final Uint8List nativeString = array.asExternalTypedData(count: _maxSize);
    return nativeString.indexWhere((char) => char == 0);
  }

  /// Creates a [String] containing the characters UTF-8 encoded in [string].
  ///
  /// The [string] must be a zero-terminated byte sequence of valid UTF-8
  /// encodings of Unicode code points. It may also contain UTF-8 encodings of
  /// unpaired surrogate code points, which is not otherwise valid UTF-8, but
  /// which may be created when encoding a Dart string containing an unpaired
  /// surrogate. See [Utf8Decoder] for details on decoding.
  ///
  /// Returns a Dart string containing the decoded code points. The native
  /// memory is only read; it is neither modified nor freed.
  static String fromUtf8(Pointer<Utf8> string) {
    final int length = strlen(string);
    // View exactly the `length` bytes before the terminator so that the null
    // byte is not handed to the decoder.
    return utf8.decode(Uint8List.view(
        string.cast<Uint8>().asExternalTypedData(count: length).buffer,
        0,
        length));
  }

  /// Converts a [String] to a Utf8-encoded null-terminated C string.
  ///
  /// If [string] contains NULL bytes, the converted string will be truncated
  /// prematurely. Unpaired surrogate code points in [string] will be preserved
  /// in the UTF-8 encoded result. See [Utf8Encoder] for details on encoding.
  ///
  /// Returns a pointer to newly allocated memory holding the result. Nothing
  /// in this method releases that memory, so the caller must free it.
  static Pointer<Utf8> toUtf8(String string) {
    final units = utf8.encode(string);
    // One extra byte is reserved for the null terminator.
    final Pointer<Uint8> result =
        Pointer<Uint8>.allocate(count: units.length + 1);
    final Uint8List nativeString =
        result.asExternalTypedData(count: units.length + 1);
    nativeString.setAll(0, units);
    nativeString[units.length] = 0;
    return result.cast();
  }

  /// Decodes the null-terminated string stored at this struct's address.
  @override
  String toString() => fromUtf8(addressOf);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,3 +12,4 @@ dependencies: | |
|
||
dev_dependencies: | ||
pedantic: ^1.0.0 | ||
test: ^1.6.8 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file | ||
// for details. All rights reserved. Use of this source code is governed by a | ||
// BSD-style license that can be found in the LICENSE file. | ||
|
||
import 'dart:ffi'; | ||
import 'dart:typed_data'; | ||
|
||
import 'package:test/test.dart'; | ||
import 'package:ffi/ffi.dart'; | ||
|
||
/// Copies [ints] into freshly allocated native memory and returns a pointer
/// to the first byte.
///
/// Exactly `ints.length` bytes are allocated and no terminator is appended.
/// The memory is not freed here, so releasing it is left to the caller.
Pointer<Uint8> _bytesFromList(List<int> ints) {
  final int byteCount = ints.length;
  final Pointer<Uint8> memory = Pointer.allocate(count: byteCount);
  // Write through a typed-data view over the native allocation.
  final Uint8List view = memory.asExternalTypedData(count: byteCount);
  view.setAll(0, ints);
  return memory;
}
|
||
/// Tests for the [Utf8] helpers: encoding Dart strings into null-terminated
/// native memory and decoding such memory back into Dart strings.
///
/// Every test releases the native memory it allocates inside a `finally`
/// block, so the memory is freed even when an expectation fails.
void main() {
  test("toUtf8 ASCII", () {
    final String start = "Hello World!\n";
    final Pointer<Uint8> converted = Utf8.toUtf8(start).cast();
    try {
      // For ASCII input the encoded length equals the string length; the
      // extra byte is the null terminator.
      final Uint8List end =
          converted.asExternalTypedData(count: start.length + 1);
      final matcher = equals(
          [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 10, 0]);
      expect(end, matcher);
    } finally {
      converted.free();
    }
  });

  test("fromUtf8 ASCII", () {
    final Pointer<Utf8> utf8 = _bytesFromList(
        [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 10, 0]).cast();
    try {
      final String end = Utf8.fromUtf8(utf8);
      expect(end, "Hello World!\n");
    } finally {
      utf8.free();
    }
  });

  test("toUtf8 emoji", () {
    final String start = "😎👿💬";
    final Pointer<Utf8> converted = Utf8.toUtf8(start).cast();
    try {
      // Multi-byte characters: the byte length comes from strlen, not from
      // the Dart string length.
      final int length = Utf8.strlen(converted);
      final Uint8List end =
          converted.cast<Uint8>().asExternalTypedData(count: length + 1);
      final matcher = equals(
          [240, 159, 152, 142, 240, 159, 145, 191, 240, 159, 146, 172, 0]);
      expect(end, matcher);
    } finally {
      converted.free();
    }
  });

  test("fromUtf8 emoji", () {
    final Pointer<Utf8> utf8 = _bytesFromList(
        [240, 159, 152, 142, 240, 159, 145, 191, 240, 159, 146, 172, 0]).cast();
    try {
      final String end = Utf8.fromUtf8(utf8);
      expect(end, "😎👿💬");
    } finally {
      utf8.free();
    }
  });

  test("toUtf8 unpaired surrogate", () {
    final String start = String.fromCharCodes([0xD800, 0x1000]);
    final Pointer<Utf8> converted = Utf8.toUtf8(start).cast();
    try {
      final int length = Utf8.strlen(converted);
      final Uint8List end =
          converted.cast<Uint8>().asExternalTypedData(count: length + 1);
      expect(end, equals([237, 160, 128, 225, 128, 128, 0]));
    } finally {
      converted.free();
    }
  });

  test("fromUtf8 unpaired surrogate", () {
    final Pointer<Utf8> utf8 =
        _bytesFromList([237, 160, 128, 225, 128, 128, 0]).cast();
    try {
      final String end = Utf8.fromUtf8(utf8);
      expect(end, equals(String.fromCharCodes([0xD800, 0x1000])));
    } finally {
      utf8.free();
    }
  });

  test("fromUtf8 invalid", () {
    // 0x80 is a continuation byte with no lead byte, so decoding must fail.
    final Pointer<Utf8> utf8 = _bytesFromList([0x80, 0x00]).cast();
    try {
      // `expect` invokes the closure synchronously, so the memory is still
      // valid while it is being decoded and can be freed right afterwards.
      expect(() => Utf8.fromUtf8(utf8), throwsA(isFormatException));
    } finally {
      utf8.free();
    }
  });
}