Skip to content
This repository has been archived by the owner on Jan 17, 2024. It is now read-only.

Commit

Permalink
Helpers for null-terminated Utf8 (#3)
Browse files Browse the repository at this point in the history
  • Loading branch information
sjindel-google authored Sep 9, 2019
1 parent d6daaec commit 9c3cb7b
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.dart_tool
.packages
pubspec.lock
5 changes: 5 additions & 0 deletions lib/ffi.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

export 'src/utf8.dart';
66 changes: 66 additions & 0 deletions lib/src/utf8.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import 'dart:convert';
import 'dart:ffi';
import 'dart:typed_data';

// 2^62 - 1. NOTE(review): the name suggests this is the largest "Smi" (small
// integer) value on a 64-bit VM -- inferred from the identifier only; confirm.
const int _kMaxSmi64 = (1 << 62) - 1;
// 2^30 - 1, the 32-bit counterpart of [_kMaxSmi64].
const int _kMaxSmi32 = (1 << 30) - 1;
// Element count that [Utf8.strlen] passes to `asExternalTypedData` when it
// wraps a native string whose real length is not yet known: the 64-bit limit
// when a native `IntPtr` is 8 bytes wide, the 32-bit limit otherwise.
final int _maxSize = sizeOf<IntPtr>() == 8 ? _kMaxSmi64 : _kMaxSmi32;

/// [Utf8] implements conversion between Dart strings and null-terminated
/// Utf8-encoded "char*" strings in C.
///
/// [Utf8] is represented as a struct so that `Pointer<Utf8>` can be used in
/// native function signatures.
//
// TODO(https://github.com/dart-lang/ffi/issues/4): No need to use
// 'asExternalTypedData' when Pointer operations are performant.
class Utf8 extends Struct<Utf8> {
  /// Returns the length of a null-terminated string -- the number of (one-byte)
  /// characters before the first null byte.
  ///
  /// [string] must point to memory that actually contains a null byte: the
  /// search has no other stopping condition than the `_maxSize` element count
  /// used for the typed-data view.
  static int strlen(Pointer<Utf8> string) {
    final Pointer<Uint8> array = string.cast<Uint8>();
    // The real length is unknown at this point, so the memory is viewed with
    // `_maxSize` elements and searched for the terminator.
    final Uint8List nativeString = array.asExternalTypedData(count: _maxSize);
    return nativeString.indexWhere((char) => char == 0);
  }

  /// Creates a [String] containing the characters UTF-8 encoded in [string].
  ///
  /// The [string] must be a zero-terminated byte sequence of valid UTF-8
  /// encodings of Unicode code points. It may also contain UTF-8 encodings of
  /// unpaired surrogate code points, which is not otherwise valid UTF-8, but
  /// which may be created when encoding a Dart string containing an unpaired
  /// surrogate. See [Utf8Decoder] for details on decoding.
  ///
  /// Returns a Dart string containing the decoded code points. The native
  /// memory behind [string] is only read; it is not freed by this call.
  static String fromUtf8(Pointer<Utf8> string) {
    // Measure first so that only the bytes before the terminator are decoded.
    final int length = strlen(string);
    return utf8.decode(Uint8List.view(
        string.cast<Uint8>().asExternalTypedData(count: length).buffer,
        0,
        length));
  }

  /// Convert a [String] to a Utf8-encoded null-terminated C string.
  ///
  /// If 'string' contains NULL bytes, the converted string will be truncated
  /// prematurely. Unpaired surrogate code points in [string] will be preserved
  /// in the UTF-8 encoded result. See [Utf8Encoder] for details on encoding.
  ///
  /// Returns a malloc-allocated pointer to the result. Nothing in this class
  /// releases that allocation, so the caller is expected to call `free()` on
  /// the returned pointer once it is no longer needed.
  static Pointer<Utf8> toUtf8(String string) {
    final units = utf8.encode(string);
    // One extra byte is reserved for the trailing null terminator.
    final Pointer<Uint8> result =
        Pointer<Uint8>.allocate(count: units.length + 1);
    final Uint8List nativeString =
        result.asExternalTypedData(count: units.length + 1);
    nativeString.setAll(0, units);
    nativeString[units.length] = 0;
    return result.cast();
  }

  /// Decodes the null-terminated UTF-8 string located at this struct's
  /// address; see [fromUtf8].
  @override
  String toString() => fromUtf8(addressOf);
}
1 change: 1 addition & 0 deletions pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ dependencies:

dev_dependencies:
pedantic: ^1.0.0
test: ^1.6.8
77 changes: 77 additions & 0 deletions test/utf8_test.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import 'dart:ffi';
import 'dart:typed_data';

import 'package:test/test.dart';
import 'package:ffi/ffi.dart';

/// Copies [ints] into a freshly allocated native byte buffer and returns a
/// pointer to its first byte.
///
/// The buffer holds exactly `ints.length` bytes; no terminator is appended, so
/// callers that need a C string must include the trailing `0` in [ints]. The
/// allocation is not released here.
Pointer<Uint8> _bytesFromList(List<int> ints) {
  final int byteCount = ints.length;
  final Pointer<Uint8> buffer = Pointer<Uint8>.allocate(count: byteCount);
  // Write through a typed-data view that aliases the native allocation.
  final Uint8List nativeView = buffer.asExternalTypedData(count: byteCount);
  nativeView.setAll(0, ints);
  return buffer;
}

/// Round-trip tests for [Utf8.toUtf8], [Utf8.fromUtf8] and [Utf8.strlen].
///
/// Every native buffer a test allocates is registered with `addTearDown`, so
/// it is freed whether the test passes or fails.
void main() {
  test("toUtf8 ASCII", () {
    final String start = "Hello World!\n";
    final Pointer<Uint8> converted = Utf8.toUtf8(start).cast();
    addTearDown(() => converted.free());
    // For pure ASCII, the encoded length equals the string length; +1 covers
    // the null terminator.
    final Uint8List end =
        converted.asExternalTypedData(count: start.length + 1);
    final matcher =
        equals([72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 10, 0]);
    expect(end, matcher);
  });

  test("fromUtf8 ASCII", () {
    final Pointer<Utf8> utf8 = _bytesFromList(
        [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 10, 0]).cast();
    addTearDown(() => utf8.free());
    final String end = Utf8.fromUtf8(utf8);
    expect(end, "Hello World!\n");
  });

  test("toUtf8 emoji", () {
    final String start = "😎👿💬";
    final Pointer<Utf8> converted = Utf8.toUtf8(start);
    addTearDown(() => converted.free());
    // Multi-byte characters: measure the encoded length instead of using
    // the Dart string length.
    final int length = Utf8.strlen(converted);
    final Uint8List end =
        converted.cast<Uint8>().asExternalTypedData(count: length + 1);
    final matcher =
        equals([240, 159, 152, 142, 240, 159, 145, 191, 240, 159, 146, 172, 0]);
    expect(end, matcher);
  });

  test("fromUtf8 emoji", () {
    final Pointer<Utf8> utf8 = _bytesFromList(
        [240, 159, 152, 142, 240, 159, 145, 191, 240, 159, 146, 172, 0]).cast();
    addTearDown(() => utf8.free());
    final String end = Utf8.fromUtf8(utf8);
    expect(end, "😎👿💬");
  });

  test("toUtf8 unpaired surrogate", () {
    final String start = String.fromCharCodes([0xD800, 0x1000]);
    final Pointer<Utf8> converted = Utf8.toUtf8(start);
    addTearDown(() => converted.free());
    final int length = Utf8.strlen(converted);
    final Uint8List end =
        converted.cast<Uint8>().asExternalTypedData(count: length + 1);
    expect(end, equals([237, 160, 128, 225, 128, 128, 0]));
  });

  test("fromUtf8 unpaired surrogate", () {
    final Pointer<Utf8> utf8 =
        _bytesFromList([237, 160, 128, 225, 128, 128, 0]).cast();
    addTearDown(() => utf8.free());
    final String end = Utf8.fromUtf8(utf8);
    expect(end, equals(String.fromCharCodes([0xD800, 0x1000])));
  });

  test("fromUtf8 invalid", () {
    // 0x80 is a lone continuation byte, which is not valid UTF-8.
    final Pointer<Utf8> utf8 = _bytesFromList([0x80, 0x00]).cast();
    addTearDown(() => utf8.free());
    expect(() => Utf8.fromUtf8(utf8), throwsA(isFormatException));
  });
}

0 comments on commit 9c3cb7b

Please sign in to comment.