From 49c29867db235128adcc1f482d2ff56aa1907f84 Mon Sep 17 00:00:00 2001 From: ignace nyamagana butera Date: Mon, 7 Oct 2024 06:53:52 +0200 Subject: [PATCH] Improve JSON converter public API --- CHANGELOG.md | 1 + docs/9.0/converter/json.md | 32 +++++- docs/9.0/reader/index.md | 4 + docs/9.0/reader/resultset.md | 4 + src/JsonConverter.php | 202 ++++++++++++++++++++++++----------- src/JsonConverterTest.php | 31 ++++-- src/Stream.php | 5 + 7 files changed, 208 insertions(+), 71 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bc1c1171..b4c8a75a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ All Notable changes to `Csv` will be documented in this file - `Cast*` methods accept more input type to improve Denormalization usage when `Reader::addFormatter` is used or when the collection contains data other than string and `null`. - `Stream::getSize` is added to the internal `Stream` class +- `Stream::getContents` is added to the internal `Stream` class - `MapIterator::toIterator` is added to the internal class `MapIterator` class to convert any `iterable` into an `Iterator`. - Casting a CSV to an `array` it now will be a collection of array instead of a simple `array`. diff --git a/docs/9.0/converter/json.md b/docs/9.0/converter/json.md index ea10c399..13c9f074 100644 --- a/docs/9.0/converter/json.md +++ b/docs/9.0/converter/json.md @@ -15,16 +15,37 @@ the converter.

Because we are building a JSON structure, the JsonConverter object throws generic SPL Exception instead of League\Csv\Exception.

-### JsonConverter::addFlags and JsonConverter::removeFlags +### JsonConverter::withFlags and JsonConverter::withoutFlags ```php -public JsonConverter::addFlags(int ...$flag): self -public JsonConverter::removeFlags(int ...$flag): self +public JsonConverter::withFlags(int ...$flag): self +public JsonConverter::withoutFlags(int ...$flag): self ``` This method sets the JSON flags to be used during conversion. The method handles all the flags supported by PHP `json_encode` function. +If you prefer a more expressive way for setting the flags you can use `with*` and `without*` methods +whose name are derived from PHP JSON constants. + +```php +$converter = JsonConverter::create() + ->withFlags(JSON_PRETTY_PRINT, JSON_UNESCAPED_SLASHES, JSON_FORCE_OBJECT) + ->withoutFlags(JSON_HEX_QUOT); + +//is equivalent to + +$converter = JsonConverter::create() + ->withPrettyPrint() + ->withUnescapedSlashes() + ->withForceObject() + ->withoutHexQuot(); +``` + +

Because we are converting one record at a time, the class always uses JSON_THROW_ON_ERROR +to stop the collection conversion. As such, adding or removing the flag using the methods described above will +have no effect on its usage; the flag is ALWAYS set.

+ ### JsonConverter::depth ```php @@ -52,7 +73,7 @@ public JsonConverter::formatter(?callback $formatter): mixed This method allow to apply a callback prior to `json_encode` your collection individual item. Since the encoder does not rely on PHP's `JsonSerializable` interface but on PHP's `iterable` -structure. The expected conversion may differ to what you expect. This callback allows you to +structure. The resulting conversion may differ to what you expect. This callback allows you to specify how each item will be converted. The formatter should return a type that can be handled by PHP `json_encode` function. @@ -81,7 +102,8 @@ $document->setHeaderOffset(0); CharsetConverter::addTo($document, 'iso-8859-15', 'utf-8'); $converter = JsonConverter::create() - ->addFlags(JSON_PRETTY_PRINT, JSON_UNESCAPED_SLASHES) + ->withPrettyPrint() + ->withUnescapedSlashes() ->depth(2) ->indentSize(2) ->formatter(function (array $row) { diff --git a/docs/9.0/reader/index.md b/docs/9.0/reader/index.md index 47371930..cc7de0e9 100644 --- a/docs/9.0/reader/index.md +++ b/docs/9.0/reader/index.md @@ -324,6 +324,10 @@ found records are returned as a [ResultSet](/9.0/reader/resultset) object. ### Json serialization +

A dedicated JsonConverter class is added in version 9.17.0 +to help convert CSV into a proper JSON document without consuming +too much memory. It is the recommended way to convert to JSON.

+ The `Reader` class implements the `JsonSerializable` interface. As such you can use the `json_encode` function directly on the instantiated object. The interface is implemented using PHP's `iterator_array` on the `Reader::getRecords` method. As such, the returned `JSON` diff --git a/docs/9.0/reader/resultset.md b/docs/9.0/reader/resultset.md index a2a6bf74..bcf0974b 100644 --- a/docs/9.0/reader/resultset.md +++ b/docs/9.0/reader/resultset.md @@ -21,6 +21,10 @@ found records are returned as a [ResultSet](/9.0/reader/resultset) object. ### Json serialization +

A dedicated JsonConverter class is added in version 9.17.0 +to help convert a ResultSet into a proper JSON document without consuming +too much memory. It is the recommended way to convert to JSON.

+ The `ResultSet` class implements the `JsonSerializable` interface. As such you can use the `json_encode` function directly on the instantiated object. The interface is implemented using PHP's `iterator_array` on the `ResultSet::getRecords` method. As such, the returned `JSON` string data is affected by the diff --git a/src/JsonConverter.php b/src/JsonConverter.php index b0d8adef..bd47f119 100644 --- a/src/JsonConverter.php +++ b/src/JsonConverter.php @@ -13,6 +13,7 @@ namespace League\Csv; +use BadMethodCallException; use Closure; use Exception; use InvalidArgumentException; @@ -22,6 +23,26 @@ use SplFileInfo; use SplFileObject; +use function array_filter; +use function array_reduce; +use function get_defined_constants; +use function in_array; +use function is_resource; +use function is_string; +use function json_encode; +use function json_last_error; +use function lcfirst; +use function restore_error_handler; +use function set_error_handler; +use function str_repeat; +use function str_replace; +use function str_starts_with; +use function strlen; +use function strtolower; +use function substr; +use function ucwords; + +use const ARRAY_FILTER_USE_KEY; use const JSON_ERROR_NONE; use const JSON_FORCE_OBJECT; use const JSON_PRETTY_PRINT; @@ -30,14 +51,43 @@ /** * Converts and store tabular data into a JSON string. 
* @template T + * + * @method JsonConverter withHexTag() adds the JSON_HEX_TAG flag + * @method JsonConverter withoutHexTag() removes the JSON_HEX_TAG flag + * @method JsonConverter withHexAmp() adds the JSON_HEX_AMP flag + * @method JsonConverter withoutHexAmp() removes the JSON_HEX_AMP flag + * @method JsonConverter withHexApos() adds the JSON_HEX_APOS flag + * @method JsonConverter withoutHexApos() removes the JSON_HEX_APOS flag + * @method JsonConverter withHexQuot() adds the JSON_HEX_QUOT flag + * @method JsonConverter withoutHexQuot() removes the JSON_HEX_QUOT flag + * @method JsonConverter withForceObject() adds the JSON_FORCE_OBJECT flag + * @method JsonConverter withoutForceObject() removes the JSON_FORCE_OBJECT flag + * @method JsonConverter withNumericCheck() adds the JSON_NUMERIC_CHECK flag + * @method JsonConverter withoutNumericCheck() removes the JSON_NUMERIC_CHECK flag + * @method JsonConverter withUnescapedSlashes() adds the JSON_UNESCAPED_SLASHES flag + * @method JsonConverter withoutUnescapedSlashes() removes the JSON_UNESCAPED_SLASHES flag + * @method JsonConverter withPrettyPrint() adds the JSON_PRETTY_PRINT flag + * @method JsonConverter withoutPrettyPrint() removes the JSON_PRETTY_PRINT flag + * @method JsonConverter withUnescapedUnicode() adds the JSON_UNESCAPED_UNICODE flag + * @method JsonConverter withoutUnescapedUnicode() removes the JSON_UNESCAPED_UNICODE flag + * @method JsonConverter withPartialOutputOnError() adds the JSON_PARTIAL_OUTPUT_ON_ERROR flag + * @method JsonConverter withoutPartialOutputOnError() removes the JSON_PARTIAL_OUTPUT_ON_ERROR flag + * @method JsonConverter withPreserveZeroFraction() adds the JSON_PRESERVE_ZERO_FRACTION flag + * @method JsonConverter withoutPreserveZeroFraction() removes the JSON_PRESERVE_ZERO_FRACTION flag + * @method JsonConverter withUnescapedLineTerminators() adds the JSON_UNESCAPED_LINE_TERMINATORS flag + * @method JsonConverter withoutUnescapedLineTerminators() removes the 
JSON_UNESCAPED_LINE_TERMINATORS flag + * @method JsonConverter withInvalidUtf8Ignore() adds the JSON_INVALID_UTF8_IGNORE flag + * @method JsonConverter withoutInvalidUtf8Ignore() removes the JSON_INVALID_UTF8_IGNORE flag + * @method JsonConverter withInvalidUtf8Substitute() adds the JSON_INVALID_UTF8_SUBSTITUTE flag + * @method JsonConverter withoutInvalidUtf8Substitute() removes the JSON_INVALID_UTF8_SUBSTITUTE flag */ final class JsonConverter { public readonly int $flags; /** @var int<1, max> */ public readonly int $depth; - /** @var non-empty-string */ - public readonly string $indentation; + /** @var int<1, max> */ + public readonly int $indentSize; /** @var Closure(T, array-key): mixed */ public readonly Closure $formatter; public readonly bool $isPrettyPrint; @@ -47,24 +97,15 @@ final class JsonConverter public static function create(): self { - return new self( - flags: 0, - depth: 512, - indentSize: 4, - formatter: null, - ); + return new self(flags: 0, depth: 512, indentSize: 4, formatter: null); } /** * @param int<1, max> $depth * @param int<1, max> $indentSize */ - private function __construct( - int $flags, - int $depth, - int $indentSize, - ?Closure $formatter - ) { + private function __construct(int $flags, int $depth, int $indentSize, ?Closure $formatter) + { json_encode([], $flags & ~JSON_THROW_ON_ERROR, $depth); JSON_ERROR_NONE === json_last_error() || throw new InvalidArgumentException('The flags or the depth given are not valid JSON encoding parameters in PHP; '.json_last_error_msg()); @@ -72,7 +113,7 @@ private function __construct( $this->flags = $flags; $this->depth = $depth; - $this->indentation = str_repeat(' ', $indentSize); + $this->indentSize = $indentSize; $this->formatter = $formatter ?? 
fn (mixed $value) => $value; $this->isPrettyPrint = ($this->flags & JSON_PRETTY_PRINT) === JSON_PRETTY_PRINT; $this->isForceObject = ($this->flags & JSON_FORCE_OBJECT) === JSON_FORCE_OBJECT; @@ -80,29 +121,73 @@ private function __construct( } /** - * Adds a list of JSON flags. + * @throws BadMethodCallException + */ + public function __call(string $name, array $arguments): self + { + return match (true) { + str_starts_with($name, 'without') => $this->withoutFlags(self::methodToFlag()[lcfirst(substr($name, 7))] ?? throw new BadMethodCallException('The method "'.self::class.'::'.$name.'" does not exist.')), + str_starts_with($name, 'with') => $this->withFlags(self::methodToFlag()[lcfirst(substr($name, 4))] ?? throw new BadMethodCallException('The method "'.self::class.'::'.$name.'" does not exist.')), + default => throw new BadMethodCallException('The method "'.self::class.'::'.$name.'" does not exist.'), + }; + } + + /** + * Returns the PHP json flag associated to its method suffix to ease method lookup. 
+ * + * @return array */ - public function addFlags(int ...$flag): self + private static function methodToFlag(): array { - $flags = array_reduce($flag, fn (int $flag, int $value): int => $flag | $value, $this->flags); - if ($flags === $this->flags) { - return $this; + static $methods; + + if (null === $methods) { + /** @var array $jsonFlags */ + $jsonFlags = get_defined_constants(true)['json']; + $flagNames = array_filter( + $jsonFlags, + fn (string $key) => str_starts_with($key, 'JSON_') && !( + str_starts_with($key, 'JSON_ERROR_') + || in_array($key, ['JSON_BIGINT_AS_STRING', 'JSON_OBJECT_AS_ARRAY', 'JSON_THROW_ON_ERROR'], true) + ), + ARRAY_FILTER_USE_KEY + ); + + $methods = []; + foreach ($flagNames as $name => $value) { + $methods[lcfirst(str_replace('_', '', ucwords(strtolower(substr($name, 5)), '_')))] = $value; + } } - return new self($flags, $this->depth, strlen($this->indentation), $this->formatter); + return $methods; + } + + /** + * Adds a list of JSON flags. + */ + public function withFlags(int ...$flags): self + { + return $this->setFlags( + array_reduce($flags, fn (int $carry, int $flag): int => $carry | $flag, $this->flags) + ); } /** * Removes a list of JSON flags. 
*/ - public function removeFlags(int ...$flag): self + public function withoutFlags(int ...$flags): self { - $flags = array_reduce($flag, fn (int $flag, int $value): int => $flag & ~$value, $this->flags); - if ($flags === $this->flags) { - return $this; - } + return $this->setFlags( + array_reduce($flags, fn (int $carry, int $flag): int => $carry & ~$flag, $this->flags) + ); + } - return new self($flags, $this->depth, strlen($this->indentation), $this->formatter); + private function setFlags(int $flags): self + { + return match ($flags) { + $this->flags => $this, + default => new self($flags, $this->depth, $this->indentSize, $this->formatter), + }; } /** @@ -112,11 +197,10 @@ public function removeFlags(int ...$flag): self */ public function depth(int $depth): self { - if ($depth === $this->depth) { - return $this; - } - - return new self($this->flags, $depth, strlen($this->indentation), $this->formatter); + return match ($depth) { + $this->depth => $this, + default => new self($this->flags, $depth, $this->indentSize, $this->formatter), + }; } /** @@ -126,11 +210,10 @@ public function depth(int $depth): self */ public function indentSize(int $indentSize): self { - if ($indentSize === strlen($this->indentation)) { - return $this; - } - - return new self($this->flags, $this->depth, $indentSize, $this->formatter); + return match ($indentSize) { + $this->indentSize => $this, + default => new self($this->flags, $this->depth, $indentSize, $this->formatter), + }; } /** @@ -138,7 +221,7 @@ public function indentSize(int $indentSize): self */ public function formatter(?Closure $formatter): self { - return new self($this->flags, $this->depth, strlen($this->indentation), $formatter); + return new self($this->flags, $this->depth, $this->indentSize, $formatter); } /** @@ -161,29 +244,29 @@ public function formatter(?Closure $formatter): self */ public function save(iterable $records, mixed $destination, $context = null): int { - $bytes = 0; $stream = match(true) { $destination 
instanceof Stream, $destination instanceof SplFileObject => $destination, $destination instanceof SplFileInfo => $destination->openFile(mode:'w', context: $context), is_resource($destination) => Stream::createFromResource($destination), - is_string($destination) => Stream::createFromPath(path: $destination, open_mode:'w', context: $context), - default => throw new InvalidArgumentException('The path must be a stream or a SplFileInfo object.'), + is_string($destination) => Stream::createFromPath($destination, 'w', $context), + default => throw new InvalidArgumentException('The destination path must be a filename, a stream or a SplFileInfo object.'), }; - + $bytes = 0; + $writtenBytes = 0; set_error_handler(fn (int $errno, string $errstr, string $errfile, int $errline) => true); foreach ($this->convert($records) as $line) { - $addedBytes = $stream->fwrite($line); - if (false === $addedBytes) { - restore_error_handler(); - - throw new RuntimeException('Unable to write to the stream.'); + if (false === ($writtenBytes = $stream->fwrite($line))) { + break; } - $bytes += $addedBytes; + $bytes += $writtenBytes; } - restore_error_handler(); + if (false === $writtenBytes) { + throw new RuntimeException('Unable to write to the stream.'); + } + return $bytes; } @@ -197,12 +280,11 @@ public function save(iterable $records, mixed $destination, $context = null): in */ public function encode(iterable $records): string { - $json = ''; - foreach ($this->convert($records) as $line) { - $json .= $line; - } + $stream = Stream::createFromString(); + $this->save($records, $stream); + $stream->rewind(); - return $json; + return $stream->getContents(); /* @phpstan-ignore-line */ } /** @@ -285,21 +367,21 @@ private function setInternalFormatter(): Closure return $callback; } - return fn (string $json, int|string $offset): string => $this->prettyPrint($callback($json, $offset)); + return fn (string $json, int|string $offset): string => $this->prettyPrint($callback($json, $offset), 
str_repeat(' ', $this->indentSize)); } /** * Pretty Print the JSON string without using JSON_PRETTY_PRINT * The method also allow using an arbitrary length for the indentation. */ - private function prettyPrint(string $json): string + private function prettyPrint(string $json, string $indentation): string { $level = 1; $inQuotes = false; $escape = false; $length = strlen($json); - $str = $this->indentation; + $str = $indentation; for ($i = 0; $i < $length; $i++) { $char = $json[$i]; if ('"' === $char && !$escape) { @@ -308,9 +390,9 @@ private function prettyPrint(string $json): string $escape = '\\' === $char && !$escape; $str .= $inQuotes ? $char : match ($char) { - '{', '[' => $char."\n".str_repeat($this->indentation, ++$level), - '}', ']' => "\n".str_repeat($this->indentation, --$level).$char, - ',' => $char."\n".str_repeat($this->indentation, $level), + '{', '[' => $char."\n".str_repeat($indentation, ++$level), + '}', ']' => "\n".str_repeat($indentation, --$level).$char, + ',' => $char."\n".str_repeat($indentation, $level), ':' => $char.' 
', default => $char, }; diff --git a/src/JsonConverterTest.php b/src/JsonConverterTest.php index b39ecacb..4fe7d4be 100644 --- a/src/JsonConverterTest.php +++ b/src/JsonConverterTest.php @@ -19,6 +19,7 @@ use PHPUnit\Framework\TestCase; use const JSON_FORCE_OBJECT; +use const JSON_HEX_QUOT; use const JSON_PRETTY_PRINT; use const JSON_UNESCAPED_SLASHES; @@ -34,8 +35,8 @@ public function it_will_convert_a_tabular_data_reader_into_a_json(): void CharsetConverter::addTo($csv, 'iso-8859-15', 'utf-8'); $converter = JsonConverter::create() - ->addFlags(JSON_PRETTY_PRINT, JSON_UNESCAPED_SLASHES, JSON_FORCE_OBJECT) - ->removeFlags(JSON_FORCE_OBJECT) + ->withFlags(JSON_PRETTY_PRINT, JSON_UNESCAPED_SLASHES, JSON_FORCE_OBJECT) + ->withoutFlags(JSON_FORCE_OBJECT) ->depth(24); $records = Statement::create()->offset(3)->limit(5)->process($csv); @@ -50,7 +51,7 @@ public function it_will_convert_a_tabular_data_reader_into_a_json(): void self::assertSame($converter->encode($records), $nativeJson); self::assertSame(JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT, $converter->flags); self::assertSame(24, $converter->depth); - self::assertSame(' ', $converter->indentation); + self::assertSame(4, $converter->indentSize); self::assertTrue($converter->isPrettyPrint); self::assertFalse($converter->isForceObject); } @@ -64,8 +65,8 @@ public function it_has_default_values(): void $converter, $converter ->indentSize(4) - ->addFlags(0) - ->removeFlags(0) + ->withFlags(0) + ->withoutFlags(0) ->depth(512) ); } @@ -101,7 +102,7 @@ public function it_returns_a_null_object_if_the_collection_is_empty(): void $converter = JsonConverter::create(); self::assertSame('[]', $converter->encode([])); - self::assertSame('{}', $converter->addFlags(JSON_FORCE_OBJECT)->encode([])); + self::assertSame('{}', $converter->withFlags(JSON_FORCE_OBJECT)->encode([])); } #[Test] @@ -112,4 +113,22 @@ public function it_can_manipulate_the_record_prior_to_json_encode(): void self::assertSame('[{"foo":"BAR"}]', 
$converter->encode([['foo' => 'bar']])); } + + #[Test] + public function it_can_use_syntactic_sugar_methods_to_set_json_flags(): void + { + $usingJsonFlags = JsonConverter::create() + ->withFlags(JSON_PRETTY_PRINT, JSON_UNESCAPED_SLASHES, JSON_FORCE_OBJECT) + ->withoutFlags(JSON_HEX_QUOT) + ->depth(24); + + $usingMethodFlags = JsonConverter::create() + ->withPrettyPrint() + ->withUnescapedSlashes() + ->withForceObject() + ->withoutHexQuot() + ->depth(24); + + self::assertEquals($usingJsonFlags, $usingMethodFlags); + } } diff --git a/src/Stream.php b/src/Stream.php index f966f2d8..a870669d 100644 --- a/src/Stream.php +++ b/src/Stream.php @@ -504,4 +504,9 @@ public function getSize(): int|false { return fstat($this->stream)['size'] ?? false; } + + public function getContents(?int $length = null, int $offset = -1): string|false + { + return stream_get_contents($this->stream, $length, $offset); + } }