diff --git a/docs/9.0/reader/record-mapping.md b/docs/9.0/reader/record-mapping.md new file mode 100644 index 00000000..da530d31 --- /dev/null +++ b/docs/9.0/reader/record-mapping.md @@ -0,0 +1,288 @@ +--- +layout: default +title: Deserializing a Tabular Data +--- + +# Mapping records to objects + +

New in version 9.12.0

+ +## Pre-requisite + +If you prefer working with objects instead of typed arrays it is possible to map each record to +a specified class. To do so, a new `Serializer` class is introduced to expose a deserialization mechanism +which expects the following: + +- the name of the class the array will be deserialized in; +- information on how to convert cell value into object properties using dedicated attributes; + +As an example if we assume we have the following CSV document: + +```csv +date,temperature,place +2011-01-01,1,Galway +2011-01-02,-1,Galway +2011-01-03,0,Galway +2011-01-01,6,Berkeley +2011-01-02,8,Berkeley +2011-01-03,5,Berkeley +``` + +We can define a PHP DTO using the following class and the attributes. + +```php + '!Y-m-d'])] + public DateTimeImmutable $date, + ) { + } +} + +enum Place +{ + case Berkeley; + case Galway; +} +``` + +To get instances of your object, you can now call the `Serializer::deserializeAll` method as shown below: + +```php +use League\Csv\Reader; +use League\Csv\Serializer; + +$csv = Reader::createFromString($document); +$csv->setHeaderOffset(0); +$serializer = new Serializer(Weather::class, $csv->getHeader()); +foreach ($serializer->deserializeAll($csv) as $weather) { + // each $weather entry will be an instance of the Weather class; +} +``` + +## Defining the mapping rules + +The `Record` attribute is responsible for converting array values into the appropriate instance +properties. This means that in order to use the `Record` attribute you are required to have +an associative `array`. + +The deserialization engine is able to cast the value into +the appropriate type if it is a `string` or `null` and the object public properties are typed with + +- `null` +- `mixed` +- a scalar type (support for `true` and `false` type is also present) +- any `Enum` object (backed or not) +- `DateTime`, `DateTimeImmutable` and any class that extends those two classes. 
+- an `array` + +When converting to a date object you can fine tune the conversion by optionally specifying the date +format and timezone. You can do so using the `Cell` attribute. This attribute will override the automatic +resolution and enable fine-tuning type casting on the property level. + +```php +use League\Csv\Serializer; +use Carbon\CarbonImmutable; + +#[Serializer\Cell( + offset:'date', + cast:Serializer\CastToDate::class, + castArguments: [ + 'format' => '!Y-m-d', + 'timezone' => 'Africa/Nairobi' + ]) +] +public CarbonImmutable $observedOn; +``` + +The above rule can be translated in plain English like this: + +> convert the value stored under the `date` key of the associative array into a `CarbonImmutable` object +> using the date format `!Y-m-d` and the `Africa/Nairobi` timezone. Once created, +> inject the date instance into the `observedOn` property of the class. + +The `Cell` attribute differs from the `Record` attribute as it can be used: + +- on class properties and methods (public, protected or private). +- with `array` as list (you are required, in this case, to specify the `offset` argument). + +The `Cell` attribute can take up to three (3) arguments which are all optional: + +- The `offset` argument which tells the engine which cell to use via its numeric or name offset. If not present +the property name or the name of the first argument of the `setter` method will be used. In such case, +you are required to specify the property names information. +- The `cast` argument which accepts the name of a class implementing the `TypeCasting` interface and is responsible +for type casting the cell value. +- The `castArguments` argument which enables controlling typecasting by providing extra arguments to the `TypeCasting` class constructor. + +In any case, if type casting fails, an exception will be thrown. + +## Type casting the record value + +The library comes bundled with four (4) type casting classes which rely on the property type information. 
All the +built-in classes support the `nullable` type. They will return `null` if the cell value is the empty string or `null` +only if the type is considered to be `nullable`; otherwise they will throw an exception. +All classes are defined under the `League\Csv\Serializer` namespace. + +### CastToBuiltInType + +Converts the array value to a scalar type or `null` depending on the property type information. This class has no +specific configuration but will work with all the scalar types, the `true`, `null` and `false` value types as well as +with the `mixed` type. Type casting is done using the `filter_var` functionality of the `ext-filter` extension. + +### CastToEnum + +Converts the array value to a PHP `Enum`; it supports both "pure" (unbacked) and backed enumerations. No configuration is needed; +if the value is not recognized an exception will be thrown. + +### CastToDate + +Converts the cell value into a PHP `DateTimeInterface` implementing object. You can optionally specify the date format and its timezone if needed. + +### CastToArray + +Converts the value into a PHP `array`. You are required to specify what type of conversion you desire (`list`, `json` or `csv`). + +The following are examples for each type: + +```php +$array['field1'] = "1,2,3,4"; //the string contains only a separator (type list) +$array['field2'] = '"1","2","3","4"'; //the string contains delimiter and enclosure (type csv) +$array['field3'] = '{"foo":"bar"}'; //the string is a json string (type json) +``` + +In case of + +- the `list` type you can configure the `separator`, by default it is the `,`; +- the `csv` type you can configure the `separator` and the `enclosure`, by default they are respectively `,` and `"`; +- the `json` type you can configure the `jsonDepth` and the `jsonFlags` using the `json_decode` arguments, the defaults are the same; + +Here's an example for casting a string via the `json` type. 
+ +```php +use League\Csv\Serializer; +use Carbon\CarbonImmutable; + +#[Serializer\Cell( + cast:Serializer\CastToArray::class, + castArguments: [ + 'type' => 'json', + 'jsonFlags' => JSON_BIGINT_AS_STRING + ]) +] +public array $data; +``` + +In the above example, the array has a JSON value associated with the key `data` and the `Serializer` will convert the +JSON string into an `array` and use the `JSON_BIGINT_AS_STRING` option of the `json_decode` function. + +### Creating your own TypeCasting class + +You can also provide your own class to typecast the array value according to your own rules. To do so, first, +specify your casting with the attribute: + +```php +use League\Csv\Serializer; +#[Serializer\Cell( + offset: 'rating', + cast: IntegerRangeCasting::class, + castArguments: ['min' => 0, 'max' => 5, 'default' => 2] +)] +private int $ratingScore; +``` + +The `IntegerRangeCasting` will convert cell value and return data between `0` and `5` and default to `2` if +the value is wrong or invalid. To allow your object to cast the cell value to your liking it needs to +implement the `TypeCasting` interface. To do so, you must define a `toVariable` method that will return +the correct value once converted. 
+ +```php +use League\Csv\Serializer\TypeCasting; +use League\Csv\Serializer\TypeCastingFailed; + +/** + * @implements TypeCasting<int|null> + */ +readonly class IntegerRangeCasting implements TypeCasting +{ + public function __construct( + private int $min, + private int $max, + private int $default, + ) { + if ($max < $min) { + throw new LogicException('The maximum value can not be lesser than the minimum value.'); + } + } + + public function toVariable(?string $value, string $type): ?int + { + // if the property is declared as nullable we exit early + if (in_array($value, ['', null], true) && str_starts_with($type, '?')) { + return null; + } + + //the type casting class must only work with property declared as integer + if ('int' !== ltrim($type, '?')) { + throw new TypeCastingFailed('The class '. self::class . ' can only work with integer typed property.'); + } + + return filter_var( + $value, + FILTER_VALIDATE_INT, + ['options' => ['min_range' => $this->min, 'max_range' => $this->max, 'default' => $this->default]] + ); + } +} +``` + +As you have probably noticed, the class constructor arguments are given to the `Cell` attribute via the +`castArguments` which can provide more fine-grained behaviour. + +## Converting an array to an object + +The `Serializer` class exposes three (3) methods to ease `array` to `object` conversion: + +- `Serializer::deserialize` which expects a single record as argument and returns on success an instance of the class. +- `Serializer::deserializeAll` which expects a collection of records and returns a collection of class instances. +- and the public static method `Serializer::map` which is a quick way to declare the mapping and convert a single record into an object. 
+ +```php +use League\Csv\Serializer; + +$record = [ + 'date' => '2023-10-30', + 'temperature' => '-1.5', + 'place' => 'Berkeley', +]; + +$weather = Serializer::map(Weather::class, $record); + +// this is the same as writing the following +$serializer = new Serializer(Weather::class, array_keys($record)); +$weather = $serializer->deserialize($record); +``` + +If you are working with a class which implements the `TabularDataReader` interface you can use this functionality directly +by calling the `map` method. + +We can rewrite the first example of this page as follows: + +```php +use League\Csv\Reader; + +$csv = Reader::createFromString($document); +$csv->setHeaderOffset(0); +foreach ($csv->map(Weather::class) as $weather) { + // each $weather entry will be an instance of the Weather class; +} +``` diff --git a/docs/9.0/reader/tabular-data-reader.md b/docs/9.0/reader/tabular-data-reader.md index 66b33c6f..374c29a1 100644 --- a/docs/9.0/reader/tabular-data-reader.md +++ b/docs/9.0/reader/tabular-data-reader.md @@ -79,6 +79,9 @@ $reader->getHeader(); //is empty because no header information was given ### getRecords +

Added in version 9.6.0 for ResultSet.

+

full mapper usage was completed in version 9.12 for Reader and ResultSet.

+ The `getRecords` enables iterating over all records from the current object. If the optional `$header` argument is given, it will be used as a mapper on the record and will update the record header and the value position. @@ -105,216 +108,20 @@ var_dump([...$records][0]); // ] ``` -

full mapper usage was completed in version 9.12 for Reader and ResultSet.

-

Added in version 9.6.0 for ResultSet.

-

If the header record contains non-unique string values, a Exception exception is triggered.

-

since 9.12.0 the optional $header is a full mapper

- The argument now links the records column offset to a specific column name. In other words this means -that the array key which MUST be a positive integer or `0` will correspond to the CSV column offset +that the array key which **MUST** be a positive integer or `0` will correspond to the CSV column offset and its value will represent its header value. This means that you can re-arrange the column order as well as removing or adding column to the returned iterator. Added column will only contain the `null` value. -### Mapping records to objects - -

New in version 9.12.0

- -If you prefer working with objects instead of typed arrays it is possible to convert each record using -the `map` method. This method will cast each array record into your specified object. To do so, -the method excepts: - -- as its sole argument the name of the class the array will be deserialized in; -- information on how to convert cell value into object properties using dedicated attributes; - -As an example if we assume we have the following CSV document: - -```csv -date,temperature,place -2011-01-01,1,Galway -2011-01-02,-1,Galway -2011-01-03,0,Galway -2011-01-01,6,Berkeley -2011-01-02,8,Berkeley -2011-01-03,5,Berkeley -``` - -We can define a PHP DTO using the following class and the attributes. - -```php - '!Y-m-d'])] - public DateTimeImmutable $date; - ) { - } -} - -enum Place -{ - case Berkeley; - case Galway; -} -``` - -To get instances of your object, you now can call the `map` method as show below: - -```php -$csv = Reader::createFromString($document); -$csv->setHeaderOffset(0); -foreach ($csv->map(Weather::class) as $weather) { - // each $weather entry will be an instance of the Weather class; -} -``` - -The `Record` attribute is responsible for converting record cell values into the appropriate instance -properties. This means that in order to use the `Record` attribute you are required to have -a `TabularDataReader` with a non-empty header. - -The deserialization engine is able to cast the tabular data value into -the appropriate type if its value is a `string` or `null` and the object public properties ares typed with - -- `null` -- `mixed` -- a scalar type (support for `true` and `false` type is also present) -- any `Enum` object (backed or not) -- `DateTime`, `DateTimeImmuntable` and any class that extends those two classes. - -When converting to a date object you can fine tune the conversion by optionally specifying the date -format and timezone. You can do so using the `Cell` attribute. 
This attribute will override the automatic -resolution and enable fine-tuning type casting on the property level. - -```php -use League\Csv\Serializer\Cell; -use Carbon\CarbonImmutable; - -#[Cell( - offset:'date', - cast:Serializer\CastToDate::class, - castArguments: [ - 'format' => '!Y-m-d', - 'timezone' => 'Africa/Nairobi' - ]) -] -public CarbonImmutable $observedOn; -``` - -The above rule can be translated in plain english like this: - -> convert the value of the record cell named `date` into a `CarbonImmutable` object to -> inject into the `observedOn` property of the class using the date format `!Y-m-d` and the `Africa/Nairobi` -> timezone and the `CarbonImmutable::class`. - -The `Cell` attribute differs from the `Record` attribute as it can be used: - -- on class properties and methods (public, protected or private). -- with tabular data **without header** (in absence of header you are required to specify the offset number). - -The `Cell` attribute can take up to three (3) arguments which are all optional: - -- The `offset` argument which tell the engine which cell to use via its numeric or name offset. If not present -the property name or the name of the first argument of the `setter` method will be used. In such case, -the tabular data must be using a non-empty header. -- The `cast` argument which accept the name of a class implementing the `TypeCasting` interface and responsible -for type casting the cell value. -- The `castArguments` which enable controlling typecasting by providing extra arguments to the `TypeCasting` class constructor - -In any cases, if type casting fails, an exception will be thrown. - -The library comes bundles with three (3) type casting classes which relies on the property type information: - -- `CastToBuiltInType`: converts the cell value to a scalar type or `null`, `true` depending on the property type information. -- `CastToDate`: converts the cell value into a PHP `DateTimeInterface` implementing object. 
You can optionally specify the date format and its timezone if needed. -- `CastToEnum`: converts the cell value into a PHP `Enum`. - -You can also provide your own class to typecast the cell value according to your own rules. To do so, first, -specify your casting with the attribute: - -```php -use League\Csv\Serializer\Cell; -#[Cell( - offset: 'rating', - cast: IntegerRangeCasting::class, - castArguments: ['min' => 0, 'max' => 5, 'default' => 2] -)] -private int $ratingScore; -``` - -The `IntegerRangeCasting` will convert cell value and return data between `0` and `5` and default to `2` if -the value is wrong or invalid. To allow your object to cast the cell value to your liking it needs to -implement the `TypeCasting` interface. To do so, you must define a `toVariable` method that will return -the correct value once converted. - -```php -use League\Csv\Serializer\TypeCasting; -use League\Csv\Serializer\TypeCastingFailed; - -/** - * @implements TypeCasting - */ -readonly class IntegerRangeCasting implements TypeCasting -{ - public function __construct( - private int $min, - private int $max, - private int $default, - ) { - if ($max < $min) { - throw new LogicException('The maximum value can not be lesser than the minimum value.'); - } - } - - public function toVariable(?string $value, string $type): ?int - { - // if the property is declared as nullable we exist early - if (in_array($value, ['', null], true) && str_starts_with($type, '?')) { - return null; - } - - //the type casting class must only work with property declared as integer - if ('int' !== ltrim($type, '?')) { - throw new TypeCastingFailed('The class '. self::class . 
' can only work with integer typed property.'); - } - - return filter_var( - $value, - FILTER_VALIDATE_INT, - ['options' => ['min' => $this->min, 'max' => $this->max, 'default' => $this->default]] - ); - } -} -``` - -As you have probably noticed, the class constructor arguments are given to the `Column` attribute via the -`castArguments` which can provide more fine-grained behaviour. - -Last but not least if you only which to convert a single record, you can do so using the `Serializer::map` static -method. - -```php -use League\Csv\Serializer; - -$record = [ - 'date' => '2023-10-30', - 'temperature' => '-1.5', - 'place' => 'Berkeley', -]; - -$weather = Serializer::map(Weather::class, $record); -``` +

If the header record contains non-unique string values, an Exception exception is triggered.

### value, first and nth +

first and nth were added in version 9.9.0 for Reader and ResultSet.

+

value was added in version 9.12.0 for Reader and ResultSet.

+ You may access any record using its offset starting at `0` in the collection using the `nth` method. if no record is found, an empty `array` is returned. @@ -344,8 +151,6 @@ $result->nth(0); As an alias to `nth`, the `first` method returns the first record from the instance without the need of an argument. -

Added in version 9.9.0 for Reader and ResultSet.

- If you are only interested in retrieving a specific value from a single row, you can use the `value` method. By default, it will return the first record item, but you are free to specify a specific column using the column name if the header is set and/or the @@ -369,10 +174,13 @@ $result->value('toto'); //returns null $result->value(42); //returns null ``` -

Added in version 9.12.0 for Reader and ResultSet.

+

The fetchOne method was deprecated in version 9.9.0. +It is recommended to use the nth method instead.

### exists +

Added in version 9.11.0 for Reader and ResultSet.

+ Tests for the existence of a record that satisfies a given predicate. ```php @@ -387,8 +195,6 @@ $exists = $resultSet->exists(fn (array $records) => in_array('twenty-five', $rec //$exists returns true if at least one cell contains the word `twenty-five` otherwise returns false, ``` -

Added in version 9.11.0 for Reader and ResultSet.

- ## Selecting columns ### fetchColumnByName @@ -480,6 +286,8 @@ foreach ($records->fetchPairs() as $firstname => $lastname) { ### each +

Added in version 9.11.0 for Reader and ResultSet.

+ The `each` method iterates over the records in the tabular data collection and passes each reacord to a closure. @@ -503,10 +311,10 @@ $reader->each(function (array $record, int $offset) use ($writer) { You may interrupt the iteration if the closure passed to `each` returns `false`. -

Added in version 9.11.0 for Reader and ResultSet.

- ### reduce +

Added in version 9.11.0 for Reader and ResultSet.

+ The `reduce` method reduces the tabular data structure to a single value, passing the result of each iteration into the subsequent iteration: @@ -524,7 +332,25 @@ $nbTotalCells = $resultSet->recude(fn (?int $carry, array $records) => ($carry ? The closure is similar as the one used with `array_reduce`. -

Added in version 9.11.0 for Reader and ResultSet.

+### map + +

Added in version 9.12.0 for Reader and ResultSet.

+ +If you prefer working with objects instead of typed arrays it is possible to convert each record using +the `map` method. This method will cast each array record into your specified object. + +To get instances of your object, you are required to call the `map` method as shown below: + +```php +$csv = Reader::createFromString($document); +$csv->setHeaderOffset(0); +foreach ($csv->map(Weather::class) as $weather) { + // each $weather entry will be an instance of the Weather class; +} +``` + +

You can get more info on how to configure your class to enable this feature by +visiting the record mapping documentation page

## Collection methods diff --git a/docs/_data/menu.yml b/docs/_data/menu.yml index 26e75d49..d0a9f16c 100644 --- a/docs/_data/menu.yml +++ b/docs/_data/menu.yml @@ -19,6 +19,7 @@ version: CSV Reader: '/9.0/reader/' Result Set: '/9.0/reader/resultset/' Constraint Builders: '/9.0/reader/statement/' + Record Mapping: '/9.0/reader/record-mapping/' Interoperability: Overview : '/9.0/interoperability/' Document Encoding : '/9.0/interoperability/encoding/' diff --git a/src/Serializer/CastToArray.php b/src/Serializer/CastToArray.php index 5bacf082..b88124b9 100644 --- a/src/Serializer/CastToArray.php +++ b/src/Serializer/CastToArray.php @@ -42,7 +42,6 @@ public function __construct( private readonly string $type, private readonly string $separator = ',', private readonly string $enclosure = '"', - private readonly string $escape = '\\', private readonly int $jsonDepth = 512, private readonly int $jsonFlags = 0, ) { @@ -51,7 +50,6 @@ public function __construct( 1 > $this->jsonDepth => throw new MappingFailed('the json depth can not be less than 1.'), /* @phpstan-ignore-line */ 1 !== strlen($this->separator) => throw new MappingFailed('expects delimiter to be a single character; `'.$this->separator.'` given.'), 1 !== strlen($this->enclosure) => throw new MappingFailed('expects enclosire to be a single character; `'.$this->enclosure.'` given.'), - '' !== $this->escape && 1 !== strlen($this->escape) => throw new MappingFailed('expects escape to be a single character or the empty string; `'.$this->escape.'` given.'), default => null, }; } @@ -73,7 +71,7 @@ public function toVariable(?string $value, string $type): ?array $result = match ($this->type) { self::TYPE_JSON => json_decode($value, true, $this->jsonDepth, $this->jsonFlags | JSON_THROW_ON_ERROR), self::TYPE_LIST => explode($this->separator, $value), - default => str_getcsv($value, $this->separator, $this->enclosure, $this->escape), + default => str_getcsv($value, $this->separator, $this->enclosure, ''), }; if 
(!is_array($result)) {