From 054605fe8415100de1b8d8032cfb0edaa7bf105f Mon Sep 17 00:00:00 2001 From: ignace nyamagana butera Date: Mon, 2 Oct 2023 23:40:00 +0200 Subject: [PATCH 1/5] Update select test suite --- src/Reader.php | 90 ++++++++++++++++++++++++------------- src/ReaderTest.php | 75 +++++++++++++++++++++++++++++++ src/ResultSet.php | 78 ++++++++++++++++++++++---------- src/ResultSetTest.php | 95 +++++++++++++++++++++++++++++++++++++++ src/TabularDataReader.php | 1 + 5 files changed, 283 insertions(+), 56 deletions(-) diff --git a/src/Reader.php b/src/Reader.php index f68ee9f7..58142998 100644 --- a/src/Reader.php +++ b/src/Reader.php @@ -121,6 +121,29 @@ protected function setHeader(int $offset): array return $header; } + /** + * @throws Exception + */ + private function prepareRecords(): Iterator + { + $normalized = fn ($record): bool => is_array($record) && ($this->is_empty_records_included || $record !== [null]); + $bom = ''; + if (!$this->is_input_bom_included) { + $bom = $this->getInputBOM(); + } + + $records = $this->stripBOM(new CallbackFilterIterator($this->getDocument(), $normalized), $bom); + if (null !== $this->header_offset) { + $records = new CallbackFilterIterator($records, fn (array $record, int $offset): bool => $offset !== $this->header_offset); + } + + if ($this->is_empty_records_included) { + $records = new MapIterator($records, fn (array $record): array => ([null] === $record) ? 
[] : $record); + } + + return $records; + } + /** * @throws Exception * @@ -329,49 +352,54 @@ public function sorted(Closure $orderBy): TabularDataReader */ public function getRecords(array $header = []): Iterator { - $normalized = fn ($record): bool => is_array($record) && ($this->is_empty_records_included || $record !== [null]); - $bom = ''; - if (!$this->is_input_bom_included) { - $bom = $this->getInputBOM(); + if ($header !== (array_filter($header, is_string(...)))) { + throw SyntaxError::dueToInvalidHeaderColumnNames(); } - $records = $this->stripBOM(new CallbackFilterIterator($this->getDocument(), $normalized), $bom); - if (null !== $this->header_offset) { - $records = new CallbackFilterIterator($records, fn (array $record, int $offset): bool => $offset !== $this->header_offset); - } - - if ($this->is_empty_records_included) { - $records = new MapIterator($records, fn (array $record): array => ([null] === $record) ? [] : $record); - } + return $this->combineHeader($this->prepareRecords(), $this->computeHeader($header)); + } - $header = $this->computeHeader($header); - $formatter = fn (array $record): array => array_reduce( - $this->formatters, - fn (array $record, callable $formatter): array => $formatter($record), - $record - ); + public function select(string|int ...$columns): TabularDataReader + { + $header = []; + $documentHeader = $this->getHeader(); + $hasNoHeader = [] === $documentHeader; + foreach ($columns as $field) { + if (is_string($field)) { + if ($hasNoHeader) { + throw new InvalidArgument(__METHOD__.' can only use named column if the tabular data has a non-empty header.'); + } - return match ([]) { - $header => new MapIterator($records, $formatter(...)), - default => new MapIterator($records, function (array $record) use ($header, $formatter): array { - $assocRecord = []; - foreach ($header as $offset => $headerName) { - $assocRecord[$headerName] = $record[$offset] ?? 
null; + $index = array_search($field, $this->header, true); + if (false === $index) { + throw InvalidArgument::dueToInvalidColumnIndex($field, 'offset', __METHOD__); } - return $formatter($assocRecord); - }), - }; + $header[$index] = $field; + continue; + } + + if (!$hasNoHeader && !array_key_exists($field, $documentHeader)) { + throw InvalidArgument::dueToInvalidColumnIndex($field, 'offset', __METHOD__); + } + + $header[$field] = $documentHeader[$field] ?? $field; + } + + return new ResultSet( + $this->combineHeader($this->prepareRecords(), $this->computeHeader($header)), + $documentHeader + ); } /** * Returns the header to be used for iteration. * - * @param array $header + * @param array $header * * @throws Exception If the header contains non unique column name * - * @return array + * @return array */ protected function computeHeader(array $header): array { @@ -380,8 +408,7 @@ protected function computeHeader(array $header): array } return match (true) { - $header !== ($filtered_header = array_filter($header, is_string(...))) => throw SyntaxError::dueToInvalidHeaderColumnNames(), - $header !== array_unique($filtered_header) => throw SyntaxError::dueToDuplicateHeaderColumnNames($header), + $header !== array_unique($header) => throw SyntaxError::dueToDuplicateHeaderColumnNames($header), [] !== array_filter(array_keys($header), fn (string|int $value) => !is_int($value) || $value < 0) => throw new SyntaxError('The header mapper indexes should only contain positive integer or 0.'), default => $header, }; @@ -491,7 +518,6 @@ public function fetchOne(int $nth_record = 0): array /** @codeCoverageIgnore */ protected function combineHeader(Iterator $iterator, array $header): Iterator { - $header = $this->computeHeader($header); $formatter = fn (array $record): array => array_reduce( $this->formatters, fn (array $record, callable $formatter): array => $formatter($record), diff --git a/src/ReaderTest.php b/src/ReaderTest.php index 4f3918b4..cf3f6475 100644 --- 
a/src/ReaderTest.php +++ b/src/ReaderTest.php @@ -673,4 +673,79 @@ public function testHeaderMapperFailsWithInvalidMapper(): void Reader::createFromString($csv) ->getRecords(['Annee' => 'Year', 'Prenom' => 'Firstname', 'Nombre' => 'Count']); } + + public function testTabularReaderSelect(): void + { + $csv = << 'temperature', 2 => 'place'], $reader->select(1, 2)->first()); + + $reader->setHeaderOffset(0); + + self::assertSame(['temperature' => '1', 'place' => 'Galway'], $reader->select(1, 2)->first()); + self::assertSame(['temperature' => '1', 'place' => 'Galway'], $reader->select('temperature', 'place')->first()); + self::assertSame(['temperature' => '1', 'place' => 'Galway'], $reader->select(1, 'place')->first()); + self::assertSame(['temperature' => '1', 'place' => 'Galway'], $reader->select('temperature', 2)->first()); + } + + public function testTabularReaderSelectFailsWithInvalidColumn(): void + { + $csv = <<expectException(InvalidArgument::class); + + Reader::createFromString($csv) + ->select('temperature', 'place'); + } + + public function testTabularReaderSelectFailsWithInvalidColumnName(): void + { + $csv = <<expectException(InvalidArgument::class); + + Reader::createFromString($csv) + ->setHeaderOffset(0) + ->select('temperature', 'foobar'); + } + + public function testTabularReaderSelectFailsWithInvalidColumnOffset(): void + { + $csv = <<expectException(InvalidArgument::class); + + Reader::createFromString($csv) + ->setHeaderOffset(0) + ->select(0, 18); + } } diff --git a/src/ResultSet.php b/src/ResultSet.php index de685823..daf1b1f0 100644 --- a/src/ResultSet.php +++ b/src/ResultSet.php @@ -42,9 +42,11 @@ class ResultSet implements TabularDataReader, JsonSerializable */ public function __construct(protected Iterator $records, array $header = []) { - $this->header = array_values( - $this->validateHeader($header) - ); + if ($header !== array_filter($header, is_string(...))) { + throw SyntaxError::dueToInvalidHeaderColumnNames(); + } + + $this->header = 
array_values($this->validateHeader($header)); } /** @@ -53,8 +55,7 @@ public function __construct(protected Iterator $records, array $header = []) protected function validateHeader(array $header): array { return match (true) { - $header !== ($filtered_header = array_filter($header, is_string(...))) => throw SyntaxError::dueToInvalidHeaderColumnNames(), - $header !== array_unique($filtered_header) => throw SyntaxError::dueToDuplicateHeaderColumnNames($header), + $header !== array_unique($header) => throw SyntaxError::dueToDuplicateHeaderColumnNames($header), [] !== array_filter(array_keys($header), fn (string|int $value) => !is_int($value) || $value < 0) => throw new SyntaxError('The header mapper indexes should only contain positive integer or 0.'), default => $header, }; @@ -161,14 +162,60 @@ public function sorted(Closure $orderBy): TabularDataReader * @return Iterator> */ public function getRecords(array $header = []): Iterator + { + if ($header !== array_filter($header, is_string(...))) { + throw SyntaxError::dueToInvalidHeaderColumnNames(); + } + + yield from $this->combineHeader($header); + } + + public function select(string|int ...$columns): TabularDataReader + { + $header = []; + $documentHeader = $this->getHeader(); + $hasNoHeader = [] === $documentHeader; + foreach ($columns as $field) { + if (is_string($field)) { + if ($hasNoHeader) { + throw new InvalidArgument(__METHOD__.' can only use named column if the tabular data has a non-empty header.'); + } + + $index = array_search($field, $this->header, true); + if (false === $index) { + throw InvalidArgument::dueToInvalidColumnIndex($field, 'offset', __METHOD__); + } + + $header[$index] = $field; + continue; + } + + if (!$hasNoHeader && !array_key_exists($field, $documentHeader)) { + throw InvalidArgument::dueToInvalidColumnIndex($field, 'offset', __METHOD__); + } + + $header[$field] = $documentHeader[$field] ?? 
$field; + } + + return new self($this->combineHeader($header), $documentHeader); + } + + /** + * Combines the header to each record if present. + * + * @param array $header + * + * @return Iterator> + */ + protected function combineHeader(array $header): Iterator { $header = $this->validateHeader($header); if ([] === $header) { $header = $this->header; } - yield from match (true) { - [] === $header => $this->records, + return match (true) { + $header === $this->header, [] === $header => $this->records, default => new MapIterator($this->records, function (array $record) use ($header): array { $assocRecord = []; $row = array_values($record); @@ -323,21 +370,4 @@ public function fetchColumn($index = 0): Iterator $this->getColumnIndex($index, 'offset', __METHOD__) ); } - - /** @codeCoverageIgnore */ - protected function combineHeader(array $header): Iterator - { - return match (true) { - $header === $this->header, [] === $header => $this->records, - default => new MapIterator($this->records, function (array $record) use ($header): array { - $assocRecord = []; - $row = array_values($record); - foreach ($header as $offset => $headerName) { - $assocRecord[$headerName] = $row[$offset] ?? 
null; - } - - return $assocRecord; - }), - }; - } } diff --git a/src/ResultSetTest.php b/src/ResultSetTest.php index ebbbb244..bd269efa 100644 --- a/src/ResultSetTest.php +++ b/src/ResultSetTest.php @@ -457,4 +457,99 @@ public function testHeaderMapperOnResultSetAlwaysIgnoreTheColumnName(): void ->process($reader) ->getRecords(['lastname' => 'nom de famille', 'firstname' => 'prenom', 'e-mail' => 'e-mail'])]; } + + + public function testTabularReaderSelect(): void + { + $csv = << 'temperature', 2 => 'place'], $reader->select(1, 2)->first()); + + $reader = ResultSet::createFromTabularDataReader(Reader::createFromString($csv)->setHeaderOffset(0)); + + self::assertSame(['temperature' => '1', 'place' => 'Galway'], $reader->select(1, 2)->first()); + self::assertSame(['temperature' => '1', 'place' => 'Galway'], $reader->select('temperature', 'place')->first()); + self::assertSame(['temperature' => '1', 'place' => 'Galway'], $reader->select(1, 'place')->first()); + self::assertSame(['temperature' => '1', 'place' => 'Galway'], $reader->select('temperature', 2)->first()); + } + + public function testTabularReaderSelectFailsWithInvalidColumn(): void + { + $csv = <<expectException(InvalidArgument::class); + + ResultSet::createFromTabularDataReader(Reader::createFromString($csv)) + ->select('temperature', 'place'); + } + + public function testTabularReaderSelectFailsWithInvalidColumnName(): void + { + $csv = <<expectException(InvalidArgument::class); + + ResultSet::createFromTabularDataReader( + Reader::createFromString($csv)->setHeaderOffset(0) + )->select('temperature', 'foobar'); + } + + public function testTabularReaderSelectFailsWithInvalidColumnOffset(): void + { + $csv = <<expectException(InvalidArgument::class); + + ResultSet::createFromTabularDataReader( + Reader::createFromString($csv)->setHeaderOffset(0) + )->select(0, 18); + } + + public function testTabularReaderGetRecordsFailsWithInvalidColumnName(): void + { + $csv = <<expectException(SyntaxError::class); + + 
[...ResultSet::createFromTabularDataReader( + Reader::createFromString($csv)->setHeaderOffset(0) + )->getRecords([72])]; /* @phpstan-ignore-line */ + } + } diff --git a/src/TabularDataReader.php b/src/TabularDataReader.php index 8df95cfc..c74a88d0 100644 --- a/src/TabularDataReader.php +++ b/src/TabularDataReader.php @@ -31,6 +31,7 @@ * @method TabularDataReader filter(Closure $closure) returns all the elements of this collection for which your callback function returns `true` * @method TabularDataReader slice(int $offset, int $length = null) extracts a slice of $length elements starting at position $offset from the Collection. * @method TabularDataReader sorted(Closure $orderBy) sorts the Collection according to the closure provided see Statement::orderBy method + * @method TabularDataReader select(string|int ...$columnOffsetOrName) extract a selection of the tabular data records columns. */ interface TabularDataReader extends Countable, IteratorAggregate { From 7146d987a4ab99e64bba1a44211fe80488cae904 Mon Sep 17 00:00:00 2001 From: ignace nyamagana butera Date: Mon, 2 Oct 2023 23:40:56 +0200 Subject: [PATCH 2/5] Adding support for RFC7111 via the FragmentFinder class --- src/FragmentFinder.php | 346 +++++++++++++++++++++++++++++++++++++ src/FragmentFinderTest.php | 262 ++++++++++++++++++++++++++++ src/FragmentNotFound.php | 20 +++ 3 files changed, 628 insertions(+) create mode 100644 src/FragmentFinder.php create mode 100644 src/FragmentFinderTest.php create mode 100644 src/FragmentNotFound.php diff --git a/src/FragmentFinder.php b/src/FragmentFinder.php new file mode 100644 index 00000000..a6b460fb --- /dev/null +++ b/src/FragmentFinder.php @@ -0,0 +1,346 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +declare(strict_types=1); + +namespace League\Csv; + +use function array_reduce; +use function count; +use function explode; +use function filter_var; +use function preg_match; +use function range; + +use const FILTER_VALIDATE_INT; + +final class FragmentFinder +{ + private const REGEXP_URI_FRAGMENT = ',^(?row|cell|col)=(?.*)$,i'; + private const REGEXP_ROWS_COLUMNS_SELECTION = '/^(?\d+)(-(?\d+|\*))?$/'; + private const REGEXP_CELLS_SELECTION = '/^(?\d+),(?\d+)(-(?((?\d+),(?\d+))|\*))?$/'; + + public readonly int $fallbackOffset; + + /** + * @throws InvalidArgument + */ + public function __construct(int $fallbackOffset = 0) + { + if (0 > $fallbackOffset) { + throw new InvalidArgument('The fallback offset must be greater or equals to 0.'); + } + + $this->fallbackOffset = $fallbackOffset; + } + + /** + * @throws InvalidArgument + */ + public function fallbackOffset(int $fallbackOffset = 0): self + { + return match (true) { + $fallbackOffset === $this->fallbackOffset => $this, + default => new self($fallbackOffset), + }; + } + + /** + * @throws SyntaxError if the expression can not be parsed + * + * @return iterable + */ + public function all(string $expression, TabularDataReader $tabularDataReader): iterable + { + foreach ($this->parseExpression($expression, $tabularDataReader) as $selection) { + if (-1 < $selection['start']) { + yield $tabularDataReader + ->slice($selection['start'], $selection['length']) + ->select(...$selection['columns']); + } + } + } + + /** + * @throws SyntaxError if the expression can not be parsed + * @throws FragmentNotFound if no fragment are found + * + * @return iterable + */ + public function allOrFail(string $expression, TabularDataReader $tabularDataReader): iterable + { + $selections = $this->parseExpression($expression, $tabularDataReader); + foreach ($selections as $selection) { + yield match (true) { + 0 > $selection['start'] => throw new FragmentNotFound('The expression `'.$selection['selection'].'` contains invalid 
selection.'), + default => $tabularDataReader + ->slice($selection['start'], $selection['length']) + ->select(...$selection['columns']), + }; + } + } + + /** + * @throws SyntaxError if the expression can not be parsed + * @throws FragmentNotFound if no fragment are found + */ + public function first(string $expression, TabularDataReader $tabularDataReader): ?TabularDataReader + { + foreach ($this->all($expression, $tabularDataReader) as $fragment) { + return $fragment; + } + + return null; + } + + /** + * @throws FragmentNotFound When the expression can not be use + * @throws SyntaxError if the expression can not be parsed + */ + public function firstOrFail(string $expression, TabularDataReader $tabularDataReader): TabularDataReader + { + foreach ($this->allOrFail($expression, $tabularDataReader) as $fragment) { + return $fragment; + } + + //@codeCoverageIgnoreStart + throw new FragmentNotFound('No fragment was found for the expression `'.$expression.'`.'); + //@codeCoverageIgnoreEnd + } + + /** + * @throws SyntaxError if the expression can not be parsed + * + * @return non-empty-array, length:int<-1, max>, columns:array}> + */ + private function parseExpression(string $expression, TabularDataReader $tabularDataReader): array + { + if (1 !== preg_match(self::REGEXP_URI_FRAGMENT, $expression, $matches)) { + throw new SyntaxError('The query expression `'.$expression.'` is invalid.'); + } + + $type = strtolower($matches['type']); + + /** @var non-empty-array, length:int<-1, max>, columns:array}> $res */ + $res = array_reduce( + explode(';', $matches['selections']), + fn (array $selections, string $selection): array => [...$selections, match ($type) { + 'row' => $this->parseRowSelection($selection), + 'col' => $this->parseColumnSelection($selection, $tabularDataReader), + default => $this->parseCellSelection($selection, $tabularDataReader), + }], + [] + ); + + return $res; + } + + /** + * @throws SyntaxError + * + * @return non-empty-array{selection:string, start:int, 
length:int, columns:array} + */ + private function parseRowSelection(string $selection): array + { + [$start, $end] = $this->parseRowColumnSelection($selection); + + return match (true) { + -1 === $start, + null === $end => [ + 'selection' => $selection, + 'start' => $start, + 'length' => 1, + 'columns' => [], + ], + '*' === $end => [ + 'selection' => $selection, + 'start' => $start, + 'length' => -1, + 'columns' => [], + ], + default => [ + 'selection' => $selection, + 'start' => $start, + 'length' => $end - $start + 1, + 'columns' => [], + ], + }; + } + + /** + * @throws SyntaxError + * + * @return non-empty-array{selection:string, start:int, length:int, columns:array} + */ + private function parseColumnSelection(string $selection, TabularDataReader $tabularDataReader): array + { + [$start, $end] = $this->parseRowColumnSelection($selection); + $header = $tabularDataReader->getHeader(); + if ([] === $header) { + $header = $tabularDataReader->nth($this->fallbackOffset); + } + + $nbColumns = count($header); + + return match (true) { + -1 === $start, + $start >= $nbColumns => [ + 'selection' => $selection, + 'start' => -1, + 'length' => -1, + 'columns' => [], + ], + null === $end => [ + 'selection' => $selection, + 'start' => 0, + 'length' => -1, + 'columns' => [$start], + ], + '*' === $end, + $end > ($nbColumns - 1) => [ + 'selection' => $selection, + 'start' => 0, + 'length' => -1, + 'columns' => range($start, $nbColumns), + ], + default => [ + 'selection' => $selection, + 'start' => 0, + 'length' => -1, + 'columns' => range($start, $end), + ], + }; + } + + /** + * @throws SyntaxError + * + * @return array{int<-1, max>, int|null|'*'} + */ + private function parseRowColumnSelection(string $selection): array + { + if (1 !== preg_match(self::REGEXP_ROWS_COLUMNS_SELECTION, $selection, $found)) { + throw new SyntaxError('The selection `'.$selection.'` is invalid.'); + } + + $start = $found['start']; + $end = $found['end'] ?? 
null; + $start = filter_var($start, FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); + if (false === $start) { + throw new SyntaxError('The selection `'.$selection.'` is invalid.'); + } + --$start; + + if (null === $end || '*' === $end) { + return [$start, $end]; + } + + $end = filter_var($end, FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); + if (false === $end) { + throw new SyntaxError('The selection `'.$selection.'` is invalid.'); + } + --$end; + + if ($end <= $start) { + return [-1, 0]; + } + + return [$start, $end]; + } + + /** + * @throws SyntaxError + * + * @return non-empty-array{selection:string, start:int, length:int, columns:array} + */ + private function parseCellSelection(string $selection, TabularDataReader $tabularDataReader): array + { + if (1 !== preg_match(self::REGEXP_CELLS_SELECTION, $selection, $found)) { + throw new SyntaxError('The selection `'.$selection.'` is invalid.'); + } + + $cellStartRow = filter_var($found['csr'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); + if (false === $cellStartRow) { + throw new SyntaxError('The selection `'.$selection.'` is invalid.'); + } + + $cellStartCol = filter_var($found['csc'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); + if (false === $cellStartCol) { + throw new SyntaxError('The selection `'.$selection.'` is invalid.'); + } + --$cellStartRow; + --$cellStartCol; + + $header = $tabularDataReader->getHeader(); + if ([] === $header) { + $header = $tabularDataReader->nth($this->fallbackOffset); + } + + $nbColumns = count($header); + + if ($cellStartCol > $nbColumns - 1) { + return [ + 'selection' => $selection, + 'start' => -1, + 'length' => 1, + 'columns' => [], + ]; + } + + $cellEnd = $found['end'] ?? 
null; + if (null === $cellEnd) { + return [ + 'selection' => $selection, + 'start' => $cellStartRow, + 'length' => 1, + 'columns' => [$cellStartCol], + ]; + } + + if ('*' === $cellEnd) { + return [ + 'selection' => $selection, + 'start' => $cellStartRow, + 'length' => -1, + 'columns' => range($cellStartCol, $nbColumns - 1), + ]; + } + + $cellEndRow = filter_var($found['cer'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); + if (false === $cellEndRow) { + throw new SyntaxError('The selection `'.$selection.'` is invalid.'); + } + + $cellEndCol = filter_var($found['cec'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); + if (false === $cellEndCol) { + throw new SyntaxError('The selection `'.$selection.'` is invalid.'); + } + + --$cellEndRow; + --$cellEndCol; + + if ($cellEndRow < $cellStartRow || $cellEndCol < $cellStartCol) { + return [ + 'selection' => $selection, + 'start' => -1, + 'length' => 1, + 'columns' => [], + ]; + } + + return [ + 'selection' => $selection, + 'start' => $cellStartRow, + 'length' => $cellEndRow - $cellStartRow + 1, + 'columns' => range($cellStartCol, ($cellEndCol > $nbColumns - 1) ? $nbColumns - 1 : $cellEndCol), + ]; + } +} diff --git a/src/FragmentFinderTest.php b/src/FragmentFinderTest.php new file mode 100644 index 00000000..bf772bb8 --- /dev/null +++ b/src/FragmentFinderTest.php @@ -0,0 +1,262 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +declare(strict_types=1); + +namespace League\Csv; + +use Generator; +use PHPUnit\Framework\Attributes\DataProvider; +use PHPUnit\Framework\Attributes\Test; +use PHPUnit\Framework\TestCase; +use RuntimeException; + +final class FragmentFinderTest extends TestCase +{ + private Reader $reader; + private FragmentFinder $finder; + + protected function setUp(): void + { + parent::setUp(); + + $csv = <<reader = Reader::createFromString($csv); + $this->finder = new FragmentFinder(); + } + + #[Test] + public function it_will_fail_instantiate_with_invalid_fallback_record(): void + { + $this->expectException(InvalidArgument::class); + + new FragmentFinder(-1); + } + + #[Test] + public function it_can_update_its_fallback_setting(): void + { + $finder = new FragmentFinder(1); + + self::assertSame(1, $finder->fallbackOffset); + self::assertSame($finder, $finder->fallbackOffset(1)); + self::assertNotSame($finder, $finder->fallbackOffset(2)); + } + + #[Test] + #[DataProvider('provideValidExpressions')] + public function it_can_select_a_specific_fragment(string $expression, ?array $expected): void + { + $result = $this->finder->first($expression, $this->reader); + if (null === $expected) { + self::assertNull($result); + + return; + } + + self::assertSame($expected, [...$result]); /* @phpstan-ignore-line */ + } + + #[Test] + #[DataProvider('provideValidExpressions')] + public function it_can_select_a_specific_fragment_or_fail(string $expression, ?array $expected): void + { + if (null === $expected) { + $this->expectException(RuntimeException::class); + + $this->finder->firstOrFail($expression, $this->reader); + + return; + } + + self::assertSame($expected, [...$this->finder->firstOrFail($expression, $this->reader)]); + } + + public static function provideValidExpressions(): iterable + { + yield 'single row' => [ + 'expression' => 'row=4', + 'expected' => [ + 0 => ['2011-01-03', '0', 'Galway'], + ], + ]; + + yield 'single row is case insensitive' => [ + 'expression' => 
'RoW=4', + 'expected' => [ + 0 => ['2011-01-03', '0', 'Galway'], + ], + ]; + + yield 'row range' => [ + 'expression' => 'row=5-7', + 'expected' => [ + 0 => ['2011-01-01', '6', 'Berkeley'], + 1 => ['2011-01-02', '8', 'Berkeley'], + 2 => ['2011-01-03', '5', 'Berkeley'], + ], + ]; + + yield 'all remaining rows' => [ + 'expression' => 'row=5-*', + 'expected' => [ + 0 => ['2011-01-01', '6', 'Berkeley'], + 1 => ['2011-01-02', '8', 'Berkeley'], + 2 => ['2011-01-03', '5', 'Berkeley'], + ], + ]; + + yield 'single column' => [ + 'expression' => 'col=2', + 'expected' => [ + 0 => [1 => 'temperature'], + 1 => [1 => '1'], + 2 => [1 => '-1'], + 3 => [1 => '0'], + 4 => [1 => '6'], + 5 => [1 => '8'], + 6 => [1 => '5'], + ], + ]; + + yield 'column range' => [ + 'expression' => 'col=1-2', + 'expected' => [ + 0 => ['date', 'temperature'], + 1 => ['2011-01-01', '1'], + 2 => ['2011-01-02', '-1'], + 3 => ['2011-01-03', '0'], + 4 => ['2011-01-01', '6'], + 5 => ['2011-01-02', '8'], + 6 => ['2011-01-03', '5'], + ], + ]; + + yield 'single cell selection' => [ + 'expression' => 'cell=4,1', + 'expected' => [ + 0 => ['2011-01-03'], + ], + ]; + + yield 'single range selection' => [ + 'expression' => 'cell=4,1-6,2', + 'expected' => [ + 0 => ['2011-01-03', '0'], + 1 => ['2011-01-01', '6'], + 2 => ['2011-01-02', '8'], + ], + ]; + + yield 'single range selection without end limit' => [ + 'expression' => 'cell=5,2-*', + 'expected' => [ + 0 => [1 => '6', 2 => 'Berkeley'], + 1 => [1 => '8', 2 => 'Berkeley'], + 2 => [1 => '5', 2 => 'Berkeley'], + ], + ]; + + yield 'row range is inverted' => [ + 'expression' => 'row=4-2', + 'expected' => null, + ]; + + yield 'column range is inverted' => [ + 'expression' => 'col=4-2', + 'expected' => null, + ]; + + yield 'cell range is inverted' => [ + 'expression' => 'cell=3,3-2,2', + 'expected' => null, + ]; + + yield 'cell range is out of range for the tabular reader data' => [ + 'expression' => 'cell=3,3-30,56', + 'expected' => [ + 0 => [2 => 'Galway'], + 1 => [2 => 
'Galway'], + 2 => [2 => 'Berkeley'], + 3 => [2 => 'Berkeley'], + 4 => [2 => 'Berkeley'], + ], + ]; + + yield 'single cell out of the tabular data' => [ + 'expression' => 'cell=48,12', + 'expected' => null, + ]; + } + + #[Test] + #[DataProvider('provideInvalidExpressions')] + public function it_will_fail_to_parse_the_expression(string $expression): void + { + $this->expectException(SyntaxError::class); + + $this->finder->first($expression, $this->reader); + } + + public static function provideInvalidExpressions(): iterable + { + return [ + 'missing expression type' => ['2-4'], + 'missing expression selection row' => ['row='], + 'missing expression selection cell' => ['cell='], + 'missing expression selection coll' => ['col='], + 'expression selection is invalid for cell 1' => ['cell=5'], + 'expression selection is invalid for cell 2' => ['cell=0,3'], + 'expression selection is invalid for cell 3' => ['cell=3,0'], + 'expression selection is invalid for cell 4' => ['cell=1,3-0,4'], + 'expression selection is invalid for cell 5' => ['cell=1,3-4,0'], + 'expression selection is invalid for cell 6' => ['cell=0,3-1,4'], + 'expression selection is invalid for cell 7' => ['cell=1,0-2,3'], + 'expression selection is invalid for row or column 1' => ['row=4,3'], + 'expression selection is invalid for row or column 2' => ['row=four-five'], + 'expression selection is invalid for row or column 3' => ['row=0-3'], + 'expression selection is invalid for row or column 4' => ['row=3-0'], + ]; + } + + #[Test] + public function it_returns_multiple_selections(): void + { + /** @var Generator $result */ + $result = $this->finder->all('row=1-2;5-4;2-4', $this->reader); + + self::assertCount(2, iterator_to_array($result)); + } + + #[Test] + public function it_returns_no_selection(): void + { + /** @var Generator $result */ + $result = $this->finder->all('row=5-4', $this->reader); + + self::assertCount(0, iterator_to_array($result)); + } + + #[Test] + public function 
it_fails_if_no_selection_is_found(): void + { + self::assertCount(1, iterator_to_array($this->finder->firstOrFail('row=7-8', $this->reader))); + } +} diff --git a/src/FragmentNotFound.php b/src/FragmentNotFound.php new file mode 100644 index 00000000..5ac3583a --- /dev/null +++ b/src/FragmentNotFound.php @@ -0,0 +1,20 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +declare(strict_types=1); + +namespace League\Csv; + +use RuntimeException; + +final class FragmentNotFound extends RuntimeException implements UnableToProcessCsv +{ +} From cfed7d32d5f28c0e37c29feeb08daeb7e76c3246 Mon Sep 17 00:00:00 2001 From: ignace nyamagana butera Date: Mon, 2 Oct 2023 23:41:07 +0200 Subject: [PATCH 3/5] Update changelog --- CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d9d3dba7..a0959e4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,8 @@ All Notable changes to `Csv` will be documented in this file ### Added -- None +- `TabulatDataReader::select` (implemented on the `Reader` and the `ResultSet` object) +- `FragmentParser` to implement [RFC7111](https://www.rfc-editor.org/rfc/rfc7111) ### Deprecated @@ -28,7 +29,7 @@ the next version. 
Use `TabularDataReader::getRecords` on the returned value inst - `EscapeFormula::unescapeRecord` does the opposite of `EscapeFormula::escapeRecord` - `TabularReader::each` (implemented on the `Reader` and the `ResultSet` object) - `TabularReader::exists` (implemented on the `Reader` and the `ResultSet` object) -- `TabularReader::reduce` (implemented on the `Reader` and the `ResultSet` object)**** +- `TabularReader::reduce` (implemented on the `Reader` and the `ResultSet` object) - `TabularReader::filter` (implemented on the `Reader` and the `ResultSet` object) - `TabularReader::slice` (implemented on the `Reader` and the `ResultSet` object) - `TabularReader::sorted` (implemented on the `Reader` and the `ResultSet` object) From 370d48fbeeca65eb3b5313f7a2d0e8d02bb56443 Mon Sep 17 00:00:00 2001 From: ignace nyamagana butera Date: Tue, 3 Oct 2023 16:42:25 +0200 Subject: [PATCH 4/5] Improve FragmentFinder implementation --- CHANGELOG.md | 8 +- phpunit.xml | 1 + src/AbstractCsv.php | 112 +++++------ src/FragmentFinder.php | 175 +++++++++--------- src/FragmentFinderReaderTest.php | 32 ++++ src/FragmentFinderResultSetTest.php | 32 ++++ ...derTest.php => FragmentFinderTestCase.php} | 74 ++------ src/RFC4180FieldTest.php | 4 +- src/Reader.php | 41 ++-- src/ResultSet.php | 51 +++-- src/TabularDataReader.php | 3 + 11 files changed, 302 insertions(+), 231 deletions(-) create mode 100644 src/FragmentFinderReaderTest.php create mode 100644 src/FragmentFinderResultSetTest.php rename src/{FragmentFinderTest.php => FragmentFinderTestCase.php} (77%) diff --git a/CHANGELOG.md b/CHANGELOG.md index a0959e4d..0ded718a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,13 +6,17 @@ All Notable changes to `Csv` will be documented in this file ### Added -- `TabulatDataReader::select` (implemented on the `Reader` and the `ResultSet` object) -- `FragmentParser` to implement [RFC7111](https://www.rfc-editor.org/rfc/rfc7111) +- `TabulatDataReader::select` +- `TabulatDataReader::matching` +- 
`TabularDataReader::firstMatching` +- `TabularDataReader::firstOrFailMatching` +- `FragmentFinder` to implement [RFC7111](https://www.rfc-editor.org/rfc/rfc7111) ### Deprecated - Using the `$header` argument on `Statement::process` is deprecated and will be removed in the next version. Use `TabularDataReader::getRecords` on the returned value instead. +Its usage will trigger an `E_USER_DEPRECATED` notice. ### Fixed diff --git a/phpunit.xml b/phpunit.xml index e5807cf2..a4ea478b 100644 --- a/phpunit.xml +++ b/phpunit.xml @@ -21,6 +21,7 @@ src + src src src diff --git a/src/AbstractCsv.php b/src/AbstractCsv.php index 51325337..80e769eb 100644 --- a/src/AbstractCsv.php +++ b/src/AbstractCsv.php @@ -170,34 +170,6 @@ public function getInputBOM(): string return $this->input_bom; } - /** - * DEPRECATION WARNING! This method will be removed in the next major point release. - * - * @deprecated since version 9.7.0 - * @see AbstractCsv::supportsStreamFilterOnRead - * @see AbstractCsv::supportsStreamFilterOnWrite - * - * Returns the stream filter mode. - */ - public function getStreamFilterMode(): int - { - return static::STREAM_FILTER_MODE; - } - - /** - * DEPRECATION WARNING! This method will be removed in the next major point release. - * - * @deprecated since version 9.7.0 - * @see AbstractCsv::supportsStreamFilterOnRead - * @see AbstractCsv::supportsStreamFilterOnWrite - * - * Tells whether the stream filter capabilities can be used. - */ - public function supportsStreamFilter(): bool - { - return $this->document instanceof Stream; - } - /** * Tells whether the stream filter read capabilities can be used. */ @@ -254,32 +226,6 @@ public function chunk(int $length): Generator } } - /** - * DEPRECATION WARNING! This method will be removed in the next major point release. 
- * - * @deprecated since version 9.1.0 - * @see AbstractCsv::toString - * - * Retrieves the CSV content - */ - public function __toString(): string - { - return $this->toString(); - } - - /** - * Retrieves the CSV content. - * - * DEPRECATION WARNING! This method will be removed in the next major point release - * - * @deprecated since version 9.7.0 - * @see AbstractCsv::toString - */ - public function getContent(): string - { - return $this->toString(); - } - /** * Retrieves the CSV content. * @@ -465,4 +411,62 @@ public function addStreamFilter(string $filtername, null|array $params = null): return $this; } + + /** + * DEPRECATION WARNING! This method will be removed in the next major point release. + * + * @deprecated since version 9.7.0 + * @see AbstractCsv::supportsStreamFilterOnRead + * @see AbstractCsv::supportsStreamFilterOnWrite + * @codeCoverageIgnore + * + * Returns the stream filter mode. + */ + public function getStreamFilterMode(): int + { + return static::STREAM_FILTER_MODE; + } + + /** + * DEPRECATION WARNING! This method will be removed in the next major point release. + * + * @deprecated since version 9.7.0 + * @see AbstractCsv::supportsStreamFilterOnRead + * @see AbstractCsv::supportsStreamFilterOnWrite + * @codeCoverageIgnore + * + * Tells whether the stream filter capabilities can be used. + */ + public function supportsStreamFilter(): bool + { + return $this->document instanceof Stream; + } + + /** + * Retrieves the CSV content. + * + * DEPRECATION WARNING! This method will be removed in the next major point release + * + * @deprecated since version 9.7.0 + * @see AbstractCsv::toString + * @codeCoverageIgnore + */ + public function getContent(): string + { + return $this->toString(); + } + + /** + * DEPRECATION WARNING! This method will be removed in the next major point release. 
+ * + * @deprecated since version 9.1.0 + * @see AbstractCsv::toString + * @codeCoverageIgnore + * + * Retrieves the CSV content + */ + public function __toString(): string + { + return $this->toString(); + } } diff --git a/src/FragmentFinder.php b/src/FragmentFinder.php index a6b460fb..1da2b451 100644 --- a/src/FragmentFinder.php +++ b/src/FragmentFinder.php @@ -13,6 +13,8 @@ namespace League\Csv; +use Iterator; + use function array_reduce; use function count; use function explode; @@ -28,37 +30,10 @@ final class FragmentFinder private const REGEXP_ROWS_COLUMNS_SELECTION = '/^(?\d+)(-(?\d+|\*))?$/'; private const REGEXP_CELLS_SELECTION = '/^(?\d+),(?\d+)(-(?((?\d+),(?\d+))|\*))?$/'; - public readonly int $fallbackOffset; - - /** - * @throws InvalidArgument - */ - public function __construct(int $fallbackOffset = 0) - { - if (0 > $fallbackOffset) { - throw new InvalidArgument('The fallback offset must be greater or equals to 0.'); - } - - $this->fallbackOffset = $fallbackOffset; - } - /** - * @throws InvalidArgument + * @return Iterator */ - public function fallbackOffset(int $fallbackOffset = 0): self - { - return match (true) { - $fallbackOffset === $this->fallbackOffset => $this, - default => new self($fallbackOffset), - }; - } - - /** - * @throws SyntaxError if the expression can not be parsed - * - * @return iterable - */ - public function all(string $expression, TabularDataReader $tabularDataReader): iterable + public function all(string $expression, TabularDataReader $tabularDataReader): Iterator { foreach ($this->parseExpression($expression, $tabularDataReader) as $selection) { if (-1 < $selection['start']) { @@ -69,29 +44,6 @@ public function all(string $expression, TabularDataReader $tabularDataReader): i } } - /** - * @throws SyntaxError if the expression can not be parsed - * @throws FragmentNotFound if no fragment are found - * - * @return iterable - */ - public function allOrFail(string $expression, TabularDataReader $tabularDataReader): iterable - { 
- $selections = $this->parseExpression($expression, $tabularDataReader); - foreach ($selections as $selection) { - yield match (true) { - 0 > $selection['start'] => throw new FragmentNotFound('The expression `'.$selection['selection'].'` contains invalid selection.'), - default => $tabularDataReader - ->slice($selection['start'], $selection['length']) - ->select(...$selection['columns']), - }; - } - } - - /** - * @throws SyntaxError if the expression can not be parsed - * @throws FragmentNotFound if no fragment are found - */ public function first(string $expression, TabularDataReader $tabularDataReader): ?TabularDataReader { foreach ($this->all($expression, $tabularDataReader) as $fragment) { @@ -102,40 +54,47 @@ public function first(string $expression, TabularDataReader $tabularDataReader): } /** - * @throws FragmentNotFound When the expression can not be use * @throws SyntaxError if the expression can not be parsed */ public function firstOrFail(string $expression, TabularDataReader $tabularDataReader): TabularDataReader { - foreach ($this->allOrFail($expression, $tabularDataReader) as $fragment) { - return $fragment; + foreach ($this->parseExpression($expression, $tabularDataReader) as $selection) { + return match ($selection['start']) { + -1 => throw new SyntaxError('The '.$selection['type'].' 
selection `'.$selection['selection'].'` is invalid.'), + default => $tabularDataReader + ->slice($selection['start'], $selection['length']) + ->select(...$selection['columns']), + }; } - - //@codeCoverageIgnoreStart - throw new FragmentNotFound('No fragment was found for the expression `'.$expression.'`.'); - //@codeCoverageIgnoreEnd } /** - * @throws SyntaxError if the expression can not be parsed - * - * @return non-empty-array, length:int<-1, max>, columns:array}> + * @return non-empty-array, length:int<-1, max>, columns:array}> */ private function parseExpression(string $expression, TabularDataReader $tabularDataReader): array { if (1 !== preg_match(self::REGEXP_URI_FRAGMENT, $expression, $matches)) { - throw new SyntaxError('The query expression `'.$expression.'` is invalid.'); + return [[ + 'type' => 'unknown', + 'selection' => $expression, + 'start' => -1, + 'length' => -1, + 'columns' => [], + ]]; } $type = strtolower($matches['type']); + if ('col' === $type) { + $type = 'column'; + } - /** @var non-empty-array, length:int<-1, max>, columns:array}> $res */ + /** @var non-empty-array, length:int<-1, max>, columns:array}> $res */ $res = array_reduce( explode(';', $matches['selections']), fn (array $selections, string $selection): array => [...$selections, match ($type) { - 'row' => $this->parseRowSelection($selection), - 'col' => $this->parseColumnSelection($selection, $tabularDataReader), - default => $this->parseCellSelection($selection, $tabularDataReader), + 'row' => $this->parseRowSelection($type, $selection), + 'column' => $this->parseColumnSelection($type, $selection, $tabularDataReader), + default => $this->parseCellSelection($type, $selection, $tabularDataReader), }], [] ); @@ -144,29 +103,30 @@ private function parseExpression(string $expression, TabularDataReader $tabularD } /** - * @throws SyntaxError - * - * @return non-empty-array{selection:string, start:int, length:int, columns:array} + * @return array{type:string, selection:string, start:int, 
length:int, columns:array} */ - private function parseRowSelection(string $selection): array + private function parseRowSelection(string $type, string $selection): array { [$start, $end] = $this->parseRowColumnSelection($selection); return match (true) { -1 === $start, null === $end => [ + 'type' => $type, 'selection' => $selection, 'start' => $start, 'length' => 1, 'columns' => [], ], '*' === $end => [ + 'type' => $type, 'selection' => $selection, 'start' => $start, 'length' => -1, 'columns' => [], ], default => [ + 'type' => $type, 'selection' => $selection, 'start' => $start, 'length' => $end - $start + 1, @@ -176,16 +136,14 @@ private function parseRowSelection(string $selection): array } /** - * @throws SyntaxError - * - * @return non-empty-array{selection:string, start:int, length:int, columns:array} + * @return array{type:string, selection:string, start:int, length:int, columns:array} */ - private function parseColumnSelection(string $selection, TabularDataReader $tabularDataReader): array + private function parseColumnSelection(string $type, string $selection, TabularDataReader $tabularDataReader): array { [$start, $end] = $this->parseRowColumnSelection($selection); $header = $tabularDataReader->getHeader(); if ([] === $header) { - $header = $tabularDataReader->nth($this->fallbackOffset); + $header = $tabularDataReader->first(); } $nbColumns = count($header); @@ -193,12 +151,14 @@ private function parseColumnSelection(string $selection, TabularDataReader $tabu return match (true) { -1 === $start, $start >= $nbColumns => [ + 'type' => $type, 'selection' => $selection, 'start' => -1, 'length' => -1, 'columns' => [], ], null === $end => [ + 'type' => $type, 'selection' => $selection, 'start' => 0, 'length' => -1, @@ -206,12 +166,14 @@ private function parseColumnSelection(string $selection, TabularDataReader $tabu ], '*' === $end, $end > ($nbColumns - 1) => [ + 'type' => $type, 'selection' => $selection, 'start' => 0, 'length' => -1, 'columns' => range($start, 
$nbColumns), ], default => [ + 'type' => $type, 'selection' => $selection, 'start' => 0, 'length' => -1, @@ -221,21 +183,19 @@ private function parseColumnSelection(string $selection, TabularDataReader $tabu } /** - * @throws SyntaxError - * * @return array{int<-1, max>, int|null|'*'} */ private function parseRowColumnSelection(string $selection): array { if (1 !== preg_match(self::REGEXP_ROWS_COLUMNS_SELECTION, $selection, $found)) { - throw new SyntaxError('The selection `'.$selection.'` is invalid.'); + return [-1, 0]; } $start = $found['start']; $end = $found['end'] ?? null; $start = filter_var($start, FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); if (false === $start) { - throw new SyntaxError('The selection `'.$selection.'` is invalid.'); + return [-1, 0]; } --$start; @@ -245,7 +205,7 @@ private function parseRowColumnSelection(string $selection): array $end = filter_var($end, FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); if (false === $end) { - throw new SyntaxError('The selection `'.$selection.'` is invalid.'); + return [-1, 0]; } --$end; @@ -257,37 +217,54 @@ private function parseRowColumnSelection(string $selection): array } /** - * @throws SyntaxError - * - * @return non-empty-array{selection:string, start:int, length:int, columns:array} + * @return array{type:string, selection:string, start:int, length:int, columns:array} */ - private function parseCellSelection(string $selection, TabularDataReader $tabularDataReader): array + private function parseCellSelection(string $type, string $selection, TabularDataReader $tabularDataReader): array { if (1 !== preg_match(self::REGEXP_CELLS_SELECTION, $selection, $found)) { - throw new SyntaxError('The selection `'.$selection.'` is invalid.'); + return [ + 'type' => $type, + 'selection' => $selection, + 'start' => -1, + 'length' => 1, + 'columns' => [], + ]; } $cellStartRow = filter_var($found['csr'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); if (false === $cellStartRow) { - 
throw new SyntaxError('The selection `'.$selection.'` is invalid.'); + return [ + 'type' => $type, + 'selection' => $selection, + 'start' => -1, + 'length' => 1, + 'columns' => [], + ]; } $cellStartCol = filter_var($found['csc'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); if (false === $cellStartCol) { - throw new SyntaxError('The selection `'.$selection.'` is invalid.'); + return [ + 'type' => $type, + 'selection' => $selection, + 'start' => -1, + 'length' => 1, + 'columns' => [], + ]; } --$cellStartRow; --$cellStartCol; $header = $tabularDataReader->getHeader(); if ([] === $header) { - $header = $tabularDataReader->nth($this->fallbackOffset); + $header = $tabularDataReader->first(); } $nbColumns = count($header); if ($cellStartCol > $nbColumns - 1) { return [ + 'type' => $type, 'selection' => $selection, 'start' => -1, 'length' => 1, @@ -298,6 +275,7 @@ private function parseCellSelection(string $selection, TabularDataReader $tabula $cellEnd = $found['end'] ?? null; if (null === $cellEnd) { return [ + 'type' => $type, 'selection' => $selection, 'start' => $cellStartRow, 'length' => 1, @@ -307,6 +285,7 @@ private function parseCellSelection(string $selection, TabularDataReader $tabula if ('*' === $cellEnd) { return [ + 'type' => $type, 'selection' => $selection, 'start' => $cellStartRow, 'length' => -1, @@ -316,12 +295,24 @@ private function parseCellSelection(string $selection, TabularDataReader $tabula $cellEndRow = filter_var($found['cer'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); if (false === $cellEndRow) { - throw new SyntaxError('The selection `'.$selection.'` is invalid.'); + return [ + 'type' => $type, + 'selection' => $selection, + 'start' => -1, + 'length' => 1, + 'columns' => [], + ]; } $cellEndCol = filter_var($found['cec'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); if (false === $cellEndCol) { - throw new SyntaxError('The selection `'.$selection.'` is invalid.'); + return [ + 'type' => $type, + 'selection' 
=> $selection, + 'start' => -1, + 'length' => 1, + 'columns' => [], + ]; } --$cellEndRow; @@ -329,6 +320,7 @@ private function parseCellSelection(string $selection, TabularDataReader $tabula if ($cellEndRow < $cellStartRow || $cellEndCol < $cellStartCol) { return [ + 'type' => $type, 'selection' => $selection, 'start' => -1, 'length' => 1, @@ -337,6 +329,7 @@ private function parseCellSelection(string $selection, TabularDataReader $tabula } return [ + 'type' => $type, 'selection' => $selection, 'start' => $cellStartRow, 'length' => $cellEndRow - $cellStartRow + 1, diff --git a/src/FragmentFinderReaderTest.php b/src/FragmentFinderReaderTest.php new file mode 100644 index 00000000..007a1b6e --- /dev/null +++ b/src/FragmentFinderReaderTest.php @@ -0,0 +1,32 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +declare(strict_types=1); + +namespace League\Csv; + +final class FragmentFinderReaderTest extends FragmentFinderTestCase +{ + protected function getFragmentIdentifierTabularData(): TabularDataReader + { + $csv = << + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +declare(strict_types=1); + +namespace League\Csv; + +use ArrayIterator; + +final class FragmentFinderResultSetTest extends FragmentFinderTestCase +{ + protected function getFragmentIdentifierTabularData(): TabularDataReader + { + return new ResultSet(new ArrayIterator([ + ['date', 'temperature', 'place'], + ['2011-01-01', '1', 'Galway'], + ['2011-01-02', '-1', 'Galway'], + ['2011-01-03', '0', 'Galway'], + ['2011-01-01', '6', 'Berkeley'], + ['2011-01-02', '8', 'Berkeley'], + ['2011-01-03', '5', 'Berkeley'], + ])); + } +} diff --git a/src/FragmentFinderTest.php b/src/FragmentFinderTestCase.php similarity index 77% rename from src/FragmentFinderTest.php rename to src/FragmentFinderTestCase.php index bf772bb8..bb8d6646 100644 --- a/src/FragmentFinderTest.php +++ b/src/FragmentFinderTestCase.php @@ -13,58 +13,21 @@ namespace League\Csv; -use Generator; use PHPUnit\Framework\Attributes\DataProvider; +use PHPUnit\Framework\Attributes\Group; use PHPUnit\Framework\Attributes\Test; use PHPUnit\Framework\TestCase; -use RuntimeException; -final class FragmentFinderTest extends TestCase +#[Group('tabulardata')] +abstract class FragmentFinderTestCase extends TestCase { - private Reader $reader; - private FragmentFinder $finder; - - protected function setUp(): void - { - parent::setUp(); - - $csv = <<reader = Reader::createFromString($csv); - $this->finder = new FragmentFinder(); - } - - #[Test] - public function it_will_fail_instantiate_with_invalid_fallback_record(): void - { - $this->expectException(InvalidArgument::class); - - new FragmentFinder(-1); - } - - #[Test] - public function it_can_update_its_fallback_setting(): void - { - $finder = new FragmentFinder(1); - - self::assertSame(1, $finder->fallbackOffset); - self::assertSame($finder, $finder->fallbackOffset(1)); - self::assertNotSame($finder, $finder->fallbackOffset(2)); - } + abstract protected function getFragmentIdentifierTabularData(): TabularDataReader; #[Test] #[DataProvider('provideValidExpressions')] 
public function it_can_select_a_specific_fragment(string $expression, ?array $expected): void { - $result = $this->finder->first($expression, $this->reader); + $result = $this->getFragmentIdentifierTabularData()->firstMatching($expression); if (null === $expected) { self::assertNull($result); @@ -79,14 +42,14 @@ public function it_can_select_a_specific_fragment(string $expression, ?array $ex public function it_can_select_a_specific_fragment_or_fail(string $expression, ?array $expected): void { if (null === $expected) { - $this->expectException(RuntimeException::class); + $this->expectException(SyntaxError::class); - $this->finder->firstOrFail($expression, $this->reader); + $this->getFragmentIdentifierTabularData()->firstOrFailMatching($expression); return; } - self::assertSame($expected, [...$this->finder->firstOrFail($expression, $this->reader)]); + self::assertSame($expected, [...$this->getFragmentIdentifierTabularData()->firstOrFailMatching($expression)]); } public static function provideValidExpressions(): iterable @@ -206,13 +169,20 @@ public static function provideValidExpressions(): iterable ]; } + #[Test] + #[DataProvider('provideInvalidExpressions')] + public function it_will_return_null_on_invalid_expression(string $expression): void + { + self::assertNull($this->getFragmentIdentifierTabularData()->firstMatching($expression)); + } + #[Test] #[DataProvider('provideInvalidExpressions')] public function it_will_fail_to_parse_the_expression(string $expression): void { $this->expectException(SyntaxError::class); - $this->finder->first($expression, $this->reader); + $this->getFragmentIdentifierTabularData()->firstOrFailMatching($expression); } public static function provideInvalidExpressions(): iterable @@ -239,24 +209,18 @@ public static function provideInvalidExpressions(): iterable #[Test] public function it_returns_multiple_selections(): void { - /** @var Generator $result */ - $result = $this->finder->all('row=1-2;5-4;2-4', $this->reader); - - 
self::assertCount(2, iterator_to_array($result)); + self::assertCount(2, iterator_to_array($this->getFragmentIdentifierTabularData()->matching('row=1-2;5-4;2-4'))); } #[Test] public function it_returns_no_selection(): void { - /** @var Generator $result */ - $result = $this->finder->all('row=5-4', $this->reader); - - self::assertCount(0, iterator_to_array($result)); + self::assertCount(0, iterator_to_array($this->getFragmentIdentifierTabularData()->matching('row=5-4'))); } #[Test] public function it_fails_if_no_selection_is_found(): void { - self::assertCount(1, iterator_to_array($this->finder->firstOrFail('row=7-8', $this->reader))); + self::assertCount(1, iterator_to_array($this->getFragmentIdentifierTabularData()->firstOrFailMatching('row=7-8'))); } } diff --git a/src/RFC4180FieldTest.php b/src/RFC4180FieldTest.php index 5223b9bf..b4b4bf82 100644 --- a/src/RFC4180FieldTest.php +++ b/src/RFC4180FieldTest.php @@ -54,9 +54,9 @@ public function testStreamFilterOnWrite(string $expected, array $record): void $csv = Writer::createFromPath('php://temp'); RFC4180Field::addTo($csv); self::assertContains(RFC4180Field::getFiltername(), stream_get_filters()); - $csv->setNewline("\r\n"); + $csv->setEndOfLine("\r\n"); $csv->insertOne($record); - self::assertSame($expected, $csv->getContent()); + self::assertSame($expected, $csv->toString()); } public static function bugsProvider(): array diff --git a/src/Reader.php b/src/Reader.php index 58142998..914bd406 100644 --- a/src/Reader.php +++ b/src/Reader.php @@ -343,20 +343,23 @@ public function sorted(Closure $orderBy): TabularDataReader return Statement::create()->orderBy($orderBy)->process($this); } + public function matching(string $expression): Iterator + { + return (new FragmentFinder())->all($expression, $this); + } + + public function firstMatching(string $expression): ?TabularDataReader + { + return (new FragmentFinder())->first($expression, $this); + } + /** - * @param array $header - * - * @throws Exception - * - * 
@return Iterator> + * @throws SyntaxError + * @throws FragmentNotFound */ - public function getRecords(array $header = []): Iterator + public function firstOrFailMatching(string $expression): TabularDataReader { - if ($header !== (array_filter($header, is_string(...)))) { - throw SyntaxError::dueToInvalidHeaderColumnNames(); - } - - return $this->combineHeader($this->prepareRecords(), $this->computeHeader($header)); + return (new FragmentFinder())->firstOrFail($expression, $this); } public function select(string|int ...$columns): TabularDataReader @@ -392,6 +395,22 @@ public function select(string|int ...$columns): TabularDataReader ); } + /** + * @param array $header + * + * @throws Exception + * + * @return Iterator> + */ + public function getRecords(array $header = []): Iterator + { + if ($header !== (array_filter($header, is_string(...)))) { + throw SyntaxError::dueToInvalidHeaderColumnNames(); + } + + return $this->combineHeader($this->prepareRecords(), $this->computeHeader($header)); + } + /** * Returns the header to be used for iteration. 
* diff --git a/src/ResultSet.php b/src/ResultSet.php index daf1b1f0..665a0470 100644 --- a/src/ResultSet.php +++ b/src/ResultSet.php @@ -154,22 +154,6 @@ public function sorted(Closure $orderBy): TabularDataReader return Statement::create()->orderBy($orderBy)->process($this); } - /** - * @param array $header - * - * @throws SyntaxError - * - * @return Iterator> - */ - public function getRecords(array $header = []): Iterator - { - if ($header !== array_filter($header, is_string(...))) { - throw SyntaxError::dueToInvalidHeaderColumnNames(); - } - - yield from $this->combineHeader($header); - } - public function select(string|int ...$columns): TabularDataReader { $header = []; @@ -200,6 +184,41 @@ public function select(string|int ...$columns): TabularDataReader return new self($this->combineHeader($header), $documentHeader); } + public function matching(string $expression): iterable + { + return (new FragmentFinder())->all($expression, $this); + } + + public function firstMatching(string $expression): ?TabularDataReader + { + return (new FragmentFinder())->first($expression, $this); + } + + /** + * @throws SyntaxError + * @throws FragmentNotFound + */ + public function firstOrFailMatching(string $expression): TabularDataReader + { + return (new FragmentFinder())->firstOrFail($expression, $this); + } + + /** + * @param array $header + * + * @throws SyntaxError + * + * @return Iterator> + */ + public function getRecords(array $header = []): Iterator + { + if ($header !== array_filter($header, is_string(...))) { + throw SyntaxError::dueToInvalidHeaderColumnNames(); + } + + yield from $this->combineHeader($header); + } + /** * Combines the header to each record if present. 
* diff --git a/src/TabularDataReader.php b/src/TabularDataReader.php index c74a88d0..eceacadc 100644 --- a/src/TabularDataReader.php +++ b/src/TabularDataReader.php @@ -32,6 +32,9 @@ * @method TabularDataReader slice(int $offset, int $length = null) extracts a slice of $length elements starting at position $offset from the Collection. * @method TabularDataReader sorted(Closure $orderBy) sorts the Collection according to the closure provided see Statement::orderBy method * @method TabularDataReader select(string|int ...$columnOffsetOrName) extract a selection of the tabular data records columns. + * @method TabularDataReader firstOrFailMatching(string $expression) extract the first found fragment identifier of the tabular data or fail + * @method TabularDataReader|null firstMatching(string $expression) extract the first found fragment identifier of the tabular data or return null if none is found + * @method Iterator matching(string $expression) extract all found fragment identifiers for the tabular data */ interface TabularDataReader extends Countable, IteratorAggregate { From abcbeede159943c7022f3d8a6d4a40439098db62 Mon Sep 17 00:00:00 2001 From: ignace nyamagana butera Date: Tue, 3 Oct 2023 19:24:53 +0200 Subject: [PATCH 5/5] Update documentation with the new feature --- docs/9.0/reader/index.md | 285 +++----------- docs/9.0/reader/resultset.md | 512 +------------------------ docs/9.0/reader/statement.md | 209 +++++----- docs/9.0/reader/tabular-data-reader.md | 390 +++++++++++++++++++ docs/_data/menu.yml | 3 +- 5 files changed, 568 insertions(+), 831 deletions(-) create mode 100644 docs/9.0/reader/tabular-data-reader.md diff --git a/docs/9.0/reader/index.md b/docs/9.0/reader/index.md index 8ca0536c..b87e41b3 100644 --- a/docs/9.0/reader/index.md +++ b/docs/9.0/reader/index.md @@ -5,18 +5,17 @@ title: CSV document Reader connection # Reader Connection -The `League\Csv\Reader` class extends the general connections [capabilities](/9.0/connections/) to ease selecting 
and manipulating CSV document records. +The `League\Csv\Reader` class extends the general connections [capabilities](/9.0/connections/) to ease selecting +and manipulating CSV document records.

Starting with version 9.1.0, createFromPath will have its default set to r.

Prior to 9.1.0, by default, the mode for a Reader::createFromPath is r+ which looks for write permissions on the file and throws an Exception if the file cannot be opened with the permission set. For sake of clarity, it is strongly suggested to set r mode on the file to ensure it can be opened.

Starting with version 9.6.0, the class implements the League\Csv\TabularDataReader interface.

-

Starting with version 9.8.0, the class implements the ::fetchColumnByName and ::fetchColumnByOffset methods.

-

Starting with version 9.9.0, the class implements the ::first and ::nth methods.

-

Starting with version 9.11.0, the class implements the collections methods.

## CSV example -Many examples in this reference require a CSV file. We will use the following file `file.csv` containing the following data: +Many examples in this reference require a CSV file. We will use the following file `file.csv` +containing the following data: ```csv "First Name","Last Name",E-mail @@ -30,6 +29,8 @@ jane,jane You can set and retrieve the header offset as well as its corresponding record. +

getHeader is part of the TabularDataReader interface.

+ ### Description ```php @@ -55,8 +56,9 @@ If no header offset is set: - `Reader::getHeaderOffset` will return `null`.

By default no header offset is set.

- -

Because the header is lazy loaded, if you provide a positive offset for an invalid record a SyntaxError exception will be triggered when trying to access the invalid record.

+

Because the header is lazy loaded, if you provide a positive offset +for an invalid record a SyntaxError exception will be triggered when trying +to access the invalid record.

```php use League\Csv\Reader; @@ -80,7 +82,8 @@ $header_offset = $csv->getHeaderOffset(); //returns 0 $header = $csv->getHeader(); //throws a SyntaxError exception ``` -

Starting with 9.7.0 the SyntaxError exception thrown will return the list of duplicate column names.

+

Starting with 9.7.0 the SyntaxError exception thrown +will return the list of duplicate column names.

```php use League\Csv\Reader; @@ -103,10 +106,13 @@ try { public Reader::getRecords(array $header = []): Iterator ``` +

getRecords is part of the TabularDataReader interface.

+ ### Reader::getRecords basic usage The `Reader` class lets you access all its records using the `Reader::getRecords` method. -The method returns an `Iterator` containing all CSV document records. It will extract the records using the [CSV controls characters](/9.0/connections/controls/); +The method returns an `Iterator` containing all CSV document records. It will extract +the records using the [CSV controls characters](/9.0/connections/controls/); ```php use League\Csv\Reader; @@ -126,7 +132,8 @@ foreach ($records as $offset => $record) { ### Reader::getRecords with Reader::setHeaderOffset -If you specify the CSV header offset using `setHeaderOffset`, the found record will be combined to each CSV record to return an associative array whose keys are composed of the header values. +If you specify the CSV header offset using `setHeaderOffset`, the found record will be combined to +each CSV record to return an associative array whose keys are composed of the header values. ```php use League\Csv\Reader; @@ -147,7 +154,8 @@ foreach ($records as $offset => $record) { ### Reader::getRecords with its optional argument -Conversely, you can submit your own header record using the optional `$header` argument of the `getRecords` method. +Conversely, you can submit your own header record using the optional `$header` argument of +the `getRecords` method. ```php use League\Csv\Reader; @@ -165,7 +173,9 @@ foreach ($records as $offset => $record) { } ``` -

The optional $header argument from the Reader::getRecords takes precedence over the header offset property but its corresponding record will still be removed from the returned Iterator.

+

The optional $header argument of Reader::getRecords +takes precedence over the header offset property, but the record found at the header offset will still be removed +from the returned Iterator.

```php use League\Csv\Reader; @@ -185,7 +195,8 @@ foreach ($records as $offset => $record) { //the first record will still be skipped!! ``` -

In both cases, if the header record contains non-unique string values, a Exception exception is triggered.

+

In both cases, if the header record contains non-unique string values, +an Exception exception is triggered.

since 9.12.0 the optional $header is a full mapper

@@ -193,8 +204,8 @@ The argument now links the records column offset to a specific column name. In o that the array key which MUST be a positive integer or `0` will correspond to the CSV column offset and its value will represent its header value. -This means that you can re-arrange the column order as well as removing or adding column to the returned iterator. -Added column will only contain the `null` value. +This means that you can re-arrange the column order as well as removing or adding column to the +returned iterator. Added column will only contain the `null` value. Here's an example of the new behaviour. @@ -222,27 +233,6 @@ var_dump([...$records][0]); As you can see the `Count` column is missing, the `Year` and `Firstname` columns are re-arranged but present and the extra `Yolo` column is added with the value `null` -### Using the IteratorAggregate interface - -Because the `Reader` class implements the `IteratorAggregate` interface you can directly iterate over each record using the `foreach` construct and an instantiated `Reader` object. -You will get the same results as if you had called `Reader::getRecords` without its optional argument. - -```php -use League\Csv\Reader; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$reader->setHeaderOffset(0); -foreach ($reader as $offset => $record) { - //$offset : represents the record offset - //var_export($record) returns something like - // array( - // 'First Name' => 'john', - // 'Last Name' => 'doe', - // 'E-mail' => john.doe@example.com' - // ); -} -``` - ## Records normalization ### General Rules @@ -322,7 +312,8 @@ echo $reader->toString(); //returns the original $csv value without the formatti

New since version 9.4.0

-By default, the CSV document normalization removes empty records, but you can control the presence of such records using the following methods: +By default, the CSV document normalization removes empty records, but you can control the presence of +such records using the following methods: ```php Reader::skipEmptyRecords(): self; @@ -330,10 +321,13 @@ Reader::includeEmptyRecords(): self; Reader::isEmptyRecordsIncluded(): bool; ``` -- Calling `Reader::includeEmptyRecords` will ensure empty records are left in the `Iterator` returned by `Reader::getRecords`, -conversely `Reader::skipEmptyRecords` will ensure empty records are skipped. -- At any given time you can ask your Reader instance if empty records will be stripped or included using the `Reader::isEmptyRecordsIncluded` method. -- If no header offset is specified, the empty record will be represented by an empty `array`. Conversely, for consistency, an empty record will be represented by an array filled with `null` values as expected from header presence normalization. +- Calling `Reader::includeEmptyRecords` will ensure empty records are left in the `Iterator` returned by +`Reader::getRecords`, conversely `Reader::skipEmptyRecords` will ensure empty records are skipped. +- At any given time you can ask your Reader instance if empty records will be stripped or +included using the `Reader::isEmptyRecordsIncluded` method. +- If no header offset is specified, the empty record will be represented by an empty `array`. +Conversely, for consistency, an empty record will be represented by an array filled +with `null` values as expected from header presence normalization.

The record offset is always independent of the presence of empty records.

@@ -381,205 +375,21 @@ $res = iterator_to_array($reader, true); // ]; ``` -## Records count - -You can retrieve the number of records contained in a CSV document using PHP's `count` function because the `Reader` class implements the `Countable` interface. - -```php -use League\Csv\Reader; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -count($reader); //returns 4 -``` - -If a header offset is specified, the number of records will not take into account the header record. - -```php -use League\Csv\Reader; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$reader->setHeaderOffset(0); -count($reader); //returns 3 -``` - -

New since version 9.4.0

- -If empty records are to be preserved, the number of records will be affected. - -```php -use League\Csv\Reader; - -$reader = Reader::createFromPath('/path/to/my/file-with-two-empty-records.csv', 'r'); -$reader->isEmptyRecordsIncluded(); //returns false -count($reader); //returns 2 - -$reader->includeEmptyRecords(); -$reader->isEmptyRecordsIncluded(); //returns true -count($reader); //returns 4 -``` - -

The Countable interface is implemented using PHP's iterator_count on the Reader::getRecords method.

- -## Records selection - -### Simple Usage - -```php -public Reader::fetchColumnByName(string $columnName): Iterator -public Reader::fetchColumnByIndex(int $columnIndex = 0): Iterator -public Reader::fetchColumn(string|int $columnIndex = 0): Generator -public Reader::first(): array -public Reader::nth(int $nth_record): array -public Reader::fetchOne(int $nth_record = 0): array -public Reader::fetchPairs(string|int $offsetIndex = 0, string|int $valueIndex = 1): Generator -``` - -Using method overloading, you can directly access all retrieving methods attached to the [ResultSet](/9.0/reader/resultset/#records) object. - -#### Example - -```php -use League\Csv\Reader; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); - -$records = $reader->fetchColumnByOffset(2); -//$records is a Generator representing all the fields of the CSV 3rd column -``` - -### Advanced Usage - -If you require a more advance record selection, you should use a [Statement](/9.0/reader/statement/) object to process the `Reader` object. The found records are returned as a [ResultSet](/9.0/reader/resultset) object. - -#### Example - -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$stmt = (new Statement()) - ->offset(3) - ->limit(5) -; - -$records = $stmt->process($reader); -//$records is a League\Csv\ResultSet object -``` - -## Collection methods - -

New since version 9.11.0

- -To ease working with the loaded CSV document the following methods derived from collection are added. -Some are just wrapper methods around the `Statement` class while others use the iterable nature -of the CSV document. - -### Reader::each - -Iterates over the records in the CSV document and passes each item to a closure: - -```php -use League\Csv\Reader; -use League\Csv\Writer; - -$writer = Writer::createFromString(''); -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$reader->each(function (array $record, int $offset) use ($writer) { - if ($offset < 10) { - return $writer->insertOne($record); - } - - return false; -}); - -//$writer will contain at most 10 lines coming from the $reader document. -// the iteration stopped when the closure return false. -``` - -### Reader::exists - -Tests for the existence of an element that satisfies the given predicate. - -```php -use League\Csv\Reader; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$exists = $reader->exists(fn (array $records) => in_array('twenty-five', $records, true)); - -//$exists returns true if at cell one cell contains the word `twenty-five` otherwise returns false, -``` - -### Reader::reduce - -Applies iteratively the given function to each element in the collection, so as to reduce the collection to -a single value. - -```php -use League\Csv\Reader; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$nbTotalCells = $reader->recude(fn (?int $carry, array $records) => ($carry ?? 0) + count($records)); - -//$records contains the total number of celle contains in the CSV documents. -``` - -### Reader::filter - -Returns all the elements of this collection for which your callback function returns `true`. The order and keys of the elements are preserved. - -

Wraps the functionality of Statement::where.

- -```php -use League\Csv\Reader; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$records = $reader->filter(fn (array $record): => 5 === count($record)); - -//$recors is a ResultSet object with only records with 5 elements -``` - -### Reader::slice - -Extracts a slice of $length elements starting at position $offset from the Collection. -If $length is `-1` it returns all elements from `$offset` to the end of the -Collection. Keys have to be preserved by this method. Calling this -method will only return the selected slice and NOT change the -elements contained in the collection slice is called on. - -

Wraps the functionality of Statement::offset -and Statement::limit.

- -```php -use League\Csv\Reader; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$records = $reader->slice(10, 25); - -//$records contains up to 25 rows starting at the offest 10 (the eleventh rows) -``` - -### Reader::sorted - -Sorts the CSV document while keeping the original keys. - -```php -use League\Csv\Reader; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$records = $reader->sorted(fn (array $recordA, array $recordB) => $recordA['firstname'] <=> $recordB['firstname']); - -//$records is a ResultSet containing the sorted CSV document. -//The original $reader is not changed -``` +## Selecting records -

Wraps the functionality of Statement::orderBy.

+Please head over to the [TabularDataReader documentation page](/9.0/reader/tabular-data-reader) +for more information on the class features. If you require a more advanced record selection, you +should use a [Statement or a FragmentFinder](/9.0/reader/statement/) class to process the `Reader` object. The +found records are returned as a [ResultSet](/9.0/reader/resultset) object. ## Records conversion ### Json serialization -The `Reader` class implements the `JsonSerializable` interface. As such you can use the `json_encode` function directly on the instantiated object. The interface is implemented using PHP's `iterator_array` on the `Reader::getRecords` method. As such, the returned `JSON` string data depends on the presence or absence of a header. +The `Reader` class implements the `JsonSerializable` interface. As such you can use the `json_encode` +function directly on the instantiated object. The interface is implemented using PHP's +`iterator_to_array` on the `Reader::getRecords` method. As such, the returned `JSON` +string data depends on the presence or absence of a header. ```php use League\Csv\Reader; @@ -629,8 +439,11 @@ echo json_encode($result, JSON_PRETTY_PRINT), PHP_EOL;

The record offset is not preserved on conversion

-

To convert your CSV to JSON you must be sure its content is UTF-8 encoded, using, for instance, the library CharsetConverter stream filter.

+

To convert your CSV to JSON you must be sure its content +is UTF-8 encoded, using, for instance, the library +CharsetConverter stream filter.

### Other conversions -If you wish to convert your CSV document in `XML` or `HTML` please refer to the [converters](/9.0/converter/) bundled with this library. +If you wish to convert your CSV document in `XML` or `HTML` please refer to the [converters](/9.0/converter/) bundled +with this library. diff --git a/docs/9.0/reader/resultset.md b/docs/9.0/reader/resultset.md index 30d0b98b..a2a6bf74 100644 --- a/docs/9.0/reader/resultset.md +++ b/docs/9.0/reader/resultset.md @@ -5,515 +5,26 @@ title: Accessing Records from a CSV document # Result Set -A `League\Csv\ResultSet` object represents the associated result set of processing a [CSV document](/9.0/reader/) with a [constraint builder](/9.0/reader/statement/). This object is returned from [Statement::process](/9.0/reader/statement/#apply-the-constraints-to-a-csv-document) execution. +A `League\Csv\ResultSet` object represents the associated result set of processing a [CSV document](/9.0/reader/) with a [constraint builder](/9.0/reader/statement/). +This object is returned from [Statement::process](/9.0/reader/statement/#apply-the-constraints-to-a-csv-document) execution.

Starting with version 9.6.0, the class implements the League\Csv\TabularDataReader interface.

-

Starting with version 9.8.0, the class implements the ::fetchColumnByName and ::fetchColumnByOffset methods.

-## Information +## Selecting records -### Accessing the result set column names - -```php -public ResultSet::getHeader(): array -``` - -`ResultSet::getHeader` returns the header associated with the current object. - -#### Example: no header information was given - -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$records = Statement::create()->process($reader); -$records->getHeader(); //is empty because no header information was given -``` - -#### Example: header information given by the Reader object - -```php -use League\Csv\Reader; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$reader->setHeaderOffset(0); - -$records = Statement::create()->process($reader); -$records->getHeader(); //returns ['First Name', 'Last Name', 'E-mail']; -``` - -#### Example: header information given by the Statement object - -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$reader->setHeaderOffset(0); - -$records = Statement::create()->process($reader, ['Prénom', 'Nom', 'E-mail']); -$records->getHeader(); //returns ['Prénom', 'Nom', 'E-mail']; -``` - -### Accessing the number of records in the result set - -The `ResultSet` class implements the `Countable` interface. - -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$records = Statement::create()->process($reader); -count($records); //return the total number of records found -``` - -## Records - -### Description - -```php -public ResultSet::getRecords(array $header = []): Iterator -``` - -

Starting with version 9.6.0, the implemented ResultSet::getRecords method matches the same arguments and the same signature as the Reader::getRecords method.

- -To iterate over each found record you can call the `ResultSet::getRecords` method which returns a `Generator` of all records found or directly use the `foreach` construct as the class implements the `IteratorAggregate` interface: - -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$records = Statement::create()->process($reader); - -foreach ($records->getRecords() as $record) { - //do something here -} - -foreach ($records as $record) { - //do something here -} -``` - -

since 9.12.0 the optional $header is a full mapper

- -The argument now links the records column offset to a specific column name. In other words this means -that the array key which MUST be a positive integer or `0` will correspond to the CSV column offset -and its value will represent its header value. - -This means that you can re-arrange the column order as well as removing or adding column to the returned iterator. -Added column will only contain the `null` value. - -Here's an example of the new behaviour. - -```php -use League\Csv\Reader; - -$csv = <<process($reader); -$records = $resultSet->getRecords([3 => 'Year', 0 => 'Firstname', 4 => 'Yolo']); -var_dump([...$records][0]); -//returns something like this -// array:4 [ -// "Year" => "2004", -// "Firstname" => "Abel", -// "Yolo" => null, -// ] -``` - -As you can see the `Count` column is missing, the `Year` and `Firstname` columns are re-arranged but -present and the extra `Yolo` column is added with the value `null` - -### Usage with the header - -If the `ResultSet::getHeader` is not an empty `array` the found records keys will contain the returned values. - -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$reader->setHeaderOffset(0); -$records = Statement::create()->process($reader); -$records->getHeader(); //returns ['First Name', 'Last Name', 'E-mail'] -foreach ($records as $record) { - // $record contains the following data - // array( - // 'First Name' => 'john', - // 'Last Name' => 'doe', - // 'E-mail' => 'john.doe@example.com', - // ); -} -``` - -## Selecting a specific record - -Since version 9.9.0, the class implements the `::first` and `::nth` methods. -These methods replace the `::fetchOne` method which is deprecated and will be removed in the next major release. - -These methods all return a single record from the `ResultSet`. 
- -```php -public ResultSet::fetchOne(int $nth_record = 0): array -public ResultSet::first(): array -public ResultSet::nth(int $nth_record): array -``` - -The `$nth_record` argument represents the nth record contained in the result set starting at `0`. -In the case of `fetchOne`, if no argument is given the method will return the first record from the result set. - -In all cases, if no record is found, an empty `array` is returned. - -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$reader->setHeaderOffset(0); - -$stmt = Statement::create() - ->offset(10) - ->limit(12) -; -$result = $stmt->process($reader); - -$result->fetchOne(3); -$result->nth(3); -// access the 4th record from the recordset (indexing starts at 0) -// will return something like this : -// -// ['john', 'doe', 'john.doe@example.com'] -// - -$result->fetchOne(); -$result->first(); -$result->nth(0); -//returns the first matching record from the recordset or an empty record if none is found. -``` - -

nth will throw an ArgumentCountError if no argument is given to it.

- -## Selecting a single column - -```php -public ResultSet::fetchColumnByName(string $name): Iterator -public ResultSet::fetchColumnByOffset(int $offset = 0): Iterator -public ResultSet::fetchColumn(string|int $columnIndex = 0): Iterator -``` - -Since version 9.8.0, the class implements the `::fetchColumnByName` and `::fetchColumnByOffset` methods. -These methods replace the `::fetchColumn` method which is deprecated and will be removed in the next major release. - -Both methods return an `Iterator` of all values in a given column from the `ResultSet` object, but they differ in their argument type: - -`::fetchColumnByName` expects a string representing one of the values of `ResultSet::getHeader` - -```php -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$reader->setHeaderOffset(0); -$records = Statement::create()->process($reader); -foreach ($records->fetchColumnByName('E-mail') as $value) { - //$value is a string representing the value - //of a given record for the selected column - //$value may be equal to 'john.doe@example.com' -} -``` - -

If the ResultSet contains column names and the $name is not found, an Exception exception is thrown.

- -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$reader->setHeaderOffset(0); - -$records = Statement::create()->process($reader); -foreach ($records->fetchColumnByName('foobar') as $record) { - //throw an Exception exception if - //no `foobar` column name is found - //in $records->getHeader() result -} -``` - -`::fetchColumnByOffset` expects an integer representing the column index starting from `0`; - -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$records = Statement::create()->process($reader); -foreach ($records->fetchColumnByOffset(2) as $value) { - //$value is a string representing the value - //of a given record for the selected column - //$value may be equal to 'john.doe@example.com' -} -``` - -

For both methods, if for a given record the column value is null, the record will be skipped.

- -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$records = Statement::create()->process($reader); -count($records); //returns 10; -count(iterator_to_array($records->fetchColumnByOffset(2), false)); //returns 5 -//5 records were skipped because the column value is null -``` - -

The following paragraph describes the usage of the ::fetchColumn method which is -deprecated as of 9.8.0 and wil be removed in the next major release.

- -`ResultSet::fetchColumn` returns a `Generator` of all values in a given column from the `ResultSet` object. - -The `$columnIndex` parameter can be: - -- an integer representing the column index starting from `0`; -- a string representing one of the value of `ResultSet::getHeader`; - -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$records = Statement::create()->process($reader); -foreach ($records->fetchColumn(2) as $value) { - //$value is a string representing the value - //of a given record for the selected column - //$value may be equal to 'john.doe@example.com' -} - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$reader->setHeaderOffset(0); - -$records = Statement::create()->process($reader); -foreach ($records->fetchColumn('E-mail') as $value) { - //$value is a string representing the value - //of a given record for the selected column - //$value may be equal to 'john.doe@example.com' -} -``` - -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$records = Statement::create()->process($reader); -count($records); //returns 10; -count(iterator_to_array($records->fetchColumn(2), false)); //returns 5 -//5 records were skipped because the column value is null -``` - -

If the ResultSet contains column names and the $columnIndex is not found, an Exception exception is thrown.

- -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$reader->setHeaderOffset(0); - -$records = Statement::create()->process($reader); -foreach ($records->fetchColumn('foobar') as $record) { - //throw an Exception exception if - //no `foobar` column name is found - //in $records->getHeader() result -} -``` - -## Selecting key-value pairs - -`ResultSet::fetchPairs` method returns a `Generator` of key-value pairs. - -```php -public ResultSet::fetchPairs(string|int $offsetIndex = 0, string|int $valueIndex = 1): Generator -``` - -Both arguments, `$offsetIndex` and `$valueIndex` can be: - -- an integer which represents the column name index; -- a string representing the value of a column name; - -These arguments behave exactly like the `$columnIndex` from `ResultSet::fetchColumn`. - -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$str = <<process($reader); - -foreach ($records->fetchPairs() as $firstname => $lastname) { - // - first iteration - // $firstname -> 'john' - // $lastname -> 'doe' - // - second iteration - // $firstname -> 'jane' - // $lastname -> 'doe' - // - third iteration - // $firstname -> 'foo' - // $lastname -> 'bar' - // - fourth iteration - // $firstname -> 'sacha' - // $lastname -> null -} -``` - -### Notes - -- If no `$offsetIndex` is provided it defaults to `0`; -- If no `$valueIndex` is provided it defaults to `1`; -- If no cell is found corresponding to `$offsetIndex` the row is skipped; -- If no cell is found corresponding to `$valueIndex` the `null` value is used; - -

If the ResultSet contains column names and the submitted arguments are not found, an Exception exception is thrown.

- -## Collection methods - -

New methods added in version 9.11.

- -To ease working with the `ResultSet` the following methods derived from collection are added. -Some are just wrapper methods around the `Statement` class while others use the iterable nature -of the instance. - -### ResultSet::each - -Iterates over the records in the CSV document and passes each item to a closure: - -```php -use League\Csv\Reader; -use League\Csv\Statement; -use League\Csv\Writer; - -$writer = Writer::createFromString(''); -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); - -$resultSet = Statement::create()->process($reader); -$resultSet->each(function (array $record, int $offset) use ($writer) { - if ($offset < 10) { - return $writer->insertOne($record); - } - - return false; -}); - -//$writer will contain at most 10 lines coming from the $resultSet. -// the iteration stopped when the closure return false. -``` - -### ResultSet::exists - -Tests for the existence of an element that satisfies the given predicate. - -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$resultSet = Statement::create()->process($reader); - -$exists = $resultSet->exists(fn (array $records) => in_array('twenty-five', $records, true)); - -//$exists returns true if at cell one cell contains the word `twenty-five` otherwise returns false, -``` - -### Reader::reduce - -Applies iteratively the given function to each element in the collection, so as to reduce the collection to -a single value. - -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$resultSet = Statement::create()->process($reader); - -$nbTotalCells = $resultSet->recude(fn (?int $carry, array $records) => ($carry ?? 0) + count($records)); - -//$records contains the total number of celle contains in the $resultSet -``` - -### Reader::filter - -Returns all the elements of this collection for which your callback function returns `true`. 
The order and keys of the elements are preserved. - -

Wraps the functionality of Statement::where.

- -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$resultSet = Statement::create()->process($reader); - -$records = $resultSet->filter(fn (array $record): => 5 === count($record)); - -//$recors is a ResultSet object with only records with 5 elements -``` - -### Reader::slice - -Extracts a slice of $length elements starting at position $offset from the Collection. If $length is `-1` it returns all elements from `$offset` to the end of the Collection. -Keys have to be preserved by this method. Calling this method will only return the selected slice and NOT change the elements contained in the collection slice is called on. - -

Wraps the functionality of Statement::offset and Statement::limit.

- -```php -use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$resultSet = Statement::create()->process($reader); - -$records = $resultSet->slice(10, 25); - -//$records contains up to 25 rows starting at the offset 10 (the eleventh rows) -``` - -### Reader::sorted - -Sorts the CSV document while keeping the original keys. - -

Wraps the functionality of Statement::orderBy.

- ```php -use League\Csv\Reader; - -$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); -$resultSet = Statement::create()->process($reader); - -$records = $resultSet->sorted(fn (array $recordA, array $recordB) => $recordA['firstname'] <=> $recordB['firstname']); - -//$records is a ResultSet containing the original resultSet. -//The original ResultSet is not changed -``` +Please head over to the [TabularDataReader documentation page](/9.0/reader/tabular-data-reader) +for more information on the class features. If you require a more advanced record selection, you +should use a [Statement or a FragmentFinder](/9.0/reader/statement/) class to process the `Reader` object. The +found records are returned as a [ResultSet](/9.0/reader/resultset) object. ## Conversions ### Json serialization -The `ResultSet` class implements the `JsonSerializable` interface. As such you can use the `json_encode` function directly on the instantiated object. The interface is implemented using PHP's `iterator_array` on the `ResultSet::getRecords` method. As such, the returned `JSON` string data is affected by the presence or absence of column names. +The `ResultSet` class implements the `JsonSerializable` interface. As such you can use the `json_encode` +function directly on the instantiated object. The interface is implemented using PHP's `iterator_to_array` +on the `ResultSet::getRecords` method. As such, the returned `JSON` string data is affected by the +presence or absence of column names. ```php use League\Csv\Reader; @@ -548,7 +59,6 @@ echo json_encode($result, JSON_PRETTY_PRINT), PHP_EOL; ```

The record offset is not preserved on conversion

-

To convert your CSV records to JSON you must be sure its content is UTF-8 encoded, using, for instance, the library CharsetConverter stream filter.

### Other conversions diff --git a/docs/9.0/reader/statement.md b/docs/9.0/reader/statement.md index b887f82b..d91ca29f 100644 --- a/docs/9.0/reader/statement.md +++ b/docs/9.0/reader/statement.md @@ -3,142 +3,165 @@ layout: default title: CSV document constraint Builder --- -# Constraint Builder +# Constraint Builders -The `League\Csv\Statement` class is a constraint builder to help ease selecting records from a CSV document created using the `League\Csv\Reader` class. +The package provides two (2) convenient ways to query the `Reader` and the `ResultSet` instances. They +can be used to perform manipulation independently of the instance giving you more controls over +which records you want to access from your input document. -When building a constraint, the methods do not need to be called in any particular order, and may be called multiple times. Because the `Statement` object is immutable, each time its constraint methods are called they will return a new `Statement` object without modifying the current `Statement` object. +## Statement -

Because the Statement object is independent of the Reader object it can be re-used on multiple Reader objects.

+The first mechanism is the `League\Csv\Statement` class which is a constraint builder that more or less +mimics the behaviour of query builders in the database world. It can filter, order and limit the records +to be shown. It does so by adding and combining constraints. Once the constraint is built, it will +process your input and always return a [ResultSet](/9.0/reader/resultset) instance. Of note, the resulting constraint +can be applied on multiple documents as the instance is immutable and completely independent of +the input. -

Starting with version 9.6.0, the class exposes the Statement::create named constructor to ease object creation.

+### Retrieving all the rows -## Filtering constraint +

Starting with version 9.6.0, the class exposes the +Statement::create named constructor to ease object creation.

-The filters attached using the `Statement::where` method **are the first settings applied to the CSV before anything else**. This option follows the *First In First Out* rule. +To start using the `Statement` class you should use the `create` method. It returns a valid instance +ready to process your document or on which you can add more constraints. Because the +`Statement` object is immutable, each time its constraint methods are called they will +return a new `Statement` object without modifying the current `Statement` object. +Once your constraint is ready to be used, use its `process` method on a `TabularDataReader` class. ```php -public Statement::where(callable $callable): self -``` - -The callable filter signature is as follows: +use League\Csv\Reader; +use League\Csv\Statement; -```php -function(array $record [, int $offset [, Iterator $iterator]]): self +$reader = Reader::createFromPath('/path/to/file.csv'); +$records = Statement::create()->process($reader); +// $records is a League\Csv\ResultSet instance ``` -It takes up to three parameters: +The `process` method returns a new `TabularDataReader` on which each constraint has been applied. +If no constraint has been added the returned object will contain the same data as its input. -- `$record`: the current CSV record as an array -- `$offset`: the current CSV record offset -- `$iterator`: the current CSV iterator +

Warning: since version 9.12.0 the optional +$header argument used by the process method is deprecated.

-## Sorting constraint +### Where clauses -The sorting options are applied **after the Statement::where options**. The sorting follows the *First In First Out* rule. - -

Warning: To sort the data iterator_to_array is used, which could lead to a performance penalty if you have a heavy CSV file to sort

- -`Statement::orderBy` method adds a sorting function each time it is called. +To filter the records from your input you may use the `where` method. The method can be +called multiple times and each time it will add another constraint filter. This option +follows the *First In First Out* rule. The filter expects a callable similar to the +one used by `array_filter`. For example the following filter will remove all the +records whose `3rd` field does not contain a valid `email`: ```php -public Statement::orderBy(callable $callable): self +use League\Csv\Reader; +use League\Csv\Statement; + +$reader = Reader::createFromPath('/path/to/file.csv'); +$records = Statement::create() + ->where(fn (array $record): bool => false !== filter_var($record[2] ?? '', FILTER_VALIDATE_EMAIL)) + ->process($reader); +// $records is a League\Csv\ResultSet instance ``` -The callable sort function signature is as follows: +### Ordering + +The `orderBy` method allows you to sort the results of the applied constraints. Just like +with filtering the method can be called multiple times and the *First In First Out* rule is +also applied. The callable accepted is similar to the one used by the `usort` function. +As an example let's order the records according to the lastname found on the records. ```php -function(array $recordA, array $recordB): int -``` +use League\Csv\Reader; +use League\Csv\Statement; -The sort function takes exactly two parameters, which will be filled by pairs of records. +$reader = Reader::createFromPath('/path/to/file.csv'); +$records = Statement::create() + ->orderBy(fn (array $rA, $rB): int => strcmp($rB[1] ?? '', $rA[1] ?? '')) + ->process($reader); +// $records is a League\Csv\ResultSet instance +``` -## Interval constraint +

Warning: To sort the data iterator_to_array is used, +which could lead to a performance penalty if you have a heavy CSV file to sort

-The interval methods enable returning a specific interval of CSV records. When called more than once, only the last filtering setting is taken into account. The interval is calculated **after applying Statement::orderBy options**. +### Limit and Offset -The interval API is made of the following methods: +You can use the `limit` and `offset` methods to limit the number of records returned. When called more than once, +only the last filtering setting will be taken into account. The `offset` specifies an optional offset for +the returned data. By default, if no offset is provided the offset equals `0`. On the other hand, the +`limit` method specifies an optional maximum records count for the returned data. By default, if +no limit is provided the limit equals `-1`, which translates to all records. We can for instance +limit the number of records to at most `5` starting from the `10`th found record. ```php -public Statement::offset(int $offset): self -public Statement::limit(int $limit): self -``` - -`Statement::offset` specifies an optional offset for the returned data. By default, if no offset is provided the offset equals `0`. +use League\Csv\Reader; +use League\Csv\Statement; -`Statement::limit` specifies an optional maximum records count for the returned data. By default, if no limit is provided the limit equals `-1`, which translates to all records. +$reader = Reader::createFromPath('/path/to/file.csv'); +$records = Statement::create() + ->limit(5) + ->offset(9) + ->process($reader); +// $records is a League\Csv\ResultSet instance +```

When called multiple times, each call overrides the last setting for these options.

-## Processing a CSV document +## FragmentFinder -```php -public Statement::process(Reader $reader, array $header = []): ResultSet -``` +

This mechanism is introduced with version 9.12.0.

-This method processes a [Reader](/9.0/reader/) object and returns the found records as a [ResultSet](/9.0/reader/resultset) object. +The second mechanism is based on [RFC7111](https://www.rfc-editor.org/rfc/rfc7111) and allows selecting +part of your document according to its rows, columns or cells coordinates. The RFC, and thus, our class +assume that your data is column size consistent and, in absence of a specified header, it will use the +first record as reference to determine the input number of columns. -```php -use League\Csv\Reader; -use League\Csv\Statement; +The RFC defines three (3) types of selections and the `FragmentFinder` class supports them all. -function filterByEmail(array $record): bool -{ - return (bool) filter_var($record[2], FILTER_VALIDATE_EMAIL); -} - -function sortByLastName(array $recordA, array $recordB): int -{ - return strcmp($recordB[1], $recordA[1]); -} - -$reader = Reader::createFromPath('/path/to/file.csv', 'r'); -$stmt = (new Statement()) - ->offset(3) - ->limit(2) - ->where('filterByEmail') - ->orderBy('sortByLastName') -; - -$records = $stmt->process($reader); -``` +You can select part of your data according to: -Just like the `Reader:getRecords`, the `Statement::process` method takes an optional `$header` argument to allow mapping CSV field names to a user defined header record. +- its row index using an expression that starts with the `row` keyword; +- its column index using an expression that starts with the `col` keyword; +- its cell coordinates using an expression that starts with the `cell` keyword; -

Using the $header argument is deprecated since version 9.12.0, -use instead the TabularDataReader::getRecords method instead on the returned value. -A E_USER_DEPRECATED notice will be triggered if the argument is used.

+Here are some selection examples: -```php -use League\Csv\Reader; -use League\Csv\Statement; +- `col=5` : will select the column `4`; +- `col=5-7` : will select the columns `4` to `6` included; +- `row=5-*` : will select all the remaining rows of the document starting from the `4th` row. +- `cell=5,2-8,9` : will select the cells located between row `4` and column `1` and row `7` and column `8`; -$reader = Reader::createFromPath('/path/to/file.csv', 'r'); -$stmt = Statement::create() - ->offset(3) - ->limit(2) - ->where(fn(array $record) => (bool) filter_var($record[2], FILTER_VALIDATE_EMAIL)) - ->orderBy(fn(array $recordA, array $recordB) => strcmp($recordB[1], $recordA[1])) -; +Of note, the RFC allows for multiple disjunctive selections, separated by a `;`. To strictly +cover the RFC, the class exposes the `all` method which returns an iterator containing the +results of all found fragments as distinct `TabularDataReader` instances. -$records = $stmt->process($reader, ['firstname', 'lastname', 'email']); -``` +

If some selections are invalid no error is returned; the invalid +selection is skipped from the returned value.

+ +To restrict the returned values you may use the `first` and `firstOrFail` methods. Both methods +return on success a `TabularDataReader` instance. While the `first` method always returns the +first selection found or `null`; `firstOrFail` **MUST** return a `TabularDataReader` instance +or throw. It will also throw if the expression syntax is invalid while all the other methods +just ignore the error. -

Starting with version 9.6.0, the Statement::process method can also be used on the ResultSet class because it implements the TabularDataReader interface.

+For example, with the following partially invalid expression: ```php use League\Csv\Reader; -use League\Csv\Statement; - -$reader = Reader::createFromPath('/path/to/file.csv', 'r'); -$stmt = Statement::create() - ->where(fn(array $record) => (bool) filter_var($record[2], FILTER_VALIDATE_EMAIL)) - ->orderBy(fn(array $recordA, array $recordB) => strcmp($recordB[1], $recordA[1])) -; +use League\Csv\FragmentFinder; -$resultSet = $stmt->process($reader, ['firstname', 'lastname', 'email']); +$reader = Reader::createFromPath('/path/to/file.csv'); +$finder = new FragmentFinder(); -$stmt2 = Statement::create(null, 3, 2); -$records = $stmt2->process($resultSet); -//the $records and $resultSet variables are distinct League\Csv\ResultSet instances. +$finder->all('row=7-5;8-9', $reader); // return an Iterator +$finder->first('row=7-5;8-9', $reader); // return a TabularDataReader +$finder->firstOrFail('row=7-5;8-9', $reader); // will throw ``` + +- `FragmentFinder::all` returns an Iterator containing a single `TabularDataReader` because the first selection +is invalid; +- `FragmentFinder::first` returns the single valid `TabularDataReader` +- `FragmentFinder::firstOrFail` throws a `SyntaxError`. + +Both classes, `FragmentFinder` and `Statement` return an instance that implements the `TabularDataReader` interface +which can be used to return the found data in a consistent way. diff --git a/docs/9.0/reader/tabular-data-reader.md b/docs/9.0/reader/tabular-data-reader.md new file mode 100644 index 00000000..639b89b5 --- /dev/null +++ b/docs/9.0/reader/tabular-data-reader.md @@ -0,0 +1,390 @@ +--- +layout: default +title: Tabular Data Reader +--- + +# TabularDataReader + +Introduced in version `9.6` the `League\Csv\TabularDataReader` interface provides a common +API to work with tabular data like structures. Once implemented, it can be used to work +with HTML Table, simple RDBMS tables, CSV document and so forth. 
The only requirements are +to have: + +- a collection of records (preferably consistent in their size); +- an optional header with unique values; + +A good example of what you can achieve can be seen with the following snippet + +```php +use League\Csv\Reader; + +$records = Reader::createFromPath('/path/to/file.csv') + ->filter(fn (array $record): bool => false !== filter_var($record[2] ?? '', FILTER_VALIDATE_EMAIL)) + ->select(1, 4, 5) + ->slice(3, 5) + ->getRecords(); + +foreach ($records as $record) { + //do something meaningful with the found records +} +``` + +Once you have created a `TabularDataReader` implementing instance (here we are using the `Reader`), you will +be able to filter, slice and select part of your data to finally access it using the `getRecords` method. +You will also be able to process the instance using a [Statement](/9.0/reader/statement/) object. +All these methods are part of the `TabularDataReader` contract. In general, `TabularDataReader` instances are immutable, +meaning every `TabularDataReader` method returns an entirely new `TabularDataReader` instance +leaving your source data unchanged. + +## Available methods + +While the `TabularDataReader` is not a fully fledged collection instance it still exposes a lot of methods +that fall into the category of records collection manipulations. Because chaining is at the core of most of +its methods you can be sure that each manipulation returns a new instance preserving your original data. + +## Countable, IteratorAggregate + +Any `TabularDataReader` instance implements the `Countable` and the `IteratorAggregate` interface. +It means that at any given time you can access the number of elements that are included in the instance +as well as iterate over all the records using the `foreach` structure. + +```php +use League\Csv\Reader; + +$reader = Reader::createFromPath('/path/to/my/file.csv'); +count($reader); //returns 4 +foreach ($reader as $offset => $record) { + //iterates over the 4 records. 
+} +``` + +## Selection methods + +### getHeader + +The `getHeader` returns the header associated with the current object. If the current object +has no header, it will return the empty array. + +```php +use League\Csv\Reader; + +$reader = Reader::createFromPath('/path/to/my/file.csv'); +$reader->getHeader(); //is empty because no header information was given +``` + +### getRecords + +The `getRecords` enables iterating over all records from the current object. If the optional `$header` +argument is given, it will be used as a mapper on the record and will update the record header +and the value position. + +```php +use League\Csv\Reader; + +$csv = <<process($reader); +$records = $resultSet->getRecords([3 => 'Year', 0 => 'Firstname', 4 => 'Yolo']); +var_dump([...$records][0]); +//returns something like this +// array:4 [ +// "Year" => "2004", +// "Firstname" => "Abel", +// "Yolo" => null, +// ] +``` + +

full mapper usage was completed in version 9.12 for Reader and ResultSet.

+

Added in version 9.6.0 for ResultSet.

+ +## first and nth + +You may access any record using its offset starting at `0` in the collection using the `nth` method. +if no record is found, an empty `array` is returned. + +```php +use League\Csv\Reader; +use League\Csv\Statement; + +$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); +$reader->setHeaderOffset(0); + +$stmt = Statement::create() + ->offset(10) + ->limit(12) +; +$result = $stmt->process($reader); +$result->nth(3); +// access the 4th record from the recordset (indexing starts at 0) +// will return something like this : +// +// ['john', 'doe', 'john.doe@example.com'] +// + +$result->first(); +$result->nth(0); +//returns the first matching record from the recordset or an empty record if none is found. +``` + +As an alias to `nth`, the `first` method returns the first record from the instance without the need of an argument. + +

Added in version 9.9.0 for Reader and ResultSet.

+ +### fetchColumnByName + +The `fetchColumnByName` returns an Iterator containing all the values of a single column specified by its header name if it exists. + +```php +$reader = Reader::createFromPath('/path/to/my/file.csv'); +$reader->setHeaderOffset(0); +$records = Statement::create()->process($reader); +foreach ($records->fetchColumnByName('e-mail') as $value) { + //$value is a string representing the value + //of a given record for the selected column + //$value may be equal to 'john.doe@example.com' +} +``` + +

Added in version 9.8.0 for Reader and ResultSet.

+ +### fetchColumnByOffset + +The `fetchColumnByOffset` returns an Iterator containing all the values of a single column specified by its +header offset. + +```php +$reader = Reader::createFromPath('/path/to/my/file.csv'); +$reader->setHeaderOffset(0); +$records = Statement::create()->process($reader); +foreach ($records->fetchColumnByOffset(3) as $value) { + //$value is a string representing the value + //of a given record for the selected column + //$value may be equal to 'john.doe@example.com' +} +``` + +

Added in version 9.8.0 for Reader and ResultSet.

+ +### fetchPairs + +The `fetchPairs` method returns an Iterator of key-value pairs from two tabular data columns. The method +expects 2 arguments, both can be: + +- an integer which represents the column name index; +- a string representing the value of a column name; + +These arguments behave exactly like the `$columnIndex` from `ResultSet::fetchColumnByName` +and `ResultSet::fetchColumnByOffset`. + +```php +use League\Csv\Reader; +use League\Csv\Statement; + +$str = <<<EOF +john,doe +jane,doe +foo,bar +sacha +EOF; + +$reader = Reader::createFromString($str); +$records = Statement::create()->process($reader); + +foreach ($records->fetchPairs() as $firstname => $lastname) { + // - first iteration + // $firstname -> 'john' + // $lastname -> 'doe' + // - second iteration + // $firstname -> 'jane' + // $lastname -> 'doe' + // - third iteration + // $firstname -> 'foo' + // $lastname -> 'bar' + // - fourth iteration + // $firstname -> 'sacha' + // $lastname -> null +} +``` + +### Notes + +- If no `$offsetIndex` is provided it defaults to `0`; +- If no `$valueIndex` is provided it defaults to `1`; +- If no cell is found corresponding to `$offsetIndex` the row is skipped; +- If no cell is found corresponding to `$valueIndex` the `null` value is used; + +

If the TabularDataReader contains column names and the submitted arguments are not found, an Exception exception is thrown.

+ +### exists + +Tests for the existence of an element that satisfies a given predicate. + +```php +use League\Csv\Reader; +use League\Csv\Statement; + +$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); +$resultSet = Statement::create()->process($reader); + +$exists = $resultSet->exists(fn (array $records) => in_array('twenty-five', $records, true)); + +//$exists returns true if at least one cell contains the word `twenty-five` otherwise returns false, +``` + +

Added in version 9.11.0 for Reader and ResultSet.

+ +## Functional methods + +### each + +The `each` method iterates over the records in the tabular data collection and passes each record to a +closure. + +```php +use League\Csv\Reader; +use League\Csv\Writer; + +$writer = Writer::createFromString(''); +$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); +$reader->each(function (array $record, int $offset) use ($writer) { + if ($offset < 10) { + return $writer->insertOne($record); + } + + return false; +}); + +//$writer will contain at most 10 lines coming from the $reader document. +// the iteration stopped when the closure returned false. +``` + +You may interrupt the iteration if the closure passed to `each` returns `false`. + +

Added in version 9.11.0 for Reader and ResultSet.

+ +### reduce + +The `reduce` method reduces the tabular data structure to a single value, passing +the result of each iteration into the subsequent iteration: + +```php +use League\Csv\Reader; +use League\Csv\ResultSet; + +$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); +$resultSet = ResultSet::createFromTabularDataReader($reader); + +$nbTotalCells = $resultSet->reduce(fn (?int $carry, array $records) => ($carry ?? 0) + count($records)); + +//$nbTotalCells contains the total number of cells contained in the $resultSet +``` + +The closure is similar to the one used with `array_reduce`. + +

Added in version 9.11.0 for Reader and ResultSet.

+ +## Collection methods + +The following methods all return a new `TabularDataReader` instance. +They effectively allow selecting a range of records or columns contained +within the `TabularDataReader` schema. + +### filter + +Returns all the elements of this collection for which your callback function returns `true`. The order and +keys of the elements are preserved. + +```php +use League\Csv\Reader; + +$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); +$records = $reader->filter(fn (array $record): bool => 5 === count($record)); + +//$records is a ResultSet object with only records with 5 elements +``` + +

Wraps the functionality of Statement::where.

+

Added in version 9.11.0 for Reader and ResultSet.

+ +### sorted + +Sorts the CSV document while keeping the original keys. + +```php +use League\Csv\Reader; + +$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); +$records = $reader->sorted(fn (array $recordA, array $recordB) => $recordA['firstname'] <=> $recordB['firstname']); + +//$records is a ResultSet containing the sorted CSV document. +//The original $reader is not changed +``` + +

Wraps the functionality of Statement::orderBy.

+

Added in version 9.11.0 for Reader and ResultSet.

+ +### slice + +Extracts a slice of $length elements starting at position $offset from the Collection. If $length is `-1` it returns all elements from `$offset` to the end of the Collection. +Keys have to be preserved by this method. Calling this method will only return the selected slice and NOT change the elements contained in the collection slice is called on. + +```php +use League\Csv\Reader; +use League\Csv\Statement; + +$reader = Reader::createFromPath('/path/to/my/file.csv', 'r'); +$resultSet = Statement::create()->process($reader); + +$records = $resultSet->slice(10, 25); + +//$records is a TabularDataReader which contains up to 25 rows +//starting at the offset 10 (the eleventh rows) +``` + +

Wraps the functionality of Statement::offset and Statement::limit.

+

Added in version 9.11.0 for Reader and ResultSet.

+ +### select + +You may not always want to select all columns from the tabular data. Using the `select` method, +you can specify which columns to use. The column can be specified by their +name if the instance `getHeader` returns a non-empty array or you can use +the column offset or mix them both. + +```php +use League\Csv\Reader; + +$reader = Reader::createFromPath('/path/to/my/file.csv') + ->select(2, 5, 8); + +//$reader is a new TabularDataReader with 3 columns +``` + +

Added in version 9.12.0 for Reader and ResultSet.

+ +### matching, firstMatching, firstOrFailMatching + +The `matching` method allows selecting all records or cells from the tabular data reader that match the +RFC7111 expression and returns a new collection containing these elements without preserving the keys. +The method wraps the functionality of `FragmentFinder::all`. Conversely, `firstMatching` +wraps the functionality of `FragmentFinder::first` and last but not least, +`FragmentFinder::firstOrFail` behaviour is wrapped inside the `firstOrFailMatching` method. + +```php +use League\Csv\Reader; + +$reader = Reader::createFromString($csv); + +$reader->matching('row=3-1;4-6'); //returns an Iterator containing all the TabularDataReader instances that are valid. +$reader->firstMatching('row=3-1;4-6'); // will return 1 selected fragment as a TabularDataReader instance +$reader->firstOrFailMatching('row=3-1;4-6'); // will throw +``` + +

Wraps the functionality of FragmentFinder class.

+

Added in version 9.12.0 for Reader and ResultSet.

diff --git a/docs/_data/menu.yml b/docs/_data/menu.yml index a061c523..142d0fee 100644 --- a/docs/_data/menu.yml +++ b/docs/_data/menu.yml @@ -15,8 +15,9 @@ version: Writer Connection: '/9.0/writer/' Bundled Helpers: '/9.0/writer/helpers/' Selecting Records: + Contract: '/9.0/reader/tabular-data-reader/' + Constraint Builders: '/9.0/reader/statement/' Reader Connection: '/9.0/reader/' - Constraint Builder: '/9.0/reader/statement/' Result Set: '/9.0/reader/resultset/' Interoperability: Overview : '/9.0/interoperability/'