From 54bb7eca27785975c3f04f3f00df3c8bd6bd994c Mon Sep 17 00:00:00 2001 From: Toon Verwerft Date: Sat, 6 Jan 2024 14:31:10 +0100 Subject: [PATCH] Add reader MatchingNode results and a signal to stop reading --- docs/reader.md | 21 ++++-- src/Xml/Reader/MatchingNode.php | 65 +++++++++++++++++ src/Xml/Reader/Reader.php | 20 +++-- src/Xml/Reader/Signal.php | 19 +++++ .../Configurator/SubstituteEntitiesTest.php | 8 +- .../Xml/Reader/Configurator/XsdSchemaTest.php | 14 ++-- .../Reader/Matcher/AbstractMatcherTest.php | 4 +- tests/Xml/Reader/MatchingNodeTest.php | 73 +++++++++++++++++++ tests/Xml/Reader/ReaderTest.php | 31 +++++++- tests/Xml/Reader/SignalTest.php | 20 +++++ 10 files changed, 252 insertions(+), 23 deletions(-) create mode 100644 src/Xml/Reader/MatchingNode.php create mode 100644 src/Xml/Reader/Signal.php create mode 100644 tests/Xml/Reader/MatchingNodeTest.php create mode 100644 tests/Xml/Reader/SignalTest.php diff --git a/docs/reader.md b/docs/reader.md index 51b09cc..c002595 100644 --- a/docs/reader.md +++ b/docs/reader.md @@ -7,21 +7,31 @@ As a result, the reader provides a generator of XML strings that match your matc ## Example ```php -use VeeWee\Xml\Dom\Document; +use VeeWee\Xml\Dom\Configurator; use VeeWee\Xml\Reader\Reader; +use VeeWee\Xml\Reader\Signal; use VeeWee\Xml\Reader\Matcher; $reader = Reader::fromXmlFile('large-data.xml'); $provider = $reader->provide( - Matcher\all( + $matcher = Matcher\all( Matcher\node_name('item'), Matcher\node_attribute('locale', 'nl-BE') - ) + ), + // Optionally, you can provide a signal to stop reading at a given point: + $signal = new Signal() ); foreach ($provider as $nlItem) { - $dom = Document::fromXmlString($nlItem); // Do something with it + $xml = $nlItem->xml(); + $dom = $nlItem->intoDocument(Configurator\canonicalize()); + $decoded = $nlItem->decode(Configurator\canonicalize()); + $matched = $nlItem->matches($matcher); + $sequence = $nlItem->nodeSequence(); + + // If you have loaded sufficient items, you 
can stop reading the XML file: + $signal->stop(); } ``` @@ -54,7 +64,8 @@ The reader will keep only small parts of the XML in memory by reading the XML st When the reader detects the first `breakfast_menu` element, it will ask the provided matchers if you are interested in this tag. A matcher is a function that returns `true` when interested or `false` when it is not interested in this element. When the matcher returns `true`, the reader will read the complete outer XML of current tag and `yield` this matching XML to your logic. -This means that the memory-safety of YOUR reader is based on the part inside the XML you are interested in: +This XML is wrapped in a `MatchingNode`, which also contains the `NodeSequence` and some handy shortcut functions, e.g. to convert the XML into a DOM Document. +Do note that the memory-safety of YOUR reader depends on the part of the XML you are interested in: If you only match on the root node, it will yield the complete XML and therefore won't be memory-safe. After deciding if you are interested in the previous tag, it jumps over to the next tag: `breakfast_menu > food[position() = 1 AND @soldOUt=false AND @bestSeller = true]` and asks the matcher if you are interested in this. diff --git a/src/Xml/Reader/MatchingNode.php b/src/Xml/Reader/MatchingNode.php new file mode 100644 index 0000000..49b5a9c --- /dev/null +++ b/src/Xml/Reader/MatchingNode.php @@ -0,0 +1,65 @@ +xml; + } + + public function nodeSequence(): NodeSequence + { + return $this->nodeSequence; + } + + /** + * @param list $configurators + * + * @throws RuntimeException + */ + public function intoDocument(callable ... $configurators): Document + { + return Document::fromXmlString($this->xml, ...$configurators); + } + + /** + * @param list $configurators + * + * @throws RuntimeException + * @throws EncodingException + */ + public function decode(callable ... 
$configurators): array + { + return xml_decode($this->xml, ...$configurators); + } + + /** + * @param callable(NodeSequence): bool $matcher + */ + public function matches(callable $matcher): bool + { + return $matcher($this->nodeSequence); + } +} diff --git a/src/Xml/Reader/Reader.php b/src/Xml/Reader/Reader.php index ec4ab30..2138be5 100644 --- a/src/Xml/Reader/Reader.php +++ b/src/Xml/Reader/Reader.php @@ -60,18 +60,25 @@ public static function fromXmlString(string $xml, callable ... $configurators): /** * @param callable(NodeSequence): bool $matcher * - * @return Generator + * @return Generator * * @throws RuntimeException */ - public function provide(callable $matcher): Generator + public function provide(callable $matcher, ?Signal $signal = null): Generator { + $signal ??= new Signal(); $reader = ($this->factory)(); $pointer = Pointer::create(); yield from stop_on_first_issue( - static fn (): bool => $reader->read(), - static function () use ($reader, $pointer, $matcher) : ?string { + static function () use ($reader, $signal): bool { + if($signal->stopRequested()) { + return !$reader->close(); + } + + return $reader->read(); + }, + static function () use ($reader, $pointer, $matcher) : ?MatchingNode { if ($reader->nodeType === XMLReader::END_ELEMENT) { $pointer->leaveElement(); @@ -93,13 +100,14 @@ static function () use ($reader): array { ); $pointer->enterElement($element); - $result = $matcher($pointer->getNodeSequence()) ? $reader->readOuterXml() : null; + $outerXml = $matcher($pointer->getNodeSequence()) ? $reader->readOuterXml() : null; + $match = $outerXml ? 
new MatchingNode($outerXml, $pointer->getNodeSequence()) : null; if ($isEmptyElement) { $pointer->leaveElement(); } - return $result; + return $match; } return null; diff --git a/src/Xml/Reader/Signal.php b/src/Xml/Reader/Signal.php new file mode 100644 index 0000000..2c2e34e --- /dev/null +++ b/src/Xml/Reader/Signal.php @@ -0,0 +1,19 @@ +stopRequested = true; + } + + public function stopRequested(): bool + { + return $this->stopRequested; + } +} diff --git a/tests/Xml/Reader/Configurator/SubstituteEntitiesTest.php b/tests/Xml/Reader/Configurator/SubstituteEntitiesTest.php index 7549cdb..37e33e9 100644 --- a/tests/Xml/Reader/Configurator/SubstituteEntitiesTest.php +++ b/tests/Xml/Reader/Configurator/SubstituteEntitiesTest.php @@ -5,7 +5,9 @@ namespace VeeWee\Tests\Xml\Reader\Configurator; use PHPUnit\Framework\TestCase; +use VeeWee\Xml\Reader\MatchingNode; use VeeWee\Xml\Reader\Reader; +use function Psl\Vec\map; use function VeeWee\Xml\Reader\Configurator\substitute_entities; use function VeeWee\Xml\Reader\Matcher\node_name; @@ -21,11 +23,11 @@ public function test_it_can_substitute_entities(): void [ 'my entity value', ], - [...$iterator] + map($iterator, static fn (MatchingNode $match): string => $match->xml()) ); } - + public function test_it_can_skip_substituting_entities(): void { $xml = $this->buildXml(); @@ -36,7 +38,7 @@ public function test_it_can_skip_substituting_entities(): void [ '&entity;', ], - [...$iterator] + map($iterator, static fn (MatchingNode $match): string => $match->xml()) ); } diff --git a/tests/Xml/Reader/Configurator/XsdSchemaTest.php b/tests/Xml/Reader/Configurator/XsdSchemaTest.php index c4855d3..86df9a3 100644 --- a/tests/Xml/Reader/Configurator/XsdSchemaTest.php +++ b/tests/Xml/Reader/Configurator/XsdSchemaTest.php @@ -7,8 +7,10 @@ use PHPUnit\Framework\TestCase; use VeeWee\Tests\Xml\Helper\FillFileTrait; use VeeWee\Xml\Exception\RuntimeException; +use VeeWee\Xml\Reader\MatchingNode; use VeeWee\Xml\Reader\Reader; use XMLReader; +use 
function Psl\Vec\map; use function VeeWee\Xml\Reader\Configurator\xsd_schema; use function VeeWee\Xml\Reader\Matcher\node_name; @@ -16,7 +18,7 @@ final class XsdSchemaTest extends TestCase { use FillFileTrait; - + public function test_it_can_iterate_if_the_schema_matches(): void { [$xsdFile, $xsdHandle] = $this->createXsdFile(); @@ -37,13 +39,13 @@ public function test_it_can_iterate_if_the_schema_matches(): void 'Bos', 'Mos' ], - [...$iterator] + map($iterator, static fn (MatchingNode $match): string => $match->xml()) ); fclose($xsdHandle); } - + public function test_it_triggers_an_error_on_invalid_schema(): void { [$xsdFile, $xsdHandle] = $this->createXsdFile(); @@ -65,7 +67,7 @@ public function test_it_triggers_an_error_on_invalid_schema(): void fclose($xsdHandle); } - + public function test_it_triggers_an_error_if_schema_file_does_not_exist(): void { $xml = ''; @@ -80,7 +82,7 @@ public function test_it_triggers_an_error_if_schema_file_does_not_exist(): void fclose($xsdHandle); } - + public function test_it_can_not_set_a_schema_if_the_reader_started_reading(): void { [$xsdFile, $xsdHandle] = $this->createXsdFile(); @@ -93,7 +95,7 @@ public function test_it_can_not_set_a_schema_if_the_reader_started_reading(): vo fclose($xsdHandle); } - + public function test_it_can_not_set_a_schema_if_the_schema_is_invalid(): void { [$xsdFile, $xsdHandle] = $this->fillFile('invalid schema'); diff --git a/tests/Xml/Reader/Matcher/AbstractMatcherTest.php b/tests/Xml/Reader/Matcher/AbstractMatcherTest.php index ec25b1b..cb490d2 100644 --- a/tests/Xml/Reader/Matcher/AbstractMatcherTest.php +++ b/tests/Xml/Reader/Matcher/AbstractMatcherTest.php @@ -6,8 +6,10 @@ use Closure; use Generator; use PHPUnit\Framework\TestCase; +use VeeWee\Xml\Reader\MatchingNode; use VeeWee\Xml\Reader\Node\NodeSequence; use VeeWee\Xml\Reader\Reader; +use function Psl\Vec\map; abstract class AbstractMatcherTest extends TestCase { @@ -23,7 +25,7 @@ abstract public static function provideMatcherCases(): 
Generator; public function test_real_xml_cases(Closure $matcher, string $xml, array $expected) { $reader = Reader::fromXmlString($xml); - $actual = [...$reader->provide($matcher)]; + $actual = map($reader->provide($matcher), static fn (MatchingNode $match): string => $match->xml()); static::assertSame($actual, $expected); } diff --git a/tests/Xml/Reader/MatchingNodeTest.php b/tests/Xml/Reader/MatchingNodeTest.php new file mode 100644 index 0000000..126895e --- /dev/null +++ b/tests/Xml/Reader/MatchingNodeTest.php @@ -0,0 +1,73 @@ +', + $sequence = new NodeSequence( + new ElementNode(1, 'hello', 'hello', '', '', []) + ) + ); + + static::assertSame($xml, $match->xml()); + static::assertSame($sequence, $match->nodeSequence()); + } + + + public function test_it_can_match(): void + { + $match = new MatchingNode( + '', + new NodeSequence( + new ElementNode(1, 'hello', 'hello', '', '', []) + ) + ); + + static::assertTrue($match->matches(element_name('hello'))); + static::assertFalse($match->matches(element_name('world'))); + } + + + public function test_it_can_transform_into_a_dom_document(): void + { + $match = new MatchingNode( + $xml = '', + new NodeSequence( + new ElementNode(1, 'hello', 'hello', '', '', []) + ) + ); + + $document = $match->intoDocument(identity()); + + static::assertSame($xml, xml_string()($document->map(document_element()))); + } + + public function test_it_can_decode_the_xml(): void + { + $match = new MatchingNode( + $xml = '', + new NodeSequence( + new ElementNode(1, 'hello', 'hello', '', '', []) + ) + ); + + $decoded = $match->decode(identity()); + + static::assertSame(['hello' => ''], $decoded); + } +} diff --git a/tests/Xml/Reader/ReaderTest.php b/tests/Xml/Reader/ReaderTest.php index 940eb84..70125d2 100644 --- a/tests/Xml/Reader/ReaderTest.php +++ b/tests/Xml/Reader/ReaderTest.php @@ -7,11 +7,15 @@ use PHPUnit\Framework\TestCase; use VeeWee\Tests\Xml\Helper\FillFileTrait; use VeeWee\Xml\Exception\RuntimeException; +use 
VeeWee\Xml\Reader\MatchingNode; use VeeWee\Xml\Reader\Node\NodeSequence; use VeeWee\Xml\Reader\Reader; +use VeeWee\Xml\Reader\Signal; use function Psl\Fun\identity; +use function Psl\Vec\map; use function VeeWee\Xml\Reader\Loader\xml_string_loader; use function VeeWee\Xml\Reader\Matcher\all; +use function VeeWee\Xml\Reader\Matcher\element_name; use function VeeWee\Xml\Reader\Matcher\node_attribute; use function VeeWee\Xml\Reader\Matcher\node_name; @@ -27,7 +31,7 @@ public function test_it_can_provide_xml_string(string $xml, callable $matcher, a $reader = Reader::fromXmlString($xml, identity()); $iterator = $reader->provide($matcher); - static::assertSame($expected, [...$iterator]); + static::assertSame($expected, map($iterator, static fn (MatchingNode $match): string => $match->xml())); } /** @@ -40,7 +44,7 @@ public function test_it_can_provide_xml_file(string $xml, callable $matcher, arr $reader = Reader::fromXmlFile($file, identity()); $iterator = $reader->provide($matcher); - static::assertSame($expected, [...$iterator]); + static::assertSame($expected, map($iterator, static fn (MatchingNode $match): string => $match->xml())); fclose($handle); } @@ -64,6 +68,29 @@ public function test_it_throws_exception_on_invalid_xml_during_iteration(): void [...$iterator]; } + + public function test_it_can_send_stop_signal(): void + { + $xml = <<<'EOXML' + + Jos + Bos + Mos + + EOXML; + + $reader = Reader::fromXmlString($xml); + $signal = new Signal(); + + $actual = []; + foreach ($reader->provide(element_name('user'), $signal) as $match) { + $actual[] = $match->xml(); + $signal->stop(); + } + + static::assertSame(['Jos'], $actual); + } + public function provideXmlExpectations() { yield 'simple' => [ diff --git a/tests/Xml/Reader/SignalTest.php b/tests/Xml/Reader/SignalTest.php new file mode 100644 index 0000000..7919689 --- /dev/null +++ b/tests/Xml/Reader/SignalTest.php @@ -0,0 +1,20 @@ +stopRequested()); + + $signal->stop(); + static::assertTrue($signal->stopRequested()); 
+ } +}