Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add reader MatchingNode results and a signal to stop reading #66

Merged
merged 1 commit into from
Jan 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions docs/reader.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,31 @@ As a result, the reader provides a generator of XML strings that match your matc
## Example

```php
use VeeWee\Xml\Dom\Document;
use VeeWee\Xml\Dom\Configurator;
use VeeWee\Xml\Reader\Reader;
use VeeWee\Xml\Reader\Signal;
use VeeWee\Xml\Reader\Matcher;

$reader = Reader::fromXmlFile('large-data.xml');
$provider = $reader->provide(
Matcher\all(
$matcher = Matcher\all(
Matcher\node_name('item'),
Matcher\node_attribute('locale', 'nl-BE')
)
),
// Optionally, you can provide a signal to stop reading at a given point:
$signal = new Signal()
);

foreach ($provider as $nlItem) {
$dom = Document::fromXmlString($nlItem);
// Do something with it
$xml = $nlItem->xml();
$dom = $nlItem->intoDocument(Configurator\canonicalize());
$decoded = $nlItem->decode(Configurator\canonicalize());
$matched = $nlItem->matches($matcher);
$sequence = $nlItem->nodeSequence();

// If you have loaded sufficient items, you can stop reading the XML file:
$signal->stop();
}
```

Expand Down Expand Up @@ -54,7 +64,8 @@ The reader will keep only small parts of the XML in memory by reading the XML st
When the reader detects the first `breakfast_menu` element, it will ask the provided matchers if you are interested in this tag.
A matcher is a function that returns `true` when interested or `false` when it is not interested in this element.
When the matcher returns `true`, the reader will read the complete outer XML of the current tag and `yield` this matching XML to your logic.
This means that the memory-safety of YOUR reader is based on the part inside the XML you are interested in:
This XML is wrapped in a `MatchingNode` which also contains the `NodeSequence` and some handy shortcut functions to e.g. convert the XML into a DOM Document.
Do note that the memory-safety of YOUR reader is based on the part inside the XML you are interested in:
If you only match on the root node, it will yield the complete XML and therefore won't be memory-safe.

After deciding if you are interested in the previous tag, it jumps over to the next tag: `breakfast_menu > food[position() = 1 AND @soldOUt=false AND @bestSeller = true]` and asks the matcher if you are interested in this.
Expand Down
65 changes: 65 additions & 0 deletions src/Xml/Reader/MatchingNode.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
<?php
declare(strict_types=1);

namespace VeeWee\Xml\Reader;

use DOMDocument;
use VeeWee\Xml\Dom\Document;
use VeeWee\Xml\Encoding\Exception\EncodingException;
use VeeWee\Xml\Exception\RuntimeException;
use VeeWee\Xml\Reader\Node\NodeSequence;
use function VeeWee\Xml\Encoding\xml_decode;

/**
 * Immutable pairing of an XML string with the NodeSequence it was matched at.
 *
 * Besides exposing both values, it offers shortcuts that hand the XML to
 * Document::fromXmlString() and xml_decode(), and a way to run another
 * matcher against the stored NodeSequence.
 */
final class MatchingNode
{
    /**
     * @param non-empty-string $xml
     */
    public function __construct(
        private readonly string $xml,
        private readonly NodeSequence $nodeSequence,
    ) {
    }

    /**
     * The XML string exactly as it was passed to the constructor.
     *
     * @return non-empty-string
     */
    public function xml(): string
    {
        return $this->xml;
    }

    /**
     * The NodeSequence exactly as it was passed to the constructor.
     */
    public function nodeSequence(): NodeSequence
    {
        return $this->nodeSequence;
    }

    /**
     * Builds a Document from the stored XML, forwarding every configurator
     * untouched to Document::fromXmlString().
     *
     * @param list<callable(DOMDocument): DOMDocument> $configurators
     *
     * @throws RuntimeException
     */
    public function intoDocument(callable ...$configurators): Document
    {
        $document = Document::fromXmlString($this->xml(), ...$configurators);

        return $document;
    }

    /**
     * Passes the stored XML and the given configurators to xml_decode() and
     * returns its result as-is.
     *
     * @param list<callable(DOMDocument): DOMDocument> $configurators
     *
     * @throws RuntimeException
     * @throws EncodingException
     */
    public function decode(callable ...$configurators): array
    {
        $decoded = xml_decode($this->xml(), ...$configurators);

        return $decoded;
    }

    /**
     * Invokes the matcher with the stored NodeSequence and returns its verdict.
     *
     * @param callable(NodeSequence): bool $matcher
     */
    public function matches(callable $matcher): bool
    {
        $sequence = $this->nodeSequence();

        return $matcher($sequence);
    }
}
20 changes: 14 additions & 6 deletions src/Xml/Reader/Reader.php
Original file line number Diff line number Diff line change
Expand Up @@ -60,18 +60,25 @@ public static function fromXmlString(string $xml, callable ... $configurators):
/**
* @param callable(NodeSequence): bool $matcher
*
* @return Generator<string>
* @return Generator<MatchingNode>
*
* @throws RuntimeException
*/
public function provide(callable $matcher): Generator
public function provide(callable $matcher, ?Signal $signal = null): Generator
{
$signal ??= new Signal();
$reader = ($this->factory)();
$pointer = Pointer::create();

yield from stop_on_first_issue(
static fn (): bool => $reader->read(),
static function () use ($reader, $pointer, $matcher) : ?string {
static function () use ($reader, $signal): bool {
if($signal->stopRequested()) {
return !$reader->close();
}

return $reader->read();
},
static function () use ($reader, $pointer, $matcher) : ?MatchingNode {
if ($reader->nodeType === XMLReader::END_ELEMENT) {
$pointer->leaveElement();

Expand All @@ -93,13 +100,14 @@ static function () use ($reader): array {
);

$pointer->enterElement($element);
$result = $matcher($pointer->getNodeSequence()) ? $reader->readOuterXml() : null;
$outerXml = $matcher($pointer->getNodeSequence()) ? $reader->readOuterXml() : null;
$match = $outerXml ? new MatchingNode($outerXml, $pointer->getNodeSequence()) : null;

if ($isEmptyElement) {
$pointer->leaveElement();
}

return $result;
return $match;
}

return null;
Expand Down
19 changes: 19 additions & 0 deletions src/Xml/Reader/Signal.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?php
declare(strict_types=1);

namespace VeeWee\Xml\Reader;

/**
 * Small mutable flag that lets a consumer request a stop.
 *
 * The flag starts out lowered; stop() raises it and nothing in this class
 * lowers it again.
 */
final class Signal
{
    /** True once stop() has been called on this instance. */
    private bool $stopped = false;

    /**
     * Tells whether stop() has been called on this instance.
     */
    public function stopRequested(): bool
    {
        return $this->stopped;
    }

    /**
     * Raises the stop flag. Calling it more than once changes nothing further.
     */
    public function stop(): void
    {
        $this->stopped = true;
    }
}
8 changes: 5 additions & 3 deletions tests/Xml/Reader/Configurator/SubstituteEntitiesTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
namespace VeeWee\Tests\Xml\Reader\Configurator;

use PHPUnit\Framework\TestCase;
use VeeWee\Xml\Reader\MatchingNode;
use VeeWee\Xml\Reader\Reader;
use function Psl\Vec\map;
use function VeeWee\Xml\Reader\Configurator\substitute_entities;
use function VeeWee\Xml\Reader\Matcher\node_name;

Expand All @@ -21,11 +23,11 @@ public function test_it_can_substitute_entities(): void
[
'<user>my entity value</user>',
],
[...$iterator]
map($iterator, static fn (MatchingNode $match): string => $match->xml())
);
}


public function test_it_can_skip_substituting_entities(): void
{
$xml = $this->buildXml();
Expand All @@ -36,7 +38,7 @@ public function test_it_can_skip_substituting_entities(): void
[
'<user>&entity;</user>',
],
[...$iterator]
map($iterator, static fn (MatchingNode $match): string => $match->xml())
);
}

Expand Down
14 changes: 8 additions & 6 deletions tests/Xml/Reader/Configurator/XsdSchemaTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,18 @@
use PHPUnit\Framework\TestCase;
use VeeWee\Tests\Xml\Helper\FillFileTrait;
use VeeWee\Xml\Exception\RuntimeException;
use VeeWee\Xml\Reader\MatchingNode;
use VeeWee\Xml\Reader\Reader;
use XMLReader;
use function Psl\Vec\map;
use function VeeWee\Xml\Reader\Configurator\xsd_schema;
use function VeeWee\Xml\Reader\Matcher\node_name;

final class XsdSchemaTest extends TestCase
{
use FillFileTrait;


public function test_it_can_iterate_if_the_schema_matches(): void
{
[$xsdFile, $xsdHandle] = $this->createXsdFile();
Expand All @@ -37,13 +39,13 @@ public function test_it_can_iterate_if_the_schema_matches(): void
'<user>Bos</user>',
'<user>Mos</user>'
],
[...$iterator]
map($iterator, static fn (MatchingNode $match): string => $match->xml())
);

fclose($xsdHandle);
}


public function test_it_triggers_an_error_on_invalid_schema(): void
{
[$xsdFile, $xsdHandle] = $this->createXsdFile();
Expand All @@ -65,7 +67,7 @@ public function test_it_triggers_an_error_on_invalid_schema(): void
fclose($xsdHandle);
}


public function test_it_triggers_an_error_if_schema_file_does_not_exist(): void
{
$xml = '<root />';
Expand All @@ -80,7 +82,7 @@ public function test_it_triggers_an_error_if_schema_file_does_not_exist(): void
fclose($xsdHandle);
}


public function test_it_can_not_set_a_schema_if_the_reader_started_reading(): void
{
[$xsdFile, $xsdHandle] = $this->createXsdFile();
Expand All @@ -93,7 +95,7 @@ public function test_it_can_not_set_a_schema_if_the_reader_started_reading(): vo
fclose($xsdHandle);
}


public function test_it_can_not_set_a_schema_if_the_schema_is_invalid(): void
{
[$xsdFile, $xsdHandle] = $this->fillFile('invalid schema');
Expand Down
4 changes: 3 additions & 1 deletion tests/Xml/Reader/Matcher/AbstractMatcherTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
use Closure;
use Generator;
use PHPUnit\Framework\TestCase;
use VeeWee\Xml\Reader\MatchingNode;
use VeeWee\Xml\Reader\Node\NodeSequence;
use VeeWee\Xml\Reader\Reader;
use function Psl\Vec\map;

abstract class AbstractMatcherTest extends TestCase
{
Expand All @@ -23,7 +25,7 @@ abstract public static function provideMatcherCases(): Generator;
public function test_real_xml_cases(Closure $matcher, string $xml, array $expected)
{
$reader = Reader::fromXmlString($xml);
$actual = [...$reader->provide($matcher)];
$actual = map($reader->provide($matcher), static fn (MatchingNode $match): string => $match->xml());

static::assertSame($actual, $expected);
}
Expand Down
73 changes: 73 additions & 0 deletions tests/Xml/Reader/MatchingNodeTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
<?php
declare(strict_types=1);

namespace VeeWee\Tests\Xml\Reader;

use PHPUnit\Framework\TestCase;
use VeeWee\Xml\Reader\MatchingNode;
use VeeWee\Xml\Reader\Node\ElementNode;
use VeeWee\Xml\Reader\Node\NodeSequence;
use function Psl\Fun\identity;
use function VeeWee\Xml\Dom\Locator\document_element;
use function VeeWee\Xml\Dom\Mapper\xml_string;
use function VeeWee\Xml\Reader\Matcher\element_name;

/**
 * Exercises MatchingNode: its plain accessors, matcher delegation, and the
 * document / decode shortcuts, all against a single `<hello/>` fixture.
 */
final class MatchingNodeTest extends TestCase
{
    /** XML payload shared by every test in this class. */
    private const HELLO_XML = '<hello/>';

    public function test_it_is_a_matching_node(): void
    {
        $sequence = self::helloSequence();
        $match = new MatchingNode(self::HELLO_XML, $sequence);

        static::assertSame(self::HELLO_XML, $match->xml());
        // Must be the very same instance that went into the constructor.
        static::assertSame($sequence, $match->nodeSequence());
    }

    public function test_it_can_match(): void
    {
        $match = new MatchingNode(self::HELLO_XML, self::helloSequence());

        $matchesHello = $match->matches(element_name('hello'));
        static::assertTrue($matchesHello);

        $matchesWorld = $match->matches(element_name('world'));
        static::assertFalse($matchesWorld);
    }

    public function test_it_can_transform_into_a_dom_document(): void
    {
        $match = new MatchingNode(self::HELLO_XML, self::helloSequence());

        $document = $match->intoDocument(identity());

        $toXml = xml_string();
        $rootXml = $toXml($document->map(document_element()));

        static::assertSame(self::HELLO_XML, $rootXml);
    }

    public function test_it_can_decode_the_xml(): void
    {
        $match = new MatchingNode(self::HELLO_XML, self::helloSequence());

        $decoded = $match->decode(identity());

        static::assertSame(['hello' => ''], $decoded);
    }

    /**
     * Creates a fresh NodeSequence holding one `hello` ElementNode, using the
     * same constructor arguments in every test.
     */
    private static function helloSequence(): NodeSequence
    {
        $element = new ElementNode(1, 'hello', 'hello', '', '', []);

        return new NodeSequence($element);
    }
}
Loading