-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
XML Loader - writing to XML files (#1166)
* Add XML writer * Added basic support for saving nested XMLs * Added support for converting entries into attributes through naming convention * Removed dependency from SimpleXML php extension * Removed xml loader benchmarks * Removed xmlwriter extension from composer.json --------- Co-authored-by: Joseph Bielawski <stloyd@gmail.com>
- Loading branch information
1 parent
18d7f74
commit 8d05d10
Showing
27 changed files
with
1,627 additions
and
277 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
19 changes: 19 additions & 0 deletions
19
src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/Abstraction/XMLAttribute.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace Flow\ETL\Adapter\XML\Abstraction; | ||
|
||
use Flow\ETL\Exception\InvalidArgumentException; | ||
|
||
/**
 * Immutable name/value pair describing a single XML attribute.
 */
final class XMLAttribute
{
    /**
     * @throws InvalidArgumentException when $name is an empty string
     */
    public function __construct(
        public readonly string $name,
        public readonly string $value
    ) {
        // A zero-length name is the only input rejected here; the value may be empty.
        if (\mb_strlen($name) === 0) {
            throw new InvalidArgumentException('XMLAttribute name can not be empty');
        }
    }
}
87 changes: 87 additions & 0 deletions
87
src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/Abstraction/XMLNode.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace Flow\ETL\Adapter\XML\Abstraction; | ||
|
||
use Flow\ETL\Exception\InvalidArgumentException; | ||
|
||
/**
 * Immutable description of a single XML element.
 *
 * Instances are created through flatNode() or nestedNode(); every append*() call
 * returns a new instance and leaves the current one untouched.
 */
final class XMLNode
{
    /**
     * @param array<XMLAttribute> $attributes
     * @param array<XMLNode> $children
     *
     * @throws InvalidArgumentException when $name is an empty string
     */
    private function __construct(
        public readonly string $name,
        public readonly ?string $value,
        public readonly XMLNodeType $type,
        public readonly array $attributes = [],
        public readonly array $children = []
    ) {
        if (\mb_strlen($name) === 0) {
            throw new InvalidArgumentException('XMLNode name can not be empty');
        }
    }

    /**
     * Creates a FLAT node carrying an optional scalar value.
     */
    public static function flatNode(string $name, ?string $value) : self
    {
        return new self($name, $value, XMLNodeType::FLAT);
    }

    /**
     * Creates a NESTED node without a value.
     */
    public static function nestedNode(string $name) : self
    {
        return new self($name, null, XMLNodeType::NESTED);
    }

    /**
     * Dispatches to appendAttribute() or appendChild() depending on the element type.
     */
    public function append(self|XMLAttribute $element) : self
    {
        return $element instanceof XMLAttribute
            ? $this->appendAttribute($element)
            : $this->appendChild($element);
    }

    /**
     * Returns a copy of this node with the attribute added after the existing ones.
     */
    public function appendAttribute(XMLAttribute $attribute) : self
    {
        return $this->copyWith([...$this->attributes, $attribute], $this->children);
    }

    /**
     * Returns a copy of this node with the child added after the existing ones.
     *
     * @throws InvalidArgumentException when this node is of FLAT type
     */
    public function appendChild(self $child) : self
    {
        if ($this->type === XMLNodeType::FLAT) {
            throw new InvalidArgumentException('XMLNode can not have children if it has value');
        }

        return $this->copyWith($this->attributes, [...$this->children, $child]);
    }

    public function hasChildren() : bool
    {
        return $this->children !== [];
    }

    /**
     * @psalm-assert-if-true !null $this->value
     */
    public function hasValue() : bool
    {
        return $this->value !== null;
    }

    /**
     * Builds a new node that keeps name, value and type but swaps attributes and children.
     *
     * @param array<XMLAttribute> $attributes
     * @param array<XMLNode> $children
     */
    private function copyWith(array $attributes, array $children) : self
    {
        return new self($this->name, $this->value, $this->type, $attributes, $children);
    }
}
11 changes: 11 additions & 0 deletions
11
src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/Abstraction/XMLNodeType.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace Flow\ETL\Adapter\XML\Abstraction; | ||
|
||
/**
 * Kind of an XMLNode.
 */
enum XMLNodeType
{
    /** Type assigned by XMLNode::flatNode(); such a node rejects children. */
    case FLAT;

    /** Type assigned by XMLNode::nestedNode(); such a node accepts children. */
    case NESTED;
}
99 changes: 99 additions & 0 deletions
99
src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/Loader/XMLLoader.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace Flow\ETL\Adapter\XML\Loader; | ||
|
||
use Flow\ETL\Adapter\XML\RowsNormalizer\EntryNormalizer; | ||
use Flow\ETL\Adapter\XML\RowsNormalizer\EntryNormalizer\PHPValueNormalizer; | ||
use Flow\ETL\Adapter\XML\{RowsNormalizer, XMLWriter}; | ||
use Flow\ETL\Loader\Closure; | ||
use Flow\ETL\{FlowContext, Loader, Rows}; | ||
use Flow\Filesystem\{DestinationStream, Partition, Path}; | ||
|
||
/**
 * Loader that streams rows into XML destinations.
 *
 * The first write to a destination emits the XML declaration and the opening root tag,
 * each row is then appended as one serialized node, and closure() appends the closing root tag.
 */
final class XMLLoader implements Closure, Loader, Loader\FileLoader
{
    /**
     * Number of non-empty batches written, keyed by the destination stream path.
     *
     * @var array<string, int>
     */
    private array $writes = [];

    public function __construct(
        private readonly Path $path,
        private readonly string $rootElementName,
        private readonly string $rowElementName,
        private readonly string $attributePrefix,
        private readonly string $dateTimeFormat,
        private readonly XMLWriter $xmlWriter
    ) {
    }

    /**
     * Appends the closing root tag to XML streams and closes the writers for this loader's path.
     */
    public function closure(FlowContext $context) : void
    {
        // NOTE(review): this iterates every stream exposed by the context and matches only on
        // the "xml" extension - confirm that streams of other loaders cannot be affected.
        foreach ($context->streams() as $stream) {
            if ($stream->path()->extension() === 'xml') {
                $stream->append('</' . $this->rootElementName . '>');
            }
        }

        $context->streams()->closeWriters($this->path);
    }

    public function destination() : Path
    {
        return $this->path;
    }

    /**
     * Builds the normalizer for the configured element names/formats and writes the given rows.
     */
    public function load(Rows $rows, FlowContext $context) : void
    {
        $normalizer = new RowsNormalizer(
            new EntryNormalizer(
                new PHPValueNormalizer($context->config->caster(), $this->attributePrefix, $this->dateTimeFormat),
                $this->attributePrefix,
                $this->dateTimeFormat
            ),
            $this->rowElementName
        );

        $this->write($rows, $rows->partitions()->toArray(), $context, $normalizer);
    }

    /**
     * Resolves the destination stream for the given partitions and writes the rows into it,
     * emitting the XML declaration and opening root tag when the stream was not open yet.
     *
     * @param array<Partition> $partitions
     */
    public function write(Rows $nextRows, array $partitions, FlowContext $context, RowsNormalizer $normalizer) : void
    {
        $streams = $context->streams();

        // The open state has to be captured before writeTo() is called, same as the original order.
        $alreadyOpen = $streams->isOpen($this->path, $partitions);
        $stream = $streams->writeTo($this->path, $partitions);

        if (!$alreadyOpen) {
            // Keep an existing counter when the same path gets reopened.
            $this->writes[$stream->path()->path()] ??= 0;

            $stream->append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<" . $this->rootElementName . ">\n");
        }

        $this->writeXML($nextRows, $stream, $normalizer);
    }

    /**
     * Serializes every normalized row node into the stream, one node per line.
     * Empty batches are skipped and do not count as a write.
     */
    public function writeXML(Rows $rows, DestinationStream $stream, RowsNormalizer $normalizer) : void
    {
        if (\count($rows) === 0) {
            return;
        }

        foreach ($normalizer->normalize($rows) as $node) {
            $stream->append($this->xmlWriter->write($node) . "\n");
        }

        // This method is public and may receive a stream that write() never registered,
        // so the counter is read with a default instead of incrementing a missing key.
        $streamPath = $stream->path()->path();
        $this->writes[$streamPath] = ($this->writes[$streamPath] ?? 0) + 1;
    }
}
32 changes: 32 additions & 0 deletions
32
src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/RowsNormalizer.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace Flow\ETL\Adapter\XML; | ||
|
||
use Flow\ETL\Adapter\XML\Abstraction\XMLNode; | ||
use Flow\ETL\Adapter\XML\RowsNormalizer\EntryNormalizer; | ||
use Flow\ETL\Rows; | ||
|
||
/**
 * Turns Rows into a lazy sequence of XMLNode trees, one nested node per row.
 */
final class RowsNormalizer
{
    public function __construct(
        private readonly EntryNormalizer $entryNormalizer,
        private readonly string $rowNodeName = 'row'
    ) {
    }

    /**
     * Yields one nested node named after $rowNodeName for every row; each entry of the row
     * is normalized and appended to that node in iteration order.
     *
     * @return \Generator<XMLNode>
     */
    public function normalize(Rows $rows) : \Generator
    {
        foreach ($rows as $currentRow) {
            $rowNode = XMLNode::nestedNode($this->rowNodeName);

            foreach ($currentRow->entries() as $currentEntry) {
                $normalized = $this->entryNormalizer->normalize($currentEntry);
                // XMLNode is immutable, append() hands back a new instance.
                $rowNode = $rowNode->append($normalized);
            }

            yield $rowNode;
        }
    }
}
Oops, something went wrong.