Skip to content

Commit

Permalink
XML Loader - writing to XML files (#1166)
Browse files Browse the repository at this point in the history
* Add XML writer

* Added basic support for saving nested XMLs

* Added support for converting entries into attributes through naming convention

* Removed dependency on the SimpleXML PHP extension

* Removed xml loader benchmarks

* Removed xmlwriter extension from composer.json

---------

Co-authored-by: Joseph Bielawski <stloyd@gmail.com>
  • Loading branch information
norberttech and stloyd authored Aug 4, 2024
1 parent 18d7f74 commit 8d05d10
Show file tree
Hide file tree
Showing 27 changed files with 1,627 additions and 277 deletions.
6 changes: 4 additions & 2 deletions composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

use function Flow\ETL\DSL\type_string;
use Flow\ETL\PHP\Type\Caster;
use Flow\ETL\Row\Entry\UuidEntry;
use Flow\ETL\Row\Entry\{UuidEntry, XMLEntry};
use Flow\ETL\Row\Schema;
use Flow\ETL\Rows;

Expand All @@ -29,6 +29,7 @@ public function normalize(Rows $rows, Schema $schema) : array
foreach ($row->entries() as $entry) {
$columns[$entry->name()] = match ($entry::class) {
UuidEntry::class => $this->caster->to(type_string())->value($entry->value()),
XMLEntry::class => $this->caster->to(type_string())->value($entry->value()),
default => $this->caster->to($schema->getDefinition($entry->ref())->type())->value($entry->value()),
};
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Adapter\XML\Abstraction;

use Flow\ETL\Exception\InvalidArgumentException;

/**
 * Immutable name/value pair describing a single attribute of an XML element.
 */
final class XMLAttribute
{
    /**
     * @param string $name attribute name, must not be an empty string
     * @param string $value attribute value, stored as given
     *
     * @throws InvalidArgumentException when $name is an empty string
     */
    public function __construct(
        public readonly string $name,
        public readonly string $value
    ) {
        if ('' === $name) {
            throw new InvalidArgumentException('XMLAttribute name can not be empty');
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Adapter\XML\Abstraction;

use Flow\ETL\Exception\InvalidArgumentException;

/**
 * Immutable description of an XML element.
 *
 * Instances are created through the flatNode() / nestedNode() factories and
 * every append*() call returns a new instance instead of mutating the current one.
 */
final class XMLNode
{
    /**
     * @param array<XMLAttribute> $attributes
     * @param array<XMLNode> $children
     *
     * @throws InvalidArgumentException when $name is an empty string
     */
    private function __construct(
        public readonly string $name,
        public readonly ?string $value,
        public readonly XMLNodeType $type,
        public readonly array $attributes = [],
        public readonly array $children = []
    ) {
        if ('' === $name) {
            throw new InvalidArgumentException('XMLNode name can not be empty');
        }
    }

    /**
     * Creates a FLAT node holding an optional text value; such a node rejects children.
     *
     * @throws InvalidArgumentException when $name is an empty string
     */
    public static function flatNode(string $name, ?string $value) : self
    {
        return new self($name, $value, XMLNodeType::FLAT);
    }

    /**
     * Creates a NESTED node without a value, ready to receive children.
     *
     * @throws InvalidArgumentException when $name is an empty string
     */
    public static function nestedNode(string $name) : self
    {
        return new self($name, null, XMLNodeType::NESTED);
    }

    /**
     * Dispatches to appendAttribute() or appendChild() depending on the element type.
     *
     * @throws InvalidArgumentException when a child node is appended to a FLAT node
     */
    public function append(self|XMLAttribute $element) : self
    {
        return $element instanceof XMLAttribute
            ? $this->appendAttribute($element)
            : $this->appendChild($element);
    }

    /**
     * Returns a copy of this node with the attribute added after the existing ones.
     */
    public function appendAttribute(XMLAttribute $attribute) : self
    {
        return $this->with(
            \array_merge($this->attributes, [$attribute]),
            $this->children
        );
    }

    /**
     * Returns a copy of this node with the child added after the existing ones.
     *
     * @throws InvalidArgumentException when this node is of FLAT type
     */
    public function appendChild(self $child) : self
    {
        if (XMLNodeType::FLAT === $this->type) {
            throw new InvalidArgumentException('XMLNode can not have children if it has value');
        }

        return $this->with(
            $this->attributes,
            \array_merge($this->children, [$child])
        );
    }

    public function hasChildren() : bool
    {
        return [] !== $this->children;
    }

    /**
     * @psalm-assert-if-true !null $this->value
     */
    public function hasValue() : bool
    {
        return null !== $this->value;
    }

    /**
     * Clones this node's name, value and type with replaced attribute and child lists.
     *
     * @param array<XMLAttribute> $attributes
     * @param array<XMLNode> $children
     */
    private function with(array $attributes, array $children) : self
    {
        return new self($this->name, $this->value, $this->type, $attributes, $children);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Adapter\XML\Abstraction;

/**
 * Kind of an XMLNode, fixed at construction time by the XMLNode factory that created it.
 */
enum XMLNodeType
{
// Assigned by XMLNode::flatNode(): the node carries an optional string value
// and XMLNode::appendChild() throws when called on it.
case FLAT;
// Assigned by XMLNode::nestedNode(): the node is created with a null value
// and accepts children through XMLNode::appendChild().
case NESTED;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Adapter\XML\Loader;

use Flow\ETL\Adapter\XML\RowsNormalizer\EntryNormalizer;
use Flow\ETL\Adapter\XML\RowsNormalizer\EntryNormalizer\PHPValueNormalizer;
use Flow\ETL\Adapter\XML\{RowsNormalizer, XMLWriter};
use Flow\ETL\Loader\Closure;
use Flow\ETL\{FlowContext, Loader, Rows};
use Flow\Filesystem\{DestinationStream, Partition, Path};

/**
 * Loader that serializes rows into an XML document on a destination stream.
 *
 * The first write to a not-yet-open stream emits the XML declaration and the
 * opening root tag; closure() emits the closing root tag and closes the writers.
 */
final class XMLLoader implements Closure, Loader, Loader\FileLoader
{
    /**
     * Number of non-empty batches written so far, keyed by destination stream path.
     *
     * @var array<string, int>
     */
    private array $writes = [];

    /**
     * @param Path $path destination the streams are opened for
     * @param string $rootElementName tag name used for the opening and closing root element
     * @param string $rowElementName node name handed to RowsNormalizer for each row
     * @param string $attributePrefix forwarded to the entry/value normalizers
     *                                (presumably marks entries to be rendered as attributes - confirm in EntryNormalizer)
     * @param string $dateTimeFormat forwarded to the entry/value normalizers
     * @param XMLWriter $xmlWriter turns each normalized node into the text appended to the stream
     */
    public function __construct(
        private readonly Path $path,
        private readonly string $rootElementName,
        private readonly string $rowElementName,
        private readonly string $attributePrefix,
        private readonly string $dateTimeFormat,
        private readonly XMLWriter $xmlWriter
    ) {
    }

    /**
     * Appends the closing root tag to every stream in the context whose path has
     * the "xml" extension, then closes the writers opened for this loader's path.
     */
    public function closure(FlowContext $context) : void
    {
        foreach ($context->streams() as $stream) {
            if ($stream->path()->extension() === 'xml') {
                $stream->append('</' . $this->rootElementName . '>');
            }
        }

        $context->streams()->closeWriters($this->path);
    }

    public function destination() : Path
    {
        return $this->path;
    }

    /**
     * Builds the rows normalizer from the loader configuration and writes the
     * rows to the stream matching their partitions.
     */
    public function load(Rows $rows, FlowContext $context) : void
    {
        $normalizer = new RowsNormalizer(
            new EntryNormalizer(
                new PHPValueNormalizer($context->config->caster(), $this->attributePrefix, $this->dateTimeFormat),
                $this->attributePrefix,
                $this->dateTimeFormat
            ),
            $this->rowElementName
        );

        $this->write($rows, $rows->partitions()->toArray(), $context, $normalizer);
    }

    /**
     * Writes rows to the stream for the given partitions, emitting the XML
     * declaration and opening root tag first when the stream was not open yet.
     *
     * @param array<Partition> $partitions
     */
    public function write(Rows $nextRows, array $partitions, FlowContext $context, RowsNormalizer $normalizer) : void
    {
        $streams = $context->streams();

        // The open state has to be captured before writeTo() is called, same order as before.
        $alreadyOpen = $streams->isOpen($this->path, $partitions);
        $stream = $streams->writeTo($this->path, $partitions);

        if (!$alreadyOpen) {
            $this->writes[$stream->path()->path()] ??= 0;

            $stream->append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<" . $this->rootElementName . ">\n");
        }

        $this->writeXML($nextRows, $stream, $normalizer);
    }

    /**
     * Appends every normalized row to the stream, one serialized node per line,
     * and bumps the write counter for the stream path. Empty row sets are ignored.
     */
    public function writeXML(Rows $rows, DestinationStream $stream, RowsNormalizer $normalizer) : void
    {
        if (0 === \count($rows)) {
            return;
        }

        foreach ($normalizer->normalize($rows) as $node) {
            $stream->append($this->xmlWriter->write($node) . "\n");
        }

        // The counter is only initialised by write() for streams it opened itself, so a direct
        // call (or a stream that was already open) must not rely on the key being present.
        $streamPath = $stream->path()->path();
        $this->writes[$streamPath] = ($this->writes[$streamPath] ?? 0) + 1;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Adapter\XML;

use Flow\ETL\Adapter\XML\Abstraction\XMLNode;
use Flow\ETL\Adapter\XML\RowsNormalizer\EntryNormalizer;
use Flow\ETL\Rows;

/**
 * Converts rows into XML nodes, one nested node per row.
 */
final class RowsNormalizer
{
    /**
     * @param EntryNormalizer $entryNormalizer converts a single entry into an element appended to the row node
     * @param string $rowNodeName name given to the node created for each row
     */
    public function __construct(
        private readonly EntryNormalizer $entryNormalizer,
        private readonly string $rowNodeName = 'row'
    ) {
    }

    /**
     * Lazily yields one nested node per row, with every normalized entry of
     * that row appended to it in iteration order.
     *
     * @return \Generator<XMLNode>
     */
    public function normalize(Rows $rows) : \Generator
    {
        foreach ($rows as $row) {
            $rowNode = XMLNode::nestedNode($this->rowNodeName);

            foreach ($row->entries() as $entry) {
                $normalizedEntry = $this->entryNormalizer->normalize($entry);
                // append() returns a new node, so the result has to be re-assigned.
                $rowNode = $rowNode->append($normalizedEntry);
            }

            yield $rowNode;
        }
    }
}
Loading

0 comments on commit 8d05d10

Please sign in to comment.