Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed Dremel to properly shred/assemble nested structures with nullable elements #778

Merged
merged 2 commits into from
Nov 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 43 additions & 61 deletions src/lib/dremel/src/Flow/Dremel/Dremel.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

use function Flow\Parquet\array_flatten;
use Flow\Dremel\Exception\InvalidArgumentException;
use Flow\Dremel\Exception\RuntimeException;

final class Dremel
{
Expand All @@ -19,67 +18,54 @@ public function __construct()
*
* @psalm-suppress UndefinedInterfaceMethod
*/
public function assemble(array $repetitions, array $definitions, array $values) : \Generator
public function assemble(array $repetitions, array $definitions, array $values) : array
{
$this->assertInput($repetitions, $definitions);

$maxDefinitionLevel = \count($definitions) ? \max($definitions) : 0;
$maxRepetitionLevel = \count($repetitions) ? \max($repetitions) : 0;

$output = [];
$valueIndex = 0;

if ($maxRepetitionLevel === 0) {
foreach ($definitions as $definition) {
if ($definition === 0) {
yield null;
$output[] = null;
} elseif ($definition === $maxDefinitionLevel) {
yield $values[$valueIndex];
$output[] = $values[$valueIndex];
$valueIndex++;
}
}

return;
return $output;
}

$stack = new Stack();

foreach ($definitions as $definitionIndex => $definition) {
$repetition = $repetitions[$definitionIndex];

if ($repetition === 0) {
if ($stack->size()) {
yield $stack->dropFlat();
$stack->clear();
$stack->push(new ListNode($maxRepetitionLevel));
} else {
$stack->push(new ListNode($maxRepetitionLevel));
}
if ($repetition === 0 && $definition !== 0) {
$stack->push(new ListNode($maxRepetitionLevel));
}

if ($repetition === 0 && $definition === 0) {
yield null;
$stack->clear();
} else {
if ($repetition <= $maxRepetitionLevel && $repetition > 0) {
/** @phpstan-ignore-next-line */
$stack->last()->push(
$this->value($definition, $maxDefinitionLevel, $values, $valueIndex),
$repetition
);
} elseif ($repetition === 0) {
/** @phpstan-ignore-next-line */
$stack->last()->push(
$this->value($definition, $maxDefinitionLevel, $values, $valueIndex),
$maxRepetitionLevel
);
}
$stack->push(new NullNode());

continue;
}
}

if ($stack->size()) {
yield $stack->dropFlat();
$stack->clear();
if ($definition + 1 >= $maxDefinitionLevel) {
/** @phpstan-ignore-next-line */
$stack->last()->push(
$this->value($definition, $maxDefinitionLevel, $values, $valueIndex),
$repetition === 0 ? $maxRepetitionLevel : $repetition
);
}
}

return $stack->dropFlat();
}

/**
Expand All @@ -89,9 +75,10 @@ public function shred(array $data, int $maxDefinitionLevel) : DataShredded
{
$definitions = [];
$this->buildDefinitions($data, $definitions, $maxDefinitionLevel);
$repetitions = $this->buildRepetitions($data);

return new DataShredded(
$this->buildRepetitions($data),
$repetitions,
$definitions,
\array_values(\array_filter(array_flatten($data), static fn ($item) => $item !== null))
);
Expand All @@ -112,55 +99,50 @@ private function assertInput(array $repetitions, array $definitions) : void
}
}

private function buildDefinitions(array $data, array &$definitions, int $maxDefinitionLevel) : void
private function buildDefinitions(array $data, array &$definitions, int $maxDefinitionLevel, int $level = 1) : void
{
$previousElementType = null;

foreach ($data as $key => $value) {
if (\is_array($value)) {
// Recursively call the function if the value is an array
$this->buildDefinitions($value, $definitions, $maxDefinitionLevel);
if (!\count($value)) {
$definitions[] = $level;
} else {
$this->buildDefinitions($value, $definitions, $maxDefinitionLevel, $level + 1);
}
} else {
if ($value === null) {
$definitions[] = 0;
if ($level === 1 || $previousElementType === 'array') {
$definitions[] = 0;
} else {
$definitions[] = $level;
}
} else {
$definitions[] = $maxDefinitionLevel;
}
}

$previousElementType = \gettype($value);
}
}

private function buildRepetitions(array $data, int $currentLevel = 0, bool $newRow = true) : array
private function buildRepetitions(array $data, int $currentLevel = 0, int $topIndex = 0) : array
{
$output = [];

foreach ($data as $item) {
foreach ($data as $index => $item) {
if (\is_array($item)) {
$currentLevel++;

$valueTypes = [];

foreach ($item as $subItem) {
$valueTypes[] = \gettype($subItem);
}
if (!\count($item)) {
$output[] = 0;

if (\count(\array_unique($valueTypes)) !== 1) {
throw new RuntimeException('Invalid data structure, each row must be an array of arrays or scalars, got both, arrays and scalars. ' . \json_encode($item, \JSON_THROW_ON_ERROR));
continue;
}

$newRow = true;

foreach ($item as $subItem) {
if (\is_array($subItem)) {
$output = \array_merge($output, $this->buildRepetitions($subItem, $currentLevel + 1, $newRow));
} else {
$output[] = $newRow ? 0 : $currentLevel;
}

$newRow = false;
}
$currentLevel--;
$output = \array_merge($output, $this->buildRepetitions($item, $currentLevel + 1, $index));
} else {
if (!\count($output)) {
$output[] = $newRow ? 0 : $currentLevel - 1;
$output[] = $topIndex === 0 ? 0 : $currentLevel - 1;
} else {
$output[] = $currentLevel;
}
Expand Down
7 changes: 1 addition & 6 deletions src/lib/dremel/src/Flow/Dremel/NullNode.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,10 @@

final class NullNode implements Node
{
public function __construct(private readonly int $level)
public function __construct()
{
}

public function repetition() : int
{
return $this->level;
}

public function value() : array|null
{
return null;
Expand Down
10 changes: 5 additions & 5 deletions src/lib/dremel/src/Flow/Dremel/Stack.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ public function clear() : void
$this->nodes = [];
}

public function dropFlat() : ?array
public function dropFlat() : array
{
$output = [];

if (\count($this->nodes) === 1) {
return $this->nodes[0]->value();
if (!\count($this->nodes)) {
return [];
}

$output = [];

foreach ($this->nodes as $node) {
$output[] = $node->value();
}
Expand Down
70 changes: 65 additions & 5 deletions src/lib/dremel/tests/Flow/Dremel/Tests/Integration/DremelTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,33 @@

final class DremelTest extends TestCase
{
/**
 * Round-trips a three-level nested list of integers: the structure is
 * shredded with a max definition level of 5 and the assembled result
 * must be identical to the input.
 */
public function test_deeply_nested_lists() : void
{
    $rows = [
        [[0, 1, 2]],
        [[3, 4, 5], [3, 4, 5]],
        [[6, 7, 8]],
    ];

    // A fresh Dremel instance is used for each of the two steps.
    $columnData = (new Dremel())->shred($rows, 5);
    $assembled = (new Dremel())->assemble(
        $columnData->repetitions,
        $columnData->definitions,
        $columnData->values
    );

    $this->assertSame($rows, $assembled);
}

public function test_dremel_shredding_and_assembling() : void
{
$repetitions = [0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1];
$definitions = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3];
$definitions = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2];
$values = [['Suscipit officiis dolorum ea omnis est id magnam.', 'Ea rerum saepe a minima non iusto.'], ['Id dolor et et repellendus.', 'Cumque facilis aut quos et.', 'Sit illum ipsam dolor voluptatem est.'], ['Commodi dicta rerum quas omnis sunt dolor.', 'Architecto sint corrupti nihil soluta nesciunt.', 'Accusamus libero aliquam rerum.'], ['Eum molestias reiciendis cumque ad animi.', 'Sunt ad magnam quas dolores possimus sint aut.', 'Quidem cupiditate doloremque aut esse non.', 'Consequatur nobis delectus aut.', 'Quo fuga fugiat nulla dolor non fugit dolorum.', 'Voluptate ex culpa deleniti est eum qui.', 'Quia sunt quia ut consequatur et optio et.'], ['Aut soluta corrupti laborum qui.', 'Officia maiores natus voluptatem provident aut.', 'Voluptatem modi sequi molestiae aut molestiae.', 'Cumque qui voluptas quia.', 'Quis esse ut odio commodi quae.', 'Voluptatem est accusantium est et eum.', 'Ratione et ut fuga qui atque sed et.', 'Et aut ut quidem provident excepturi placeat.'], ['Rerum molestiae dicta libero dolorem.', 'Expedita fuga sequi a maiores quasi.', 'Nesciunt qui similique et.', 'Architecto perferendis qui sequi sint qui nemo.', 'Sequi in atque tenetur.', 'Voluptatem quod et placeat cupiditate.', 'Qui qui laborum consequatur quos cum totam.', 'Saepe sit quae eos accusamus.', 'Qui illum dolor vel consequuntur nihil.'], ['Vel tenetur velit quas.', 'Natus autem ab beatae nihil recusandae.', 'Ut quasi voluptatum qui dolore ut.', 'Ducimus et minima voluptatem cum sint non.', 'Rerum tenetur sunt quidem est et modi et.', 'Vitae sit eum eius rerum possimus.', 'Eos ipsa est a aliquid impedit doloremque nisi.', 'Aut illum quam sit asperiores.'], ['Repellat dolore sit ad amet sed repudiandae.', 'Quam nemo cum quo culpa.', 'Omnis sed minima vero.', 'Esse qui quo cumque earum eius nulla.', 'Sed in adipisci quas fuga.', 'Dolor est aliquid tempora.', 'Ut expedita id suscipit ut voluptatem.'], ['Sint ipsa et autem ut id vitae.', 'Sapiente ut ab qui.', 'Ullam sit numquam 
qui perferendis aut.'], ['Qui illum id nam quia quibusdam vero.', 'Quas laboriosam perferendis temporibus vero.', 'Numquam quas deserunt est et eius.', 'Voluptas debitis incidunt ea minus.', 'Pariatur ipsa ipsa sequi ut est dolor adipisci.']];

$dremel = new Dremel();
Expand All @@ -19,11 +42,47 @@ public function test_dremel_shredding_and_assembling() : void
$this->assertSame($repetitions, $shredded->repetitions);
$this->assertSame($definitions, $shredded->definitions);

$assembledValues = \iterator_to_array($dremel->assemble($shredded->repetitions, $shredded->definitions, $shredded->values));
$assembledValues = $dremel->assemble($shredded->repetitions, $shredded->definitions, $shredded->values);

$this->assertSame($values, $assembledValues);
}

/**
 * Shreds rows that mix populated lists, an empty list, a null row and a
 * list holding only nulls, checks the produced repetition and definition
 * levels, then verifies that assembling restores the original rows.
 */
public function test_dremel_shredding_and_assembling_list_with_empty_elements() : void
{
    $rows = [[1, 2, 3], [], [4, 5, 6], null, [null, null, null]];

    // Levels expected from shredding $rows with a max definition level of 3.
    $expectedRepetitions = [0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1];
    $expectedDefinitions = [3, 3, 3, 1, 3, 3, 3, 0, 2, 2, 2];

    $codec = new Dremel();
    $columnData = $codec->shred($rows, 3);

    $this->assertSame($expectedRepetitions, $columnData->repetitions);
    $this->assertSame($expectedDefinitions, $columnData->definitions);

    $assembled = $codec->assemble(
        $columnData->repetitions,
        $columnData->definitions,
        $columnData->values
    );

    $this->assertSame($rows, $assembled);
}

/**
 * Shreds rows where one list contains only null elements, checks the
 * produced repetition and definition levels, then verifies that
 * assembling restores the original rows.
 */
public function test_dremel_shredding_and_assembling_list_with_nulls_in_list() : void
{
    $rows = [[1, 2, 3], [null, null], [4, 5, 6]];

    // Levels expected from shredding $rows with a max definition level of 3.
    $expectedRepetitions = [0, 1, 1, 0, 1, 0, 1, 1];
    $expectedDefinitions = [3, 3, 3, 2, 2, 3, 3, 3];

    $codec = new Dremel();
    $columnData = $codec->shred($rows, 3);

    $this->assertSame($expectedRepetitions, $columnData->repetitions);
    $this->assertSame($expectedDefinitions, $columnData->definitions);

    $assembled = $codec->assemble(
        $columnData->repetitions,
        $columnData->definitions,
        $columnData->values
    );

    $this->assertSame($rows, $assembled);
}

public function test_dremel_shredding_and_assembling_nullable_nested_values() : void
{
$repetitions = [0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0];
Expand All @@ -38,8 +97,9 @@ public function test_dremel_shredding_and_assembling_nullable_nested_values() :
$this->assertSame($definitions, $shredded->definitions);
$this->assertSame($flatValues, $shredded->values);

$assembledValues = \iterator_to_array($dremel->assemble($shredded->repetitions, $shredded->definitions, $flatValues));

$this->assertSame($values, $assembledValues);
$this->assertSame(
$values,
$dremel->assemble($shredded->repetitions, $shredded->definitions, $shredded->values)
);
}
}
Loading