Skip to content

Commit

Permalink
Dremel & Parquet performance improvements (#862)
Browse files: browse the repository at this point in the history
  • Loading branch information
norberttech authored Dec 3, 2023
1 parent 587ad5b commit 085d8b3
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 12 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -72,7 +72,7 @@ public function destination() : Path

public function load(Rows $rows, FlowContext $context) : void
{
if ($this->schema === null) {
if ($this->schema === null && $this->inferredSchema === null) {
$this->inferSchema($rows);
}

Expand Down
24 changes: 13 additions & 11 deletions src/lib/dremel/src/Flow/Dremel/Dremel.php
Original file line number | Diff line number | Diff line change
Expand Up @@ -75,7 +75,12 @@ public function shred(array $data, int $maxDefinitionLevel) : DataShredded
{
$definitions = [];
$this->buildDefinitions($data, $definitions, $maxDefinitionLevel);
$repetitions = $this->buildRepetitions($data);
$repetitions = [];
$this->buildRepetitions($data, 0, 0, $repetitions);

if (!\count($repetitions) || \max($repetitions) === 0) {
$repetitions = [];
}

return new DataShredded(
$repetitions,
Expand Down Expand Up @@ -126,10 +131,8 @@ private function buildDefinitions(array $data, array &$definitions, int $maxDefi
}
}

private function buildRepetitions(array $data, int $currentLevel = 0, int $topIndex = 0) : array
private function buildRepetitions(array $data, int $currentLevel, int $topIndex, array &$output) : void
{
$output = [];

foreach ($data as $index => $item) {
if (\is_array($item)) {

Expand All @@ -139,7 +142,12 @@ private function buildRepetitions(array $data, int $currentLevel = 0, int $topIn
continue;
}

$output = \array_merge($output, $this->buildRepetitions($item, $currentLevel + 1, $index));
$childRepetitions = [];
$this->buildRepetitions($item, $currentLevel + 1, $index, $childRepetitions);

foreach ($childRepetitions as $repetition) {
$output[] = $repetition;
}
} else {
if (!\count($output)) {
$output[] = $topIndex === 0 ? 0 : $currentLevel - 1;
Expand All @@ -148,12 +156,6 @@ private function buildRepetitions(array $data, int $currentLevel = 0, int $topIn
}
}
}

if (!\count($output) || \max($output) === 0) {
return [];
}

return $output;
}

private function value(int $definition, int $maxDefinitionLevel, array $values, int &$valueIndex) : mixed
Expand Down

0 comments on commit 085d8b3

Please sign in to comment.