-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8a7bbbd
commit e725f1e
Showing
5 changed files
with
195 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
104 changes: 104 additions & 0 deletions
104
...parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PageBuilder/DataPageV2Statistics.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
<?php declare(strict_types=1); | ||
|
||
namespace Flow\Parquet\ParquetFile\RowGroupBuilder\PageBuilder; | ||
|
||
use Flow\Parquet\Data\ObjectToString; | ||
use Flow\Parquet\ParquetFile\RowGroupBuilder\Statistics\Comparator; | ||
|
||
/**
 * Running statistics for the values added through add(): total number of values,
 * number of nulls, current minimum / maximum and the list of non-null values
 * that distinctCount() is computed from.
 */
final class DataPageV2Statistics
{
    private Comparator $comparator;

    private mixed $max = null;

    private mixed $min = null;

    private int $nullCount = 0;

    private array $values = [];

    private int $valuesCount = 0;

    public function __construct()
    {
        $this->comparator = new Comparator();
    }

    /**
     * Registers a single value, or every element of an array value.
     *
     * - null increments both the values count and the null count and is not tracked further,
     * - an array increases the values count by its number of elements and every element is tracked,
     * - any other value increases the values count by one and is tracked.
     *
     * Only a top-level null is counted as null; null elements nested inside an array go
     * through the same tracking as any other element.
     */
    public function add(string|int|float|null|array|bool|object $value) : void
    {
        if (null === $value) {
            $this->valuesCount++;
            $this->nullCount++;

            return;
        }

        if (!\is_array($value)) {
            $this->valuesCount++;
            $this->track($value);

            return;
        }

        $this->valuesCount += \count($value);

        foreach ($value as $element) {
            $this->track($element);
        }
    }

    /**
     * Number of unique tracked values according to array_unique() default comparison,
     * 0 when nothing was tracked.
     */
    public function distinctCount() : int
    {
        return [] === $this->values
            ? 0
            : \count(\array_unique($this->values));
    }

    /**
     * Current maximum, null when no value replaced the initial null.
     */
    public function max() : mixed
    {
        return $this->max;
    }

    /**
     * Current minimum, null when no value replaced the initial null.
     */
    public function min() : mixed
    {
        return $this->min;
    }

    /**
     * How many times add() received a top-level null.
     */
    public function nullCount() : int
    {
        return $this->nullCount;
    }

    /**
     * Tracked values in insertion order; objects are stored as their string representation.
     */
    public function values() : array
    {
        return $this->values;
    }

    /**
     * Total number of values seen, array values contribute one per element.
     */
    public function valuesCount() : int
    {
        return $this->valuesCount;
    }

    /**
     * Updates min / max with the given value and appends it to the tracked values.
     *
     * NOTE(review): min and max start as null, so Comparator is presumably expected to
     * treat a null right-hand side as "no value yet" - confirm against Comparator.
     */
    private function track(mixed $candidate) : void
    {
        if ($this->comparator->isLessThan($candidate, $this->min)) {
            $this->min = $candidate;
        }

        if ($this->comparator->isGreaterThan($candidate, $this->max)) {
            $this->max = $candidate;
        }

        // Objects are converted to strings so that they can be compared by array_unique() later.
        $this->values[] = \is_object($candidate)
            ? ObjectToString::toString($candidate)
            : $candidate;
    }
}
35 changes: 35 additions & 0 deletions
35
...ib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PageBuilder/StatisticsBuilder.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
<?php declare(strict_types=1); | ||
|
||
namespace Flow\Parquet\ParquetFile\RowGroupBuilder\PageBuilder; | ||
|
||
use Flow\Parquet\BinaryWriter\BinaryBufferWriter; | ||
use Flow\Parquet\Data\DataConverter; | ||
use Flow\Parquet\ParquetFile\Data\PlainValuesPacker; | ||
use Flow\Parquet\ParquetFile\Schema\FlatColumn; | ||
use Flow\Parquet\ParquetFile\Statistics; | ||
|
||
/**
 * Creates a Statistics object for a column out of collected DataPageV2Statistics.
 */
final class StatisticsBuilder
{
    public function __construct(private readonly DataConverter $dataConverter)
    {
    }

    /**
     * Packs the collected min and max values with PlainValuesPacker and passes them,
     * together with the null and distinct counts, to Statistics.
     *
     * The packed min is used for both `min` and `minValue`, the packed max for both
     * `max` and `maxValue`.
     */
    public function build(FlatColumn $column, DataPageV2Statistics $chunkStatistics) : Statistics
    {
        $packedMin = $this->pack($column, $chunkStatistics->min());
        $packedMax = $this->pack($column, $chunkStatistics->max());

        return new Statistics(
            max: $packedMax,
            min: $packedMin,
            nullCount: $chunkStatistics->nullCount(),
            distinctCount: $chunkStatistics->distinctCount(),
            maxValue: $packedMax,
            minValue: $packedMin,
        );
    }

    /**
     * Packs a single value for the given column into a fresh buffer and returns that buffer.
     *
     * NOTE(review): relies on BinaryBufferWriter writing into the string it was constructed
     * with (i.e. taking it by reference) - confirm against BinaryBufferWriter.
     */
    private function pack(FlatColumn $column, mixed $value) : string
    {
        $buffer = '';
        $packer = new PlainValuesPacker(new BinaryBufferWriter($buffer), $this->dataConverter);
        $packer->packValues($column, [$value]);

        return $buffer;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters