From 52c69732795aa3e0904ac4bb0f25a1ac218df25f Mon Sep 17 00:00:00 2001 From: ndinh215 Date: Fri, 8 Jan 2016 16:24:55 +0700 Subject: [PATCH 1/4] Add the option 'gzip' which allows importing Gzip Json file. --- Command/IndexImportCommand.php | 14 ++++- Service/ImportService.php | 17 ++++-- Service/Json/JsonReader.php | 17 +++++- .../Command/IndexImportCommandTest.php | 57 +++++++++++++++++++ 4 files changed, 97 insertions(+), 8 deletions(-) diff --git a/Command/IndexImportCommand.php b/Command/IndexImportCommand.php index 0fbe7663..9b9456a3 100644 --- a/Command/IndexImportCommand.php +++ b/Command/IndexImportCommand.php @@ -43,6 +43,12 @@ protected function configure() InputOption::VALUE_REQUIRED, 'Set bulk size for import', 1000 + ) + ->addOption( + 'gzip', + 'z', + InputOption::VALUE_NONE, + 'Import a gzip file' ); } @@ -53,13 +59,19 @@ protected function execute(InputInterface $input, OutputInterface $output) { $manager = $this->getManager($input->getOption('manager')); + $options = []; + if ($input->getOption('gzip')) { + $options['gzip'] = true; + } + /** @var ImportService $importService */ $importService = $this->getContainer()->get('es.import'); $importService->importIndex( $manager, $input->getArgument('filename'), $output, - $input->getOption('bulk-size') + $input->getOption('bulk-size'), + $options ); $output->writeln('Data import completed!'); diff --git a/Service/ImportService.php b/Service/ImportService.php index f0a26dfb..0d75add5 100644 --- a/Service/ImportService.php +++ b/Service/ImportService.php @@ -27,10 +27,16 @@ class ImportService * @param string $filename * @param OutputInterface $output * @param int $bulkSize + * @param array $options */ - public function importIndex(Manager $manager, $filename, OutputInterface $output, $bulkSize) - { - $reader = $this->getReader($manager, $this->getFilePath($filename), false); + public function importIndex( + Manager $manager, + $filename, + OutputInterface $output, + $bulkSize, + $options + ) { + $reader = $this->getReader($manager, $this->getFilePath($filename), false, $options); $progress = new ProgressBar($output, $reader->count()); $progress->setRedrawFrequency(100); @@ -81,11 +87,12 @@ protected function getFilePath($filename) * @param Manager $manager * @param string $filename * @param bool $convertDocuments + * @param array $options * * @return JsonReader */ - protected function getReader($manager, $filename, $convertDocuments) + protected function getReader($manager, $filename, $convertDocuments, $options) { - return new JsonReader($manager, $filename, $convertDocuments); + return new JsonReader($manager, $filename, $options, $convertDocuments); } } diff --git a/Service/Json/JsonReader.php b/Service/Json/JsonReader.php index e0a776c9..c189788c 100644 --- a/Service/Json/JsonReader.php +++ b/Service/Json/JsonReader.php @@ -65,19 +65,27 @@ class JsonReader implements \Countable, \Iterator */ private $convertDocuments; + /** + * @var array + */ + private $options; + /** * Constructor. * * @param Manager $manager * @param string $filename + * @param array $options * @param bool $convertDocuments + * */ - public function __construct($manager, $filename, $convertDocuments = true) + public function __construct($manager, $filename, $options, $convertDocuments = true) { $this->manager = $manager; $this->filename = $filename; $this->converter = $manager->getConverter(); $this->convertDocuments = $convertDocuments; + $this->options = $options; } /** @@ -108,7 +116,12 @@ public function getManager() protected function getFileHandler() { if ($this->handle === null) { - $fileHandler = @fopen($this->filename, 'r'); + $isGzip = array_key_exists('gzip', $this->options); + + $filename = !$isGzip? + $this->filename: + sprintf('compress.zlib://%s', $this->filename); + $fileHandler = @fopen($filename, 'r'); if ($fileHandler === false) { throw new \LogicException('Can not open file.'); diff --git a/Tests/Functional/Command/IndexImportCommandTest.php b/Tests/Functional/Command/IndexImportCommandTest.php index 727dca6a..5a90dd03 100644 --- a/Tests/Functional/Command/IndexImportCommandTest.php +++ b/Tests/Functional/Command/IndexImportCommandTest.php @@ -34,6 +34,20 @@ public function bulkSizeProvider() ]; } + /** + * Compressed Data provider for testIndexImport. + * + * @return array + */ + public function compressedDataProvider() + { + return [ + [10, 9, 'command_import_9.json.gz'], + [10, 10, 'command_import_10.json.gz'], + [10, 11, 'command_import_11.json.gz'], + ]; + } + /** * Test for index import command. * @@ -76,6 +90,49 @@ public function testIndexImport($bulkSize, $realSize, $filename) $this->assertEquals($data, $ids); } + /** + * Test for index import command with gzip option. + * + * @param int $bulkSize + * @param int $realSize + * @param string $filename + * + * @dataProvider compressedDataProvider + */ + public function testIndexImportWithGzipOption($bulkSize, $realSize, $filename) + { + $app = new Application(); + $app->add($this->getImportCommand()); + + $command = $app->find('ongr:es:index:import'); + $commandTester = new CommandTester($command); + $commandTester->execute( + [ + 'command' => $command->getName(), + 'filename' => __DIR__ . '/../../app/fixture/data/' . $filename, + '--bulk-size' => $bulkSize, + '--gzip' => null, + ] + ); + + $manager = $this->getManager(); + $manager->dropIndex(); + $repo = $manager->getRepository('AcmeBarBundle:Product'); + $search = $repo + ->createSearch() + ->addQuery(new MatchAllQuery()) + ->setSize($realSize); + $results = $repo->execute($search); + + $ids = []; + foreach ($results as $doc) { + $ids[] = substr($doc->id, 3); + } + sort($ids); + $data = range(1, $realSize); + $this->assertEquals($data, $ids); + } + /** * Returns import index command with assigned container. * From 678ff86872b906a3e85e4c6274c3910e756e0643 Mon Sep 17 00:00:00 2001 From: ndinh215 Date: Fri, 8 Jan 2016 16:30:34 +0700 Subject: [PATCH 2/4] Add the option 'gzip' in document and add some gzip json test files. --- Resources/doc/commands.md | 3 ++- Tests/app/fixture/data/command_import_10.json.gz | Bin 0 -> 177 bytes Tests/app/fixture/data/command_import_11.json.gz | Bin 0 -> 180 bytes Tests/app/fixture/data/command_import_9.json.gz | Bin 0 -> 167 bytes 4 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 Tests/app/fixture/data/command_import_10.json.gz create mode 100644 Tests/app/fixture/data/command_import_11.json.gz create mode 100644 Tests/app/fixture/data/command_import_9.json.gz diff --git a/Resources/doc/commands.md b/Resources/doc/commands.md index 5662bf8e..4e086070 100644 --- a/Resources/doc/commands.md +++ b/Resources/doc/commands.md @@ -42,7 +42,8 @@ Imports data to the selected index. We are using custom `JSON` notation to speci | Options | Value | What it does | |:-------------:|:----------------------------:|:--------------------------------------------------------------------------------------:| | `--manager` | *Manager name. e.g.* `default` | Used to select manager to create index for. If not specified, default manager is used. | -| `--bulk-size` | *Bulk size, default 1000* | The document frequency to flush the index on import. +| `--bulk-size` | *Bulk size, default 1000* | The document frequency to flush the index on import. | +| `--gzip` | *not required* | Used to import Gzip Json files.| So here's a simple example how the data looks like: diff --git a/Tests/app/fixture/data/command_import_10.json.gz b/Tests/app/fixture/data/command_import_10.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..afd98cb4663f4234bf1027c9fd44c2e9fbeae4db GIT binary patch literal 177 zcmV;i08alOiwFp=E{|3K17mM(ZDDR?UukV{Z*p{BF)%J_b8l_{jnX>~gD?<;;ohgP zwx+OoL!Dbh5yhGUDZz-W3xv3POfE26{?h2H=4q@wiDO!YM(?iT-;*5MOe1Z342gvb znwF3|MD}-lW7;_nPLf65_wo@redblHrwbz`KuQf#0i@C(H9%?&(g38Os=K#{vKVGNeqj literal 0 HcmV?d00001 diff --git a/Tests/app/fixture/data/command_import_11.json.gz b/Tests/app/fixture/data/command_import_11.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..27864e70ec10068d296463de4e8c2e6ef42ed0a6 GIT binary patch literal 180 zcmV;l089TLiwFq3E{|3K17mM(ZDDR?UukV{Z*p{BF)=P`b8l_{jnX>~gFp;~;hs~h zTvK4)Q0Ep=M6pYOlwd@>1w!0CY%VY^e`)kp^URz($xB?7M&>L3_oT-*(@5JcA$p~p zrX}1R#B++T#NFS6lXTH{T0X?b&%CN*x-e1#q_iLvKq?DT1EjVf4L}+T(gLKlARRzD i3(^Cmw;%&R1`EOkdRt*);Azuqv)2!M__&1)0{{T}KTK`_ literal 0 HcmV?d00001 diff --git a/Tests/app/fixture/data/command_import_9.json.gz b/Tests/app/fixture/data/command_import_9.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0404a0503e4e440cb5d5e53f989beb94fe4bd8f GIT binary patch literal 167 zcmV;Y09gMYiwFpq*o{^I17mM(ZDDR?UukV{Z*p{BIWB5*Z*BmM%sUQ)Fc3x2-m9>t zrs0pNvx_LA7*il63?kzKA@&}V4ZJC@bakH2`;%DGsx*eH_~)d@Hq%JkE-|rELDLd* zN8$30ucV#xaFQ Date: Fri, 8 Jan 2016 17:15:06 +0700 Subject: [PATCH 3/4] Change value of the option 'gzip' to null. --- Command/IndexImportCommand.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Command/IndexImportCommand.php b/Command/IndexImportCommand.php index 9b9456a3..2a6e67f2 100644 --- a/Command/IndexImportCommand.php +++ b/Command/IndexImportCommand.php @@ -61,7 +61,7 @@ protected function execute(InputInterface $input, OutputInterface $output) $options = []; if ($input->getOption('gzip')) { - $options['gzip'] = true; + $options['gzip'] = null; } /** @var ImportService $importService */ From d4b9a5d4e9ec92b09f9e65b7efd44b536945c0ed Mon Sep 17 00:00:00 2001 From: ndinh215 Date: Wed, 13 Jan 2016 14:55:30 +0700 Subject: [PATCH 4/4] Remove the property & method convertDocuments in JsonReader. Refactor redundancy after removing above. --- Command/IndexImportCommand.php | 3 ++- Service/ImportService.php | 10 ++++---- Service/Json/JsonReader.php | 42 ++-------------------------------- 3 files changed, 8 insertions(+), 47 deletions(-) diff --git a/Command/IndexImportCommand.php b/Command/IndexImportCommand.php index 2a6e67f2..f717f4ff 100644 --- a/Command/IndexImportCommand.php +++ b/Command/IndexImportCommand.php @@ -59,10 +59,12 @@ protected function execute(InputInterface $input, OutputInterface $output) { $manager = $this->getManager($input->getOption('manager')); + // Initialize options array $options = []; if ($input->getOption('gzip')) { $options['gzip'] = null; } + $options['bulk-size'] = $input->getOption('bulk-size'); /** @var ImportService $importService */ $importService = $this->getContainer()->get('es.import'); @@ -70,7 +72,6 @@ protected function execute(InputInterface $input, OutputInterface $output) $manager, $input->getArgument('filename'), $output, - $input->getOption('bulk-size'), $options ); diff --git a/Service/ImportService.php b/Service/ImportService.php index 0d75add5..0091e611 100644 --- a/Service/ImportService.php +++ b/Service/ImportService.php @@ -26,22 +26,21 @@ class ImportService * @param Manager $manager * @param string $filename * @param OutputInterface $output - * @param int $bulkSize * @param array $options */ public function importIndex( Manager $manager, $filename, OutputInterface $output, - $bulkSize, $options ) { - $reader = $this->getReader($manager, $this->getFilePath($filename), false, $options); + $reader = $this->getReader($manager, $this->getFilePath($filename), $options); $progress = new ProgressBar($output, $reader->count()); $progress->setRedrawFrequency(100); $progress->start(); + $bulkSize = $options['bulk-size']; foreach ($reader as $key => $document) { $data = $document['_source']; $data['_id'] = $document['_id']; @@ -86,13 +85,12 @@ protected function getFilePath($filename) * * @param Manager $manager * @param string $filename - * @param bool $convertDocuments * @param array $options * * @return JsonReader */ - protected function getReader($manager, $filename, $convertDocuments, $options) + protected function getReader($manager, $filename, $options) { - return new JsonReader($manager, $filename, $options, $convertDocuments); + return new JsonReader($manager, $filename, $options); } } diff --git a/Service/Json/JsonReader.php b/Service/Json/JsonReader.php index c189788c..f8e73e65 100644 --- a/Service/Json/JsonReader.php +++ b/Service/Json/JsonReader.php @@ -12,7 +12,6 @@ namespace ONGR\ElasticsearchBundle\Service\Json; use ONGR\ElasticsearchBundle\Service\Manager; -use ONGR\ElasticsearchBundle\Result\Converter; use Symfony\Component\OptionsResolver\OptionsResolver; /** @@ -45,11 +44,6 @@ class JsonReader implements \Countable, \Iterator */ private $metadata; - /** - * @var Converter - */ - private $converter; - /** * @var Manager */ @@ -60,11 +54,6 @@ class JsonReader implements \Countable, \Iterator */ private $optionsResolver; - /** - * @var bool - */ - private $convertDocuments; - /** * @var array */ @@ -76,15 +65,12 @@ class JsonReader implements \Countable, \Iterator * @param Manager $manager * @param string $filename * @param array $options - * @param bool $convertDocuments * */ - public function __construct($manager, $filename, $options, $convertDocuments = true) + public function __construct($manager, $filename, $options) { $this->manager = $manager; $this->filename = $filename; - $this->converter = $manager->getConverter(); - $this->convertDocuments = $convertDocuments; $this->options = $options; } @@ -180,7 +166,7 @@ protected function readLine() } $data = json_decode(rtrim($buffer, ','), true); - $this->currentLine = $this->convertDocument($this->getOptionsResolver()->resolve($data)); + $this->currentLine = $this->getOptionsResolver()->resolve($data); } /** @@ -276,14 +262,6 @@ public function getMetadata() return $this->metadata; } - /** - * @return Converter - */ - protected function getConverter() - { - return $this->converter; - } - /** * Returns configured options resolver instance. * @@ -298,20 +276,4 @@ private function getOptionsResolver() return $this->optionsResolver; } - - /** - * Converts array to document. - * - * @param array $document - * - * @return object - */ - private function convertDocument($document) - { - if (!$this->convertDocuments) { - return $document; - } - - return $this->getConverter()->convertToDocument($document, $this->getManager()); - } }