Skip to content

Commit

Permalink
Merge pull request #49 from ezsystems/fork
Browse files Browse the repository at this point in the history
Added multiconcurrency support in ezxmltext convert command
  • Loading branch information
vidarl authored Jun 26, 2018
2 parents fd0b8b2 + 45a0b11 commit 43032b4
Show file tree
Hide file tree
Showing 4 changed files with 244 additions and 25 deletions.
207 changes: 184 additions & 23 deletions bundle/Command/ConvertXmlTextToRichTextCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@
use eZ\Publish\Core\FieldType\XmlText\Converter\RichText as RichTextConverter;
use Doctrine\DBAL\Connection;
use Symfony\Component\Debug\Exception\ContextErrorException;
use Symfony\Component\Process\PhpExecutableFinder;
use Symfony\Component\Process\ProcessBuilder;

class ConvertXmlTextToRichTextCommand extends ContainerAwareCommand
{
const MAX_OJBECTS_PER_CHILD = 1000;
/**
* @var \Doctrine\DBAL\Connection
*/
Expand All @@ -34,6 +37,26 @@ class ConvertXmlTextToRichTextCommand extends ContainerAwareCommand
*/
private $converter;

/**
 * Identifiers of the content types that are treated as images.
 *
 * Filled in by baseExecute() from the "--image-content-types" option (comma separated),
 * falling back to ['image'], and handed on to child processes by createChildProcess().
 *
 * @var array
 */
protected $imageContentTypeIdentifiers;

/**
 * Child processes started by processFields() that have not yet been reaped by waitForChild().
 *
 * Each entry is an array with the keys 'offset', 'limit' and 'process'.
 *
 * @var array
 */
protected $processes = [];

/**
 * Maximum number of child processes allowed to run at the same time.
 *
 * Read from the "--concurrency" option in baseExecute(), which rejects values below 1.
 *
 * @var int
 */
protected $maxConcurrency;

/**
 * Path to the PHP executable used to launch child processes.
 *
 * Looked up lazily and cached by getPhpPath().
 *
 * @var string
 */
private $phpPath;

public function __construct(Connection $dbal, RichTextConverter $converter, LoggerInterface $logger)
{
parent::__construct();
Expand All @@ -55,6 +78,13 @@ protected function configure()
This is a non-finalized work in progress. ALWAYS make sure you have a restorable backup of your database before using it.
EOT
)
->addOption(
'concurrency',
null,
InputOption::VALUE_OPTIONAL,
'Number of child processes to use when converting fields.',
1
)
->addOption(
'dry-run',
null,
Expand Down Expand Up @@ -97,25 +127,15 @@ protected function configure()

protected function execute(InputInterface $input, OutputInterface $output)
{
$this->loginAsAdmin();
$dryRun = false;
if ($input->getOption('dry-run')) {
$this->baseExecute($input, $output, $dryRun);
if ($dryRun) {
$output->writeln("Running in dry-run mode. No changes will actually be written to database\n");
$dryRun = true;
}

$testContentId = $input->getOption('test-content-object');

if ($input->getOption('image-content-types')) {
$contentTypeIdentifiers = explode(',', $input->getOption('image-content-types'));
} else {
$contentTypeIdentifiers = ['image'];
if ($testContentId !== null && $this->maxConcurrency !== 1) {
throw new RuntimeException('Multi concurrency is not supported together with the --test-content-object option');
}
$contentTypeIds = $this->getContentTypeIds($contentTypeIdentifiers);
if (count($contentTypeIds) !== count($contentTypeIdentifiers)) {
throw new RuntimeException('Unable to lookup all content type identifiers, found : ' . implode(',', $contentTypeIds));
}
$this->converter->setImageContentTypes($contentTypeIds);

if ($input->getOption('fix-embedded-images-only')) {
$output->writeln("Fixing embedded images only. No other changes are done to the database\n");
Expand All @@ -130,14 +150,42 @@ protected function execute(InputInterface $input, OutputInterface $output)
$dryRun = true;
}

$this->convertFields($dryRun, $testContentId, !$input->getOption('disable-duplicate-id-check'), !$input->getOption('disable-id-value-check'), $output);
$this->processFields($dryRun, $testContentId, !$input->getOption('disable-duplicate-id-check'), !$input->getOption('disable-id-value-check'), $output);
}

/**
 * Option handling shared by this command and the sub-process command.
 *
 * Logs in as admin, resolves the dry-run flag, validates "--concurrency" and configures the
 * converter with the ids of the content types that are to be treated as images.
 *
 * @param InputInterface $input
 * @param OutputInterface $output
 * @param bool $dryRun Output parameter: set to true when "--dry-run" is given, false otherwise.
 *
 * @throws RuntimeException If "--concurrency" is below 1, if it is combined with
 *                          "--fix-embedded-images-only", or if an image content type
 *                          identifier cannot be resolved.
 */
protected function baseExecute(InputInterface $input, OutputInterface $output, &$dryRun)
{
    $this->loginAsAdmin();
    $dryRun = (bool) $input->getOption('dry-run');

    $this->maxConcurrency = (int) $input->getOption('concurrency');
    if ($this->maxConcurrency < 1) {
        throw new RuntimeException('Invalid value for "--concurrency" given');
    }
    if ($input->getOption('fix-embedded-images-only') && $this->maxConcurrency !== 1) {
        throw new RuntimeException('Multi concurrency is not supported together with the --fix-embedded-images-only option');
    }

    if ($input->getOption('image-content-types')) {
        // Normalise the user supplied list: surrounding whitespace, empty entries and duplicates
        // would otherwise make the count comparison below fail for identifiers that do exist.
        $identifiers = array_map('trim', explode(',', $input->getOption('image-content-types')));
        $identifiers = array_filter(
            $identifiers,
            function ($identifier) {
                return $identifier !== '';
            }
        );
        $this->imageContentTypeIdentifiers = array_values(array_unique($identifiers));
        if (count($this->imageContentTypeIdentifiers) === 0) {
            throw new RuntimeException('Invalid value for "--image-content-types" given');
        }
    } else {
        $this->imageContentTypeIdentifiers = ['image'];
    }

    // getContentTypeIds() returns the ids indexed by identifier, which is what lets the
    // error message below name exactly the identifiers that could not be found.
    $imageContentTypeIds = $this->getContentTypeIds($this->imageContentTypeIdentifiers);
    if (count($imageContentTypeIds) !== count($this->imageContentTypeIdentifiers)) {
        throw new RuntimeException('Unable to lookup all content type identifiers, not found : ' . implode(',', array_diff($this->imageContentTypeIdentifiers, array_keys($imageContentTypeIds))));
    }
    $this->converter->setImageContentTypes($imageContentTypeIds);
}

protected function getContentTypeIds($contentTypeIdentifiers)
{
$query = $this->dbal->createQueryBuilder();

$query->select('c.id')
$query->select('c.identifier, c.id')
->from('ezcontentclass', 'c')
->where(
$query->expr()->in(
Expand All @@ -149,7 +197,7 @@ protected function getContentTypeIds($contentTypeIdentifiers)

$statement = $query->execute();

return $statement->fetchAll(PDO::FETCH_COLUMN);
return array_map('reset', $statement->fetchAll(PDO::FETCH_GROUP | PDO::FETCH_COLUMN));
}

protected function loginAsAdmin()
Expand Down Expand Up @@ -287,11 +335,16 @@ protected function getRowCountOfContentObjectAttributes($datatypeString, $conten
}

/**
* Get the specified field rows.
* Note that if $contentId !== null, then $offset and $limit will be ignored.
*
* @param $datatypeString
* @param $contentId
* @param $offset
* @param $limit
* @return \Doctrine\DBAL\Driver\Statement|int
*/
protected function getFieldRows($datatypeString, $contentId)
protected function getFieldRows($datatypeString, $contentId, $offset, $limit)
{
$query = $this->dbal->createQueryBuilder();
$query->select('a.*')
Expand All @@ -302,9 +355,13 @@ protected function getFieldRows($datatypeString, $contentId)
':datatypestring'
)
)
->orderBy('a.id')
->setParameter(':datatypestring', $datatypeString);

if ($contentId !== null) {
if ($contentId === null) {
$query->setFirstResult($offset)
->setMaxResults($limit);
} else {
$query->andWhere(
$query->expr()->eq(
'a.contentobject_id',
Expand Down Expand Up @@ -347,14 +404,94 @@ protected function updateFieldRow($dryRun, $id, $version, $datatext)
}
}

protected function convertFields($dryRun, $contentId, $checkDuplicateIds, $checkIdValues, OutputInterface $output)
protected function waitForAvailableProcessSlot(OutputInterface $output)
{
$count = $this->getRowCountOfContentObjectAttributes('ezxmltext', $contentId);
if (count($this->processes) >= $this->maxConcurrency) {
$this->waitForChild($output);
}
}

$output->writeln("Found $count field rows to convert.");
protected function waitForChild(OutputInterface $output)
{
$childEnded = false;
while (!$childEnded) {
foreach ($this->processes as $pid => $p) {
$process = $p['process'];

if (!$process->isRunning()) {
$output->write($process->getIncrementalOutput());
$output->write($process->getIncrementalErrorOutput());
$childEnded = true;
$exitStatus = $process->getExitCode();
if ($exitStatus !== 0) {
throw new RuntimeException(sprintf('Child process (offset=%s, limit=%s) ended with status code %d. Terminating', $p['offset'], $p['limit'], $exitStatus));
}
unset($this->processes[$pid]);
break;
}
$output->write($process->getIncrementalOutput());
$output->write($process->getIncrementalErrorOutput());
}
sleep(1);
}

$statement = $this->getFieldRows('ezxmltext', $contentId);
return;
}

/**
 * Starts a child PHP process that runs the "ezxmltext:convert-to-richtext-sub-process"
 * command for one batch of field rows.
 *
 * @param bool $dryRun Forwarded as "--dry-run" when true.
 * @param bool $checkDuplicateIds Forwarded as "--disable-duplicate-id-check" when false.
 * @param bool $checkIdValues Forwarded as "--disable-id-value-check" when false.
 * @param int $offset Forwarded as "--offset".
 * @param int $limit Forwarded as "--limit".
 * @param OutputInterface $output Only used to mirror the parent's verbosity in the child.
 *
 * @return \Symfony\Component\Process\Process The process, already started.
 */
private function createChildProcess($dryRun, $checkDuplicateIds, $checkIdValues, $offset, $limit, OutputInterface $output)
{
    $arguments = [
        // Console script location differs between installations; resolved relative to the working directory.
        file_exists('bin/console') ? 'bin/console' : 'app/console',
        'ezxmltext:convert-to-richtext-sub-process',
        "--offset=$offset",
        "--limit=$limit",
        '--image-content-types=' . implode(',', $this->imageContentTypeIdentifiers),
    ];
    if ($dryRun) {
        $arguments[] = '--dry-run';
    }
    if (!$checkDuplicateIds) {
        $arguments[] = '--disable-duplicate-id-check';
    }
    if (!$checkIdValues) {
        $arguments[] = '--disable-id-value-check';
    }

    // The verbosity predicates are cumulative (isVerbose() is also true at -vv and -vvv),
    // so the most verbose level must be tested first or the child would always get "-v".
    if ($output->isDebug()) {
        $arguments[] = '-vvv';
    } elseif ($output->isVeryVerbose()) {
        $arguments[] = '-vv';
    } elseif ($output->isVerbose()) {
        $arguments[] = '-v';
    }

    $builder = new ProcessBuilder($arguments);
    // A batch may take arbitrarily long; never let the process component time it out.
    $builder->setTimeout(null);
    $builder->setPrefix($this->getPhpPath());

    $childProcess = $builder->getProcess();
    $childProcess->start();

    return $childProcess;
}

/**
 * Returns the path to the PHP executable, looking it up on first use and caching the result.
 *
 * @return string
 *
 * @throws \RuntimeException If the executable finder does not return a path.
 */
private function getPhpPath()
{
    if ($this->phpPath) {
        return $this->phpPath;
    }

    $phpFinder = new PhpExecutableFinder();
    $this->phpPath = $phpFinder->find();
    if (!$this->phpPath) {
        throw new \RuntimeException(
            'The php executable could not be found, it\'s needed for executing parallel sub processes, so add it to your PATH environment variable and try again'
        );
    }

    return $this->phpPath;
}

protected function convertFields($dryRun, $contentId, $checkDuplicateIds, $checkIdValues, $offset, $limit)
{
$statement = $this->getFieldRows('ezxmltext', $contentId, $offset, $limit);
while ($row = $statement->fetch(PDO::FETCH_ASSOC)) {
if (empty($row['data_text'])) {
$inputValue = Value::EMPTY_VALUE;
Expand Down Expand Up @@ -384,7 +521,31 @@ protected function convertFields($dryRun, $contentId, $checkDuplicateIds, $check
]
);
}
}

/**
 * Converts all matching ezxmltext field rows, in batches of MAX_OJBECTS_PER_CHILD rows.
 *
 * With a concurrency above 1 (and no specific content id) every batch is handed to a child
 * process; otherwise the batches are converted in this process.
 *
 * @param bool $dryRun
 * @param int|null $contentId Restrict the conversion to this content object, or null for all.
 * @param bool $checkDuplicateIds
 * @param bool $checkIdValues
 * @param OutputInterface $output
 */
protected function processFields($dryRun, $contentId, $checkDuplicateIds, $checkIdValues, OutputInterface $output)
{
    $count = $this->getRowCountOfContentObjectAttributes('ezxmltext', $contentId);
    $output->writeln("Found $count field rows to convert.");

    if ($contentId !== null) {
        // getFieldRows() ignores offset and limit when a content id is given, so one call
        // covers every row; batching here would convert the same rows more than once.
        if ($count > 0) {
            $this->convertFields($dryRun, $contentId, $checkDuplicateIds, $checkIdValues, 0, $count);
        }
    } else {
        $fork = $this->maxConcurrency > 1;
        $limit = self::MAX_OJBECTS_PER_CHILD;

        // Iterate while there are rows left, so that the last, partially filled batch
        // (or the only batch when there are fewer rows than the batch size) is included.
        for ($offset = 0; $offset < $count; $offset += $limit) {
            if ($fork) {
                $this->waitForAvailableProcessSlot($output);
                $process = $this->createChildProcess($dryRun, $checkDuplicateIds, $checkIdValues, $offset, $limit, $output);
                // Appended rather than keyed by pid: the pid is null for a child that has
                // already exited, which would make such entries overwrite each other.
                $this->processes[] = ['offset' => $offset, 'limit' => $limit, 'process' => $process];
            } else {
                $this->convertFields($dryRun, $contentId, $checkDuplicateIds, $checkIdValues, $offset, $limit);
            }
        }
    }

    // Wait for every remaining child, including the last one, so its output is relayed
    // and a failing exit code is detected before success is reported.
    while (count($this->processes) > 0) {
        $this->waitForChild($output);
    }
    $output->writeln("Converted $count ezxmltext fields to richtext");
}

Expand Down
44 changes: 44 additions & 0 deletions bundle/Command/ConvertXmlTextToRichTextCommandSubProcess.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<?php
/**
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
namespace EzSystems\EzPlatformXmlTextFieldTypeBundle\Command;

use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use eZ\Publish\Core\FieldType\XmlText\Value;

/**
 * Hidden worker command started by ezxmltext:convert-to-richtext.
 *
 * Converts the single batch of field rows described by the "--offset" and "--limit" options.
 */
class ConvertXmlTextToRichTextCommandSubProcess extends ConvertXmlTextToRichTextCommand
{
    /**
     * Reuses the parent's options and adds the batch window options.
     */
    protected function configure()
    {
        parent::configure();
        $this
            ->setName('ezxmltext:convert-to-richtext-sub-process')
            ->setDescription('internal command used by ezxmltext:convert-to-richtext')
            ->setHidden(true)
            ->addOption(
                'offset',
                null,
                InputOption::VALUE_REQUIRED,
                'Offset'
            )
            ->addOption(
                'limit',
                null,
                InputOption::VALUE_REQUIRED,
                'Limit'
            );
    }

    /**
     * Converts the rows inside the requested offset/limit window.
     *
     * @param InputInterface $input
     * @param OutputInterface $output
     *
     * @throws \RuntimeException If "--offset" or "--limit" is missing or not a valid integer.
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $dryRun = false;
        $this->baseExecute($input, $output, $dryRun);

        // Both options are mandatory here: without them the query would not be windowed and
        // this worker would convert every row instead of just its own batch.
        $offset = $this->getIntegerOption($input, 'offset', 0);
        $limit = $this->getIntegerOption($input, 'limit', 1);

        $this->convertFields($dryRun, null, !$input->getOption('disable-duplicate-id-check'), !$input->getOption('disable-id-value-check'), $offset, $limit);
    }

    /**
     * Reads an option that must hold an integer of at least $minimum and returns it as an int.
     *
     * @param InputInterface $input
     * @param string $name Option name without the leading dashes.
     * @param int $minimum Smallest accepted value.
     *
     * @return int
     *
     * @throws \RuntimeException If the option is missing, not made up of digits only, or below $minimum.
     */
    private function getIntegerOption(InputInterface $input, $name, $minimum)
    {
        $value = $input->getOption($name);
        if (!is_scalar($value) || preg_match('/\A\d+\z/', (string) $value) !== 1 || (int) $value < $minimum) {
            throw new \RuntimeException(sprintf('Invalid value for "--%s" given', $name));
        }

        return (int) $value;
    }
}
8 changes: 7 additions & 1 deletion bundle/Resources/config/services.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ services:
tags:
- { name: ezpublish_rest.field_type_processor, alias: ezxmltext }

ezxmltext.command.convert_to_richtexst:
ezxmltext.command.convert_to_richtext:
class: EzSystems\EzPlatformXmlTextFieldTypeBundle\Command\ConvertXmlTextToRichTextCommand
arguments:
- "@ezpublish.persistence.connection"
Expand All @@ -16,6 +16,12 @@ services:
tags:
- { name: console.command }

ezxmltext.command.convert_to_richtext_sub_process:
class: EzSystems\EzPlatformXmlTextFieldTypeBundle\Command\ConvertXmlTextToRichTextCommandSubProcess
parent: ezxmltext.command.convert_to_richtext
tags:
- { name: console.command }

ezxmltext.richtext_converter:
class: eZ\Publish\Core\FieldType\XmlText\Converter\RichText
arguments:
Expand Down
10 changes: 9 additions & 1 deletion lib/FieldType/XmlText/Converter/RichText.php
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,15 @@ public function convert(DOMDocument $inputDocument, $checkDuplicateIds = false,
$this->removeComments($inputDocument);

$this->checkEmptyEmbedTags($inputDocument);
$convertedDocument = $this->getConverter()->convert($inputDocument);
try {
$convertedDocument = $this->getConverter()->convert($inputDocument);
} catch (\Exception $e) {
$this->logger->error(
"Unable to convert ezmltext for contentobject_attribute.id=$contentFieldId",
['errors' => $e->getMessage()]
);
throw $e;
}
if ($checkDuplicateIds) {
$this->reportNonUniqueIds($convertedDocument, $contentFieldId);
}
Expand Down

0 comments on commit 43032b4

Please sign in to comment.