Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rework to support multiple concurrent instances #166

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion appinfo/info.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ The first time you install this app, before using a cron job, you properly want
</description>
<licence>AGPL</licence>
<author>Roeland Jago Douma</author>
<version>2.1.0</version>
<version>2.2.0</version>
<namespace>PreviewGenerator</namespace>
<category>multimedia</category>
<website>https://github.com/rullzer/previewgenerator</website>
Expand Down
1 change: 1 addition & 0 deletions composer/composer/autoload_classmap.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
'OCA\\PreviewGenerator\\Command\\PreGenerate' => $baseDir . '/../lib/Command/PreGenerate.php',
'OCA\\PreviewGenerator\\Command\\TimestampFormatter' => $baseDir . '/../lib/Command/TimestampFormatter.php',
'OCA\\PreviewGenerator\\Migration\\Version020000Date20180823071939' => $baseDir . '/../lib/Migration/Version020000Date20180823071939.php',
'OCA\\PreviewGenerator\\Migration\\Version020200Date20190608205303' => $baseDir . '/../lib/Migration/Version020200Date20190608205303.php',
'OCA\\PreviewGenerator\\SizeHelper' => $baseDir . '/../lib/SizeHelper.php',
'OCA\\PreviewGenerator\\Watcher' => $baseDir . '/../lib/Watcher.php',
);
1 change: 1 addition & 0 deletions composer/composer/autoload_static.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class ComposerStaticInitPreviewGenerator
'OCA\\PreviewGenerator\\Command\\PreGenerate' => __DIR__ . '/..' . '/../lib/Command/PreGenerate.php',
'OCA\\PreviewGenerator\\Command\\TimestampFormatter' => __DIR__ . '/..' . '/../lib/Command/TimestampFormatter.php',
'OCA\\PreviewGenerator\\Migration\\Version020000Date20180823071939' => __DIR__ . '/..' . '/../lib/Migration/Version020000Date20180823071939.php',
'OCA\\PreviewGenerator\\Migration\\Version020200Date20190608205303' => __DIR__ . '/..' . '/../lib/Migration/Version020200Date20190608205303.php',
'OCA\\PreviewGenerator\\SizeHelper' => __DIR__ . '/..' . '/../lib/SizeHelper.php',
'OCA\\PreviewGenerator\\Watcher' => __DIR__ . '/..' . '/../lib/Watcher.php',
);
Expand Down
57 changes: 51 additions & 6 deletions lib/Command/Generate.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
use OCP\Files\IRootFolder;
use OCP\Files\NotFoundException;
use OCP\IConfig;
use OCP\IDBConnection;
use OCP\IPreview;
use OCP\IUser;
use OCP\IUserManager;
Expand All @@ -54,6 +55,9 @@ class Generate extends Command {
/** @var IConfig */
protected $config;

/** @var IDBConnection */
protected $connection;

/** @var OutputInterface */
protected $output;

Expand All @@ -67,13 +71,15 @@ public function __construct(IRootFolder $rootFolder,
IUserManager $userManager,
IPreview $previewGenerator,
IConfig $config,
IDBConnection $connection,
IManager $encryptionManager) {
parent::__construct();

$this->userManager = $userManager;
$this->rootFolder = $rootFolder;
$this->previewGenerator = $previewGenerator;
$this->config = $config;
$this->connection = $connection;
$this->encryptionManager = $encryptionManager;
}

Expand Down Expand Up @@ -142,38 +148,77 @@ private function generatePathPreviews(IUser $user, string $path) {
return;
}
$pathFolder = $userFolder->get($relativePath);
$this->parseFolder($pathFolder);
$this->processFolder($pathFolder, $user);
}

private function generateUserPreviews(IUser $user) {
\OC_Util::tearDownFS();
\OC_Util::setupFS($user->getUID());

$userFolder = $this->rootFolder->getUserFolder($user->getUID());
$this->parseFolder($userFolder);
$this->processFolder($userFolder, $user);
}

private function parseFolder(Folder $folder) {
private function processFolder(Folder $folder, IUser $user) {
// Respect the '.nomedia' file. If present don't traverse the folder
if ($folder->nodeExists('.nomedia')) {
$this->output->writeln('Skipping folder ' . $folder->getPath());
return;
}

// random sleep between 0 and 50ms to avoid collision between 2 processes
usleep(rand(0,50000));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we not get around this with a DB transaction?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, you could get a lock by querying a row where file is node.id and locked is false, setting locked to true.
If this returns a result, you have the lock. If no lock, skip/wait.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi,

I am not sure I understand what you both mean. Ideally we wouldn't have to do this if we could use some kind of pthreads/parallel approach but since ZST packages are not available in current Debian systems, or even in Sury, I am calling the occ command several times in parallel.

Indeed, you could get a lock by querying a row where file is node.id and locked is false, setting locked to true.
If this returns a result, you have the lock. If no lock, skip/wait.

Since we can't control different threads, but we have independent processes, then I added the lock column to the database so that two different instances don't start working on the same preview at the same time.

If you look at the code, that is exactly what I do, but because there is a chance that both instances read the lock value as unlocked at the same time before setting it as locked, I added an ugly random wait to minimize the chance of this happening.

This wouldn't be needed if we had multithread approach with barriers, waits and so on. (or just give each child thread a different group of previews to work on).

I hope it is clear now. This is not particularly pretty but given the limitations we have it has proven very much effective :)

Copy link
Member

@MichaIng MichaIng Sep 15, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am no PHP or SQL coder, but isn't there a way to combine a lock check + lock creation to eliminate the race condition?

E.g. in shell I do something like this:

set -C
if { > process.lock; } 2> /dev/null; then
  start_process
else
  skip_process
fi
set +C
  • set -C adds the noclobber option, thus nothing can be written to existing files.
  • Assuming that two threads cannot write the same file at the same time, one write (empty redirect to lock file) must fail, which results in process is skipped.
  • So basically checking for a lock and creating the lock is one and the same quick task. And the check practically occurs after the lock was created (checking exit code).
  • Isn't there something similar possible for the preview generation processes, through PHP or SQL?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea is to ran several instances simultaneously

for i in `seq 1 16`; do
  occ_command &
done
wait

I want several processes in parallel, so the locking needs to be in the PHP section

Copy link
Member Author

@nachoparker nachoparker Sep 18, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't need return code which checks for success (you are thinking shell scripting), we can just work with SQL to see if the column is locked or a new PID column matches, that's quite straightforward.

There's many ways to skin a cat. Deleting at the very end of the process would be an easy improvement but I don't want to keep going if this is going nowhere so I am awaiting feedback from the maintainer.

Copy link

@stefaang stefaang Sep 18, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been working on a python version of the preview generator with jpegtran for blazing fast image processing.

This snippet allows running multiple python processes.

for fileid, storage, name, path in image_filecache_query:                             
    # check preview dir                                              
    try:                                                             
        cursor.execute("INSERT INTO sg_preview (path, pid) VALUES ('{0}_{1}', {2})".format(storage, path, os.getpid()))
        if not cursor:                                               
            continue                                                 
        conn.commit()                                                
    except MySQLdb.Error:                                            
        continue                                                     
    process_image(...)

I'll let you know when it's ready to be published :)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With Python as dependency IMO not a solution for the default app, but nice as alternative, if it's finally more efficient/faster and/or for systems where Python is available anyway.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@stefaang sounds cool, great. I imagine it spawns threads in parallel or uses multithreading per image?

That would be a nice thing to have even though ideally this would be implemented in multithreading PHP in the long run.

But hey, that might never happen so kudos

Copy link
Member

@MichaIng MichaIng Sep 19, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nachoparker

Don't need return code which checks for success (you are thinking shell scripting), we can just work with SQL to see if the column is locked or a new PID column matches, that's quite straightforward.

That would not solve the race condition. The problem is the time between checking the fields state/value and setting new state/value.

E.g. two parallel processes/threads by chance check the same field nearly the same time. One checks slightly earlier but it can take slightly too long for it to set the locked state/value, so the other process still finds the filed unlocked during this minimal but existing time frame => both process the same file.

So generally when doing a check and setting a value based on the checks result, this strictly inherits such a race condition. For this reason it would be great to skip the check and set the value directly. But this requires a set function that fails conditionally and this failure must be recognised afterwards to decide whether to go on processing or skipping it.
This matches as well what @stefaang suggested above schematically.


But yeah, I think you are right that before discussing such polishing, the general decision whether this is wanted or not and on which level the multi-threading should be done, has been made.


$this->output->writeln('Scanning folder ' . $folder->getPath());

$nodes = $folder->getDirectoryListing();

foreach ($nodes as $node) {
if ($node instanceof Folder) {
$this->parseFolder($node);
$this->processFolder($node, $user);
} else if ($node instanceof File) {
$this->parseFile($node);
$is_locked = false;
$qb = $this->connection->getQueryBuilder();
$row = $qb->select('*')
->from('preview_generation')
->where($qb->expr()->eq('file_id', $qb->createNamedParameter($node->getId())))
->setMaxResults(1)
->execute()
->fetch();
if ($row !== false) {
if ($row['locked'] == 1) {
// already being processed
$is_locked = true;
} else {
$qb->update('preview_generation')
->where($qb->expr()->eq('file_id', $qb->createNamedParameter($node->getId())))
->set('locked', $qb->createNamedParameter(true))
->execute();
}
} else {
$qb->insert('preview_generation')
->values([
'uid' => $qb->createNamedParameter($user->getUID()),
'file_id' => $qb->createNamedParameter($node->getId()),
'locked' => $qb->createNamedParameter(true),
])
->execute();
}

if ($is_locked === false) {
try {
$this->processFile($node);
} finally {
$qb->delete('preview_generation')
->where($qb->expr()->eq('file_id', $qb->createNamedParameter($node->getId())))
->execute();
}
}
}
}
}

private function parseFile(File $file) {
private function processFile(File $file) {
if ($this->previewGenerator->isMimeSupported($file->getMimeType())) {
if ($this->output->getVerbosity() > OutputInterface::VERBOSITY_VERBOSE) {
$this->output->writeln('Generating previews for ' . $file->getPath());
Expand Down
77 changes: 22 additions & 55 deletions lib/Command/PreGenerate.php
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,6 @@ protected function execute(InputInterface $input, OutputInterface $output) {
return 1;
}

if ($this->checkAlreadyRunning()) {
$output->writeln('Command is already running.');
return 2;
}

$this->setPID();

// Set timestamp output
$formatter = new TimestampFormatter($this->config, $output->getFormatter());
$output->setFormatter($formatter);
Expand All @@ -131,37 +124,36 @@ protected function execute(InputInterface $input, OutputInterface $output) {
$this->sizes = SizeHelper::calculateSizes($this->config);
$this->startProcessing();

$this->clearPID();

return 0;
}

private function startProcessing() {
// random sleep between 0 and 50ms to avoid collision between 2 processes
usleep(rand(0,50000));

while(true) {
$qb = $this->connection->getQueryBuilder();
$qb->select('*')
$row = $qb->select('*')
->from('preview_generation')
->orderBy('id')
->setMaxResults(1000);
$cursor = $qb->execute();
$rows = $cursor->fetchAll();
$cursor->closeCursor();
->where($qb->expr()->eq('locked', $qb->createNamedParameter(false)))
->setMaxResults(1)
->execute()
->fetch();

if ($rows === []) {
if ($row === false) {
break;
}

foreach ($rows as $row) {
/*
* First delete the row so that if preview generation fails for some reason
* the next run can just continue
*/
$qb = $this->connection->getQueryBuilder();
$qb->delete('preview_generation')
->where($qb->expr()->eq('id', $qb->createNamedParameter($row['id'])));
$qb->execute();

$qb->update('preview_generation')
->where($qb->expr()->eq('id', $qb->createNamedParameter($row['id'])))
->set('locked', $qb->createNamedParameter(true))
->execute();
try {
$this->processRow($row);
} finally {
$qb->delete('preview_generation')
->where($qb->expr()->eq('id', $qb->createNamedParameter($row['id'])))
->execute();
}
}
}
Expand Down Expand Up @@ -215,40 +207,15 @@ private function processFile(File $file) {
$this->previewGenerator->getPreview($file, $width, -1, false);
}
} catch (NotFoundException $e) {
// Maybe log that previews could not be generated?
if ($this->output->getVerbosity() > OutputInterface::VERBOSITY_VERBOSE) {
$error = $e->getMessage();
$this->output->writeln("<error>${error} " . $file->getPath() . " not found.</error>");
}
} catch (\InvalidArgumentException $e) {
$error = $e->getMessage();
$this->output->writeln("<error>${error}</error>");
}
}
}

private function setPID() {
$this->config->setAppValue($this->appName, 'pid', posix_getpid());
}

private function clearPID() {
$this->config->deleteAppValue($this->appName, 'pid');
}

private function getPID() {
return (int)$this->config->getAppValue($this->appName, 'pid', -1);
}

private function checkAlreadyRunning() {
$pid = $this->getPID();

// No PID set so just continue
if ($pid === -1) {
return false;
}

// Get get the gid of non running processes so continue
if (posix_getpgid($pid) === false) {
return false;
}

// Seems there is already a running process generating previews
return true;
}
}
53 changes: 53 additions & 0 deletions lib/Migration/Version020200Date20190608205303.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<?php
declare(strict_types=1);
/**
* @copyright Copyleft (c) 2019, Ignacio Nunez <nacho@ownyourbits.com>
*
* @author Ignacio Nunez <nacho@ownyourbits.com>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

namespace OCA\PreviewGenerator\Migration;

use OCP\DB\ISchemaWrapper;
use OCP\Migration\SimpleMigrationStep;
use OCP\Migration\IOutput;
use Doctrine\DBAL\Types\Type;

class Version020200Date20190608205303 extends SimpleMigrationStep {

/**
* @param IOutput $output
* @param \Closure $schemaClosure The `\Closure` returns a `ISchemaWrapper`
* @param array $options
* @return null|ISchemaWrapper
*/
public function changeSchema(IOutput $output, \Closure $schemaClosure, array $options) {
/** @var ISchemaWrapper $schema */
$schema = $schemaClosure();
$table = $schema->getTable('preview_generation');

if (!$table->hasColumn('locked')) {
$table->addColumn('locked', Type::BOOLEAN, [
'notnull' => true,
'default' => 0,
]);
}
return $schema;
}
}