Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance improvements by providing caching for Configuration objects built from JSON configuration files #950

Merged
merged 16 commits into from
May 31, 2019
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion background_scripts/etl_aggregation_table_manager.php
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@
// Parse the ETL configuration. We will need it for listing available ingestors, aggregators, etc.

try {
$etlConfig = new EtlConfiguration($scriptOptions['config-file']);
$etlConfig = EtlConfiguration::factory($scriptOptions['config-file']);
} catch ( Exception $e ) {
exit($e->getMessage() . "\n");
}
Expand Down
11 changes: 11 additions & 0 deletions classes/CCR/Loggable.php
Original file line number Diff line number Diff line change
Expand Up @@ -164,4 +164,15 @@ public function __toString()
{
return "(" . get_class($this) . ")";
} // __toString()

/* ------------------------------------------------------------------------------------------
* Do not allow serialization of the logger as it may contain a PDO resource, which cannot
* be serialized.
* ------------------------------------------------------------------------------------------
*/

public function __sleep()
{
return array();
}
} // class Loggable
244 changes: 180 additions & 64 deletions classes/Configuration/Configuration.php
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ class Configuration extends Loggable implements \Iterator

/**
* The configuration constructed from the parsed file after processing any transformers and
* performing manipulations. Note that this is cleared if cleanup() is called.
* performing manipulations.
* @var stdClass
*/

Expand Down Expand Up @@ -178,11 +178,174 @@ class Configuration extends Loggable implements \Iterator
*
* @var boolean
*/

protected $forceArrayReturn = false;

/**
* Enable or disable the object cache to improve performance. Enabling the object cache means
* that a Configuration object will only need to be created once for a given global file.
*
* @var boolean
*/

protected static $enableObjectCache = true;

/**
* Associative array used as a key value store to cache parsed and transformed configuration
* files to improve performance. Keys are the fully qualified path of the global file and
* values are parsed and transformed objects generated from that file and any local files.
*
* @var array
*/

protected static $objectCache = array();

/**
* A factory / helper method for instantiating a Configuration object, initializing it, and
* returning the results of its `toAssocArray` function.
*
* @param string $filename the base configuration file name to be processed.
* @param string|null $baseDir the directory in which $filename can be found.
* @param Log|null $logger a Log instance that Configuration will utilize during its processing.
* @param array $options options that will be used during construction of the Configuration object.
*
* @return array the results of the instantiated configuration objects `toAssocArray` function.
*/

public static function assocArrayFactory(
$filename,
$baseDir = null,
Log $logger = null,
array $options = array()
) {

return self::factory($filename, $baseDir, $logger, $options)->toAssocArray();
}

/**
* A helper function that instantiates, initializes, and returns a Configuration object.
*
* @param string $filename the base configuration file name to be processed.
* @param string|null $baseDir the directory in which $filename can be found.
* @param Log|null $logger a Log instance that Configuration will utilize during its processing.
* @param array $options options that will be used during construction of the Configuration object.
*
* @return Configuration an initialized instance of Configuration.
*/

public static function factory(
$filename,
$baseDir = null,
Log $logger = null,
array $options = array()
) {
if ( empty($filename) ) {
$msg = sprintf("%s: Configuration filename cannot be empty", get_called_class());
if ( null !== $logger ) {
$logger->error($msg);
}
throw new Exception($msg);
}

$startTime = microtime(true);

// We need the fully qualified path for the object cache key because there could be two
// files with the same base name in different directories.
//
// If the base directory was provided use that value and ensure the filename is
// fully qualified. If not provided, use the dirname of the configuration filename
// (which may be the current directory) and qualify the filename with the current
// directory so the correct, fully qualified path, shows in the logs.

if ( null === $baseDir ) {
// This will be used for the main config file, any sub-files should have the
// base explicitly dir passed to them.
$baseDir = \xd_utilities\qualify_path(dirname($filename), getcwd());
$filename = \xd_utilities\qualify_path($filename, getcwd());
} else {
$filename = \xd_utilities\qualify_path($filename, $baseDir);
}

$inCache = false;
$cacheKey = null;

// If this is a local config file, do not attempt to pull it out of the cache because its
// contents depend on options passed in from the global config. If the global config was
// retrieved from the cache we won't be processing the local configs anyway.

$skipCacheCheck = ( array_key_exists('is_local_config', $options) && $options['is_local_config'] );

if ( self::$enableObjectCache && ! $skipCacheCheck ) {
// The cache key must take into account the full filename, the class that was actually
// instantiated, as well as any options provided as this may affect the generated
// object. We use serialize() instead of json_encode() because the latter only takes
// into account public member variables and we have more complex objects that can be
// passed as options such as VariableStore.
$cacheKey = md5($filename . '|' . get_called_class() . '|' . serialize($options));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we ever have a hash collision then it will be a pain in the neck to try to debug the problem!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True. Although I can generate the key leaving the filename and class as plain text and calculating the md5 only on the serialized object.

$inCache = array_key_exists($cacheKey, self::$objectCache);
}

if ( ! self::$enableObjectCache || ! $inCache || $skipCacheCheck ) {
// Let the constructor know that it was called via factory()
$options['called_via_factory'] = true;
$instance = new static($filename, $baseDir, $logger, $options);
$instance->initialize();
if ( null !== $logger ) {
$logger->trace(
sprintf(
"Created %s%s (%s) in %fs",
($skipCacheCheck ? "local " : ""),
get_called_class(),
$filename,
microtime(true) - $startTime
)
);
}
if ( self::$enableObjectCache ) {
self::$objectCache[$cacheKey] = $instance;
}
return $instance;
} else {
if ( null !== $logger ) {
$logger->trace(
sprintf(
"Fetching object %s (%s) from cache with key %s in %fs",
get_called_class(),
$filename,
$cacheKey,
microtime(true) - $startTime
)
);
}
return self::$objectCache[$cacheKey];
}
}

/**
* Enable the object cache.
*/

public static function enableObjectCache()
{
self::$enableObjectCache = true;
}

/**
* Disable the object cache and clear any cached objects.
*/

public static function disableObjectCache()
{
self::$enableObjectCache = false;
self::$objectCache = array();
}

/** -----------------------------------------------------------------------------------------
* Constructor. Read and parse the configuration file.
*
* *** Note that the constructor should not be called directly (e.g., new Configuration). Use
* *** the factory() method instead.
*
* @param string $filename Name of the JSON configuration file to parse
* @param string $baseDir Base directory for configuration files. Overrides the base dir provided in
* the top-level config file. If not set, use the same directory as the config file.
Expand Down Expand Up @@ -213,25 +376,24 @@ public function __construct(
) {
parent::__construct($logger);

if ( empty($filename) ) {
$this->logAndThrowException("Configuration filename cannot be empty");
}
// The constructor is not meant to be called directly, the factory method should be used
// instead. We cannot set the constructor to private because it extends Loggable, which has
// a public constructor, so we set a flag via factory() instead.

// If the base directory was provided use that value and ensure the filename is
// fully qualified. If not provided, use the dirname of the configuration filename
// (which may be the current directory) and qualify the filename with the current
// directory so the correct, fully qualified path, shows in the logs.
if ( ! array_key_exists('called_via_factory', $options) || ! $options['called_via_factory'] ) {
$this->logAndThrowException(
sprintf("Cannot call %s::__construct() directly, use factory() instead", get_called_class())
);
}

if ( null === $baseDir ) {
// This will be used for the main config file, any sub-files should have the
// base explicitly dir passed to them.
$this->baseDir = \xd_utilities\qualify_path(dirname($filename), getcwd());
$this->filename = \xd_utilities\qualify_path($filename, getcwd());
} else {
$this->baseDir = $baseDir;
$this->filename = \xd_utilities\qualify_path($filename, $this->baseDir);
if ( empty($filename) ) {
$this->logAndThrowException(sprintf("%s: Configuration filename cannot be empty", get_called_class()));
}

// The base directory and filename are expected to be qualifed in factory()
$this->baseDir = $baseDir;
$this->filename = $filename;

if ( isset($options['local_config_dir']) ) {

// Before continuing, make sure that the specified directory actually exists. It not
Expand All @@ -242,7 +404,6 @@ public function __construct(

$this->localConfigDir = $options['local_config_dir'];
} else {

// Set $localConfigDir to the default value.
$this->localConfigDir = implode(DIRECTORY_SEPARATOR, array($this->baseDir, sprintf("%s.d", basename($filename, '.json'))));
}
Expand Down Expand Up @@ -518,8 +679,7 @@ protected function processLocalConfig($localConfigFile)
// the method on. This allows classes extending Configuration to create instances of
// themselves when calling this method if they have not overriden it.

$localConfigObj = new static($localConfigFile, $this->baseDir, $this->logger, $options);
$localConfigObj->initialize();
$localConfigObj = static::factory($localConfigFile, $this->baseDir, $this->logger, $options);

return $localConfigObj;

Expand Down Expand Up @@ -690,15 +850,13 @@ private function recursivelySubstituteVariables($traversable)
}

/** -----------------------------------------------------------------------------------------
* Clean up intermediate information that we don't need to keep around after processing. This
* includes parsed and constructed JSON.
* Clean up intermediate information that we don't need to keep around after processing.
* ------------------------------------------------------------------------------------------
*/

public function cleanup()
{
$this->parsedConfig = null;
$this->transformedConfig = null;
} // cleanup()

/** -----------------------------------------------------------------------------------------
Expand Down Expand Up @@ -1113,48 +1271,6 @@ public function getFilename()
return $this->filename;
}

/**
* A factory / helper method for instantiating a Configuration object, initializing it, and
* returning the results of its `toAssocArray` function.
*
* @param string $filename the base configuration file name to be processed.
* @param string|null $baseDir the directory in which $filename can be found.
* @param Log|null $logger a Log instance that Configuration will utilize during its processing.
* @param array $options options that will be used during construction of the Configuration object.
*
* @return array the results of the instantiated configuration objects `toAssocArray` function.
*/
public static function assocArrayFactory(
$filename,
$baseDir = null,
Log $logger = null,
array $options = array()
) {

return self::factory($filename, $baseDir, $logger, $options)->toAssocArray();
}

/**
* A helper function that instantiates, initializes, and returns a Configuration object.
*
* @param string $filename the base configuration file name to be processed.
* @param string|null $baseDir the directory in which $filename can be found.
* @param Log|null $logger a Log instance that Configuration will utilize during its processing.
* @param array $options options that will be used during construction of the Configuration object.
*
* @return Configuration an initialized instance of Configuration.
*/
public static function factory(
$filename,
$baseDir = null,
Log $logger = null,
array $options = array()
) {
$instance = new static($filename, $baseDir, $logger, $options);
$instance->initialize();
return $instance;
}

/** -----------------------------------------------------------------------------------------
* Get the configuration after applying transforms. Note that NULL will be returned if
* initialize() has not been called or if cleanup() has been called.
Expand Down
5 changes: 1 addition & 4 deletions classes/ETL/Configuration/EtlConfiguration.php
Original file line number Diff line number Diff line change
Expand Up @@ -433,10 +433,7 @@ protected function processLocalConfig($localConfigFile)
'default_module_name' => $this->defaultModuleName
);

$localConfigObj = new EtlConfiguration($localConfigFile, $this->baseDir, $this->logger, $options);
$localConfigObj->initialize();

return $localConfigObj;
return EtlConfiguration::factory($localConfigFile, $this->baseDir, $this->logger, $options);

} // processLocalConfig()

Expand Down
8 changes: 6 additions & 2 deletions classes/ETL/DataEndpoint/aStructuredFile.php
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,12 @@ protected function generateUniqueKey()
$keySource = array($this->type, $this->path, $this->mode);
foreach ( $this->filterDefinitions as $filter )
{
$keySource[] = $filter->path;
$keySource[] = $filter->arguments;
if ( isset($filter->path) ) {
$keySource[] = $filter->path;
}
if ( isset($filter->arguments) ) {
$keySource[] = $filter->arguments;
}
}
$this->key = md5(implode($this->keySeparator, $keySource));
}
Expand Down
4 changes: 2 additions & 2 deletions classes/ETL/DbModel/Entity.php
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ public function __construct($config, $systemQuoteChar = null, Log $logger = null
if ( null !== $config ) {

if ( is_string($config) ) {
$config = new \Configuration\Configuration($config);
$config = $config->initialize()->toStdClass();
$config = \Configuration\Configuration::factory($config);
$config = $config->toStdClass();
// Support the table config directly or assigned to a "table_definition" key
if ( isset($config->table_definition) ) {
$config = $config->table_definition;
Expand Down
3 changes: 1 addition & 2 deletions classes/ETL/Maintenance/ManageTables.php
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,11 @@ protected function createDestinationTableObjects()
$defFile = \xd_utilities\qualify_path($defFile, $this->options->paths->table_definition_dir);
}
$this->logger->info(sprintf("Parse table definition: '%s'", $defFile));
$tableConfig = new Configuration(
$tableConfig = Configuration::factory(
$defFile,
$this->options->paths->base_dir,
$this->logger
);
$tableConfig->initialize();
$etlTable = new Table(
$tableConfig->toStdClass(),
$this->destinationEndpoint->getSystemQuoteChar(),
Expand Down
3 changes: 1 addition & 2 deletions classes/ETL/Utilities.php
Original file line number Diff line number Diff line change
Expand Up @@ -245,13 +245,12 @@ public static function runEtlPipeline(array $pipelines, $logger, array $params =
$configOptions['config_variables'] = $params['variable-overrides'];
}

$etlConfig = new EtlConfiguration(
$etlConfig = EtlConfiguration::factory(
CONFIG_DIR . '/etl/etl.json',
null,
$logger,
$configOptions
);
$etlConfig->initialize();
self::setEtlConfig($etlConfig);

$scriptOptions = array_merge(
Expand Down
Loading