From ed0a5248b44beb32d6b6e589e1b7a20f1e8814e6 Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Wed, 24 Oct 2018 15:05:12 -0400 Subject: [PATCH 01/18] Changes to xdmod-ingestor and xdmod-shredder to support cloud based event data ingestion --- bin/xdmod-ingestor | 39 ++++++- bin/xdmod-shredder | 2 +- .../OpenXdmod/DataWarehouseInitializer.php | 22 ++++ classes/OpenXdmod/Shredder.php | 1 - classes/OpenXdmod/Shredder/Genericcloud.php | 110 ++++++++++++++++++ classes/OpenXdmod/Shredder/Openstack.php | 110 ++++++++++++++++++ .../etl/etl.d/jobs_cloud_eucalyptus.json | 14 ++- 7 files changed, 287 insertions(+), 11 deletions(-) create mode 100644 classes/OpenXdmod/Shredder/Genericcloud.php create mode 100644 classes/OpenXdmod/Shredder/Openstack.php diff --git a/bin/xdmod-ingestor b/bin/xdmod-ingestor index 1b021a2635..3e895c3a0f 100755 --- a/bin/xdmod-ingestor +++ b/bin/xdmod-ingestor @@ -57,6 +57,10 @@ function main() array('', 'ingest-staging'), array('', 'ingest-hpcdb'), + array('', 'ingest-cloud-openstack'), + array('', 'ingest-cloud-generic'), + array('', 'aggregate-cloud'), + // Specify an ingestor. array('', 'ingestor:'), ); @@ -74,8 +78,8 @@ function main() exit(1); } - $help = $ingest = $aggregate = $noAppend = $ingestAll = $ingestShredded - = $ingestStaging = $ingestHpcdb = false; + $help = $ingest = $aggregate = $aggregateCloud = $noAppend = $ingestAll = $ingestShredded + = $ingestStaging = $ingestHpcdb = $ingestCloudOpenStack = $ingestCloudGeneric = false; $startDate = $endDate = $lastModifiedStartDate = null; @@ -130,6 +134,15 @@ function main() case 'ingest-hpcdb': $ingestHpcdb = true; break; + case 'ingest-cloud-openstack': + $ingestCloudOpenStack = true; + break; + case 'ingest-cloud-generic': + $ingestCloudGeneric = true; + break; + case 'aggregate-cloud'; + $aggregateCloud = true; + break; default: fwrite(STDERR, "Unexpected option '$key'\n"); exit(1); @@ -220,7 +233,7 @@ function main() $ingest = $aggregate = true; } // If any ingestion phase is specified, don't aggregate. - if ($ingestAll || $ingestShredded || $ingestStaging || $ingestHpcdb) { + if ($ingestAll || $ingestShredded || $ingestStaging || $ingestHpcdb || $ingestCloudGeneric || $ingestCloudOpenStack || $aggregateCloud) { $aggregate = false; } @@ -229,7 +242,7 @@ function main() try { // If no ingestion phase is specified, ingest all. - if (!$ingestShredded && !$ingestStaging && !$ingestHpcdb) { + if (!$ingestShredded && !$ingestStaging && !$ingestHpcdb && !$ingestCloudOpenStack && !$ingestCloudGeneric) { $ingestAll = true; } @@ -248,6 +261,14 @@ function main() if ($ingestHpcdb) { $dwi->ingestAllHpcdb($startDate, $endDate); } + + if($ingestCloudOpenStack){ + $dwi->ingestCloudDataOpenStack(); + } + + if($ingestCloudGeneric){ + $dwi->ingestCloudDataGeneric(); + } } } catch (Exception $e) { $logger->crit(array( @@ -259,10 +280,16 @@ function main() $logger->info('Done ingesting data'); } - if ($aggregate) { + if ($aggregate || $aggregateCloud) { $logger->info('Aggregating data'); try { - $dwi->aggregateAllJobs($lastModifiedStartDate); + if($aggregate){ + $dwi->aggregateAllJobs($lastModifiedStartDate); + } + + if($aggregateCloud){ + $dwi->aggregateCloudData(); + } } catch (Exception $e) { $logger->crit(array( 'message' => 'Aggregation failed: ' . $e->getMessage(), diff --git a/bin/xdmod-shredder b/bin/xdmod-shredder index 26175887fb..36c2af3486 100755 --- a/bin/xdmod-shredder +++ b/bin/xdmod-shredder @@ -233,7 +233,7 @@ function main() $logger->notice("Job errors written to '$jobErrorLogFile'"); } - if (!$dryRun) { + if (!$dryRun && !in_array($format, array('openstack', 'genericcloud'))) { $logger->notice('Normalizing data'); try { diff --git a/classes/OpenXdmod/DataWarehouseInitializer.php b/classes/OpenXdmod/DataWarehouseInitializer.php index fe05ed17f9..2b71fd6162 100644 --- a/classes/OpenXdmod/DataWarehouseInitializer.php +++ b/classes/OpenXdmod/DataWarehouseInitializer.php @@ -191,6 +191,28 @@ public function ingestAllHpcdb($startDate = null, $endDate = null) ); } + public function ingestCloudDataOpenStack() + { + $this->logger->debug('Ingesting OpenStack data'); + $this->runEtlPipeline('jobs-cloud-extract-openstack'); + } + + public function ingestCloudDataGeneric() + { + $this->logger->debug('Ingesting cloud data in generic format'); + $this->runEtlPipeline('jobs-cloud-eucalyptus'); + } + + public function aggregateCloudData() + { + $this->logger->debug('Aggregating cloud data'); + $this->runEtlPipeline('cloud-state-pipeline'); + + $filterListBuilder = new FilterListBuilder(); + $filterListBuilder->setLogger($this->logger); + $filterListBuilder->buildRealmLists('Cloud'); + } + /** * Initialize aggregate database. * diff --git a/classes/OpenXdmod/Shredder.php b/classes/OpenXdmod/Shredder.php index 1a55a57ba7..effa0211ae 100644 --- a/classes/OpenXdmod/Shredder.php +++ b/classes/OpenXdmod/Shredder.php @@ -1030,4 +1030,3 @@ protected function getResourceConfig($name) throw new Exception("No config found for '$name' in '$file'"); } } - diff --git a/classes/OpenXdmod/Shredder/Genericcloud.php b/classes/OpenXdmod/Shredder/Genericcloud.php new file mode 100644 index 0000000000..b29640281b --- /dev/null +++ b/classes/OpenXdmod/Shredder/Genericcloud.php @@ -0,0 +1,110 @@ + + */ + +namespace OpenXdmod\Shredder; + +use Exception; +use CCR\DB\iDatabase; +use OpenXdmod\Shredder; +use ETL\EtlOverseer; +use ETL\iEtlOverseer; +use ETL\Configuration\EtlConfiguration; +use ETL\EtlOverseerOptions; +use ETL\Utilities; + +class Genericcloud extends Shredder +{ + + /** + * Time zone used when parsing datetimes. + * + * @var DateTimeZone + */ + protected $timeZone; + + /** + * @inheritdoc + */ + public function __construct(iDatabase $db) + { + $this->db = $db; + $this->logger = \Log::singleton('null'); + + $classPath = explode('\\', strtolower(get_class($this))); + $this->format = $classPath[count($classPath) - 1]; + } + + /** + * @inheritdoc + */ + public function shredFile($line) + { + throw new Exception('The OpenStack shredder does not supported shredding by file. Please use the d option and specify a directory'); + + } + + protected function runEtlPipeline(array $pipelines, array $params=array()) + { + $this->logger->debug( + sprintf( + 'Shredding directory using ETL pipeline "%s" with parameters %s', + implode(', ', $pipelines), + json_encode($params) + ) + ); + + $configOptions = array('default_module_name' => 'xdmod'); + if( array_key_exists('variable_overrides', $params) ){ + $configOptions['config_variables'] = $params['variable_overrides']; + } + + $etlConfig = new EtlConfiguration( + CONFIG_DIR . '/etl/etl.json', + null, + $this->logger, + $configOptions + ); + $etlConfig->initialize(); + Utilities::setEtlConfig($etlConfig); + + $scriptOptions = array_merge( + array( + 'default-module-name' => 'xdmod', + 'process-sections' => $pipelines, + ), + $params + ); + $this->logger->debug( + sprintf( + 'Running ETL pipeline with script options %s', + json_encode($scriptOptions) + ) + ); + + $overseerOptions = new EtlOverseerOptions( + $scriptOptions, + $this->logger + ); + + $utilitySchema = $etlConfig->getGlobalEndpoint('utility')->getSchema(); + $overseerOptions->setResourceCodeToIdMapSql(sprintf("SELECT id, code from %s.resourcefact", $utilitySchema)); + + $overseer = new EtlOverseer($overseerOptions, $this->logger); + $overseer->execute($etlConfig); + } + + public function shredDirectory($directory) + { + $params = array( + 'include-only-resource-codes' => $this->resource, + 'variable-overrides' => ['CLOUD_EVENT_LOG_DIRECTORY' => $directory] + ); + + $this->runEtlPipeline(array('jobs-common','jobs-cloud-common','ingest-resources'), $params); + $this->runEtlPipeline(array('jobs-cloud-ingest-eucalyptus')); + } +} diff --git a/classes/OpenXdmod/Shredder/Openstack.php b/classes/OpenXdmod/Shredder/Openstack.php new file mode 100644 index 0000000000..6c8e5d2a03 --- /dev/null +++ b/classes/OpenXdmod/Shredder/Openstack.php @@ -0,0 +1,110 @@ + + */ + +namespace OpenXdmod\Shredder; + +use Exception; +use CCR\DB\iDatabase; +use OpenXdmod\Shredder; +use ETL\EtlOverseer; +use ETL\iEtlOverseer; +use ETL\Configuration\EtlConfiguration; +use ETL\EtlOverseerOptions; +use ETL\Utilities; + +class OpenStack extends Shredder +{ + + /** + * Time zone used when parsing datetimes. + * + * @var DateTimeZone + */ + protected $timeZone; + + /** + * @inheritdoc + */ + public function __construct(iDatabase $db) + { + $this->db = $db; + $this->logger = \Log::singleton('null'); + + $classPath = explode('\\', strtolower(get_class($this))); + $this->format = $classPath[count($classPath) - 1]; + } + + /** + * @inheritdoc + */ + public function shredFile($line) + { + throw new Exception('The OpenStack shredder does not supported shredding by file. Please use the d option and specify a directory'); + + } + + protected function runEtlPipeline(array $pipelines, array $params=array()) + { + $this->logger->debug( + sprintf( + 'Shredding directory using ETL pipeline "%s" with parameters %s', + implode(', ', $pipelines), + json_encode($params) + ) + ); + + $configOptions = array('default_module_name' => 'xdmod'); + if( array_key_exists('variable-overrides', $params) ){ + $configOptions['config_variables'] = $params['variable-overrides']; + } + + $etlConfig = new EtlConfiguration( + CONFIG_DIR . '/etl/etl.json', + null, + $this->logger, + $configOptions + ); + $etlConfig->initialize(); + Utilities::setEtlConfig($etlConfig); + + $scriptOptions = array_merge( + array( + 'default-module-name' => 'xdmod', + 'process-sections' => $pipelines, + ), + $params + ); + $this->logger->debug( + sprintf( + 'Running ETL pipeline with script options %s', + json_encode($scriptOptions) + ) + ); + + $overseerOptions = new EtlOverseerOptions( + $scriptOptions, + $this->logger + ); + + $utilitySchema = $etlConfig->getGlobalEndpoint('utility')->getSchema(); + $overseerOptions->setResourceCodeToIdMapSql(sprintf("SELECT id, code from %s.resourcefact", $utilitySchema)); + + $overseer = new EtlOverseer($overseerOptions, $this->logger); + $overseer->execute($etlConfig); + } + + public function shredDirectory($directory) + { + $params = array( + 'include-only-resource-codes' => $this->resource, + 'variable-overrides' => ['CLOUD_EVENT_LOG_DIRECTORY' => $directory] + ); + + $this->runEtlPipeline(array('jobs-common','jobs-cloud-common','ingest-resources')); + $this->runEtlPipeline(array('jobs-cloud-ingest-openstack'), $params); + } +} diff --git a/configuration/etl/etl.d/jobs_cloud_eucalyptus.json b/configuration/etl/etl.d/jobs_cloud_eucalyptus.json index 686e6e3e97..d6eb627d2a 100644 --- a/configuration/etl/etl.d/jobs_cloud_eucalyptus.json +++ b/configuration/etl/etl.d/jobs_cloud_eucalyptus.json @@ -20,7 +20,13 @@ } }, - "jobs-cloud-eucalyptus": { + "jobs-cloud-ingest-eucalyptus": { + "namespace": "ETL\\Ingestor", + "options_class": "IngestorOptions", + "truncate_destination": false, + "enabled": true + }, + "jobs-cloud-extract-eucalyptus": { "namespace": "ETL\\Ingestor", "options_class": "IngestorOptions", "truncate_destination": false, @@ -30,7 +36,7 @@ "#": "Current Cloud job ingestion", - "jobs-cloud-eucalyptus": [ + "jobs-cloud-ingest-eucalyptus": [ { "name": "EucalyptusRawCloudEventLogIngestor", "description": "Raw Eucalyptus cloud instance data from log files", @@ -117,7 +123,9 @@ } }, "stop_on_exception": false - }, + } + ], + "jobs-cloud-extract-eucalyptus": [ { "name": "EucalyptusCloudAccountIngestor", "description": "Eucalyptus cloud account data", From 56ae3f95b92c2cd7d31a4ef4ec8748298905b432 Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Thu, 8 Nov 2018 08:22:46 -0500 Subject: [PATCH 02/18] adding checking for exception for when cloud table do not exist. added etl pipeline function. removed previously added command line flags --- bin/xdmod-ingestor | 45 +++---- bin/xdmod-shredder | 2 +- classes/ETL/Utilities.php | 50 ++++++++ .../OpenXdmod/DataWarehouseInitializer.php | 57 +++++++-- classes/OpenXdmod/Shredder/Generalcloud.php | 53 +++++++++ classes/OpenXdmod/Shredder/Genericcloud.php | 110 ------------------ classes/OpenXdmod/Shredder/Openstack.php | 84 +++---------- 7 files changed, 187 insertions(+), 214 deletions(-) create mode 100644 classes/OpenXdmod/Shredder/Generalcloud.php delete mode 100644 classes/OpenXdmod/Shredder/Genericcloud.php diff --git a/bin/xdmod-ingestor b/bin/xdmod-ingestor index 3e895c3a0f..cb151e4114 100755 --- a/bin/xdmod-ingestor +++ b/bin/xdmod-ingestor @@ -42,7 +42,7 @@ function main() array('q', 'quiet'), array('', 'ingest'), - array('', 'aggregate'), + array('', 'aggregate::'), // Dates used by ingestion. array('', 'start-date:'), @@ -57,9 +57,8 @@ function main() array('', 'ingest-staging'), array('', 'ingest-hpcdb'), - array('', 'ingest-cloud-openstack'), - array('', 'ingest-cloud-generic'), - array('', 'aggregate-cloud'), + //Type of data that is being ingested. + array('', 'datatype:'), // Specify an ingestor. array('', 'ingestor:'), @@ -79,9 +78,9 @@ function main() } $help = $ingest = $aggregate = $aggregateCloud = $noAppend = $ingestAll = $ingestShredded - = $ingestStaging = $ingestHpcdb = $ingestCloudOpenStack = $ingestCloudGeneric = false; + = $ingestStaging = $ingestHpcdb = $datatype = false; - $startDate = $endDate = $lastModifiedStartDate = null; + $startDate = $endDate = $lastModifiedStartDate = $realmToAggregate = $datatypeValue = null; $logLevel = -1; @@ -112,6 +111,7 @@ function main() break; case 'aggregate': $aggregate = true; + $realmToAggregate = $value; break; case 'start-date': $startDate = $value; @@ -134,14 +134,9 @@ function main() case 'ingest-hpcdb': $ingestHpcdb = true; break; - case 'ingest-cloud-openstack': - $ingestCloudOpenStack = true; - break; - case 'ingest-cloud-generic': - $ingestCloudGeneric = true; - break; - case 'aggregate-cloud'; - $aggregateCloud = true; + case 'datatype': + $datatype = true; + $datatypeValue = $value; break; default: fwrite(STDERR, "Unexpected option '$key'\n"); @@ -198,6 +193,11 @@ function main() $logger->info("Using today '$endDate' for end date"); } + if($datatype !== false && $datatypeValue === null){ + $logger->crit("You must specify the type of data you want to ingest"); + exit(1); + } + $hpcdbDb = DB::factory('hpcdb'); $dwDb = DB::factory('datawarehouse'); @@ -233,7 +233,7 @@ function main() $ingest = $aggregate = true; } // If any ingestion phase is specified, don't aggregate. - if ($ingestAll || $ingestShredded || $ingestStaging || $ingestHpcdb || $ingestCloudGeneric || $ingestCloudOpenStack || $aggregateCloud) { + if ($ingestAll || $ingestShredded || $ingestStaging || $ingestHpcdb || $datatype) { $aggregate = false; } @@ -242,7 +242,7 @@ function main() try { // If no ingestion phase is specified, ingest all. - if (!$ingestShredded && !$ingestStaging && !$ingestHpcdb && !$ingestCloudOpenStack && !$ingestCloudGeneric) { + if (!$ingestShredded && !$ingestStaging && !$ingestHpcdb && !$datatype){ $ingestAll = true; } @@ -262,12 +262,12 @@ function main() $dwi->ingestAllHpcdb($startDate, $endDate); } - if($ingestCloudOpenStack){ + if($datatypeValue == 'openstack'){ $dwi->ingestCloudDataOpenStack(); } - if($ingestCloudGeneric){ - $dwi->ingestCloudDataGeneric(); + if($datatypeValue == 'generalcloud'){ + $dwi->ingestCloudDataGeneral(); } } } catch (Exception $e) { @@ -280,14 +280,15 @@ function main() $logger->info('Done ingesting data'); } - if ($aggregate || $aggregateCloud) { + if ($aggregate) { $logger->info('Aggregating data'); try { - if($aggregate){ + //If there is no realm specified to aggregate then all realms should be aggregated + if($realmToAggregate === null){ $dwi->aggregateAllJobs($lastModifiedStartDate); } - if($aggregateCloud){ + if($realmToAggregate == 'cloud' || $realmToAggregate === null){ $dwi->aggregateCloudData(); } } catch (Exception $e) { diff --git a/bin/xdmod-shredder b/bin/xdmod-shredder index 36c2af3486..bbd0589de2 100755 --- a/bin/xdmod-shredder +++ b/bin/xdmod-shredder @@ -233,7 +233,7 @@ function main() $logger->notice("Job errors written to '$jobErrorLogFile'"); } - if (!$dryRun && !in_array($format, array('openstack', 'genericcloud'))) { + if (!$dryRun && !in_array($format, array('openstack', 'generalcloud'))) { $logger->notice('Normalizing data'); try { diff --git a/classes/ETL/Utilities.php b/classes/ETL/Utilities.php index 7e18ec11d6..43e0049c87 100644 --- a/classes/ETL/Utilities.php +++ b/classes/ETL/Utilities.php @@ -323,4 +323,54 @@ public static function quoteVariables(array $variables, VariableStore $variableS return $localVariableMap; } // quoteVariables() + + public static function runEtlPipeline(array $pipelines, $logger, array $params=array()) + { + $logger->debug( + sprintf( + 'Shredding directory using ETL pipeline "%s" with parameters %s', + implode(', ', $pipelines), + json_encode($params) + ) + ); + + $configOptions = array('default_module_name' => 'xdmod'); + if( array_key_exists('variable-overrides', $params) ){ + $configOptions['config_variables'] = $params['variable-overrides']; + } + + $etlConfig = new EtlConfiguration( + CONFIG_DIR . '/etl/etl.json', + null, + $logger, + $configOptions + ); + $etlConfig->initialize(); + self::setEtlConfig($etlConfig); + + $scriptOptions = array_merge( + array( + 'default-module-name' => 'xdmod', + 'process-sections' => $pipelines, + ), + $params + ); + $logger->debug( + sprintf( + 'Running ETL pipeline with script options %s', + json_encode($scriptOptions) + ) + ); + + $overseerOptions = new EtlOverseerOptions( + $scriptOptions, + $logger + ); + + $utilitySchema = $etlConfig->getGlobalEndpoint('utility')->getSchema(); + $overseerOptions->setResourceCodeToIdMapSql(sprintf("SELECT id, code from %s.resourcefact", $utilitySchema)); + + $overseer = new EtlOverseer($overseerOptions, $logger); + $overseer->execute($etlConfig); + } // runEtlPipeline } // class Utilities diff --git a/classes/OpenXdmod/DataWarehouseInitializer.php b/classes/OpenXdmod/DataWarehouseInitializer.php index 2b71fd6162..0132fb6c88 100644 --- a/classes/OpenXdmod/DataWarehouseInitializer.php +++ b/classes/OpenXdmod/DataWarehouseInitializer.php @@ -126,6 +126,8 @@ public function ingestAll($startDate = null, $endDate = null) $this->ingestAllShredded($startDate, $endDate); $this->ingestAllStaging($startDate, $endDate); $this->ingestAllHpcdb($startDate, $endDate); + $this->ingestCloudDataGeneral(); + $this->ingestCloudDataOpenStack(); } /** @@ -193,24 +195,57 @@ public function ingestAllHpcdb($startDate = null, $endDate = null) public function ingestCloudDataOpenStack() { - $this->logger->debug('Ingesting OpenStack data'); - $this->runEtlPipeline('jobs-cloud-extract-openstack'); + try { + $this->logger->notice('Ingesting OpenStack event log data'); + $this->runEtlPipeline('jobs-cloud-extract-openstack'); + } + catch( Exception $e ) { + if( $e->getCode() == '1146' ){ + $this->logger->debug('Data for OpenStack event data has not been shredded. Skipping ingestion.'); + } + else { + throw $e; + } + } } - public function ingestCloudDataGeneric() + public function ingestCloudDataGeneral() { - $this->logger->debug('Ingesting cloud data in generic format'); - $this->runEtlPipeline('jobs-cloud-eucalyptus'); + try { + $this->logger->notice('Ingesting general cloud log files'); + $this->runEtlPipeline('jobs-cloud-extract-eucalyptus'); + } + catch( Exception $e ){ + //Error Code 42S02 can + if( $e->getCode() == '1146' ){ + $this->logger->debug('Data for general cloud event data has not been shredded. Skipping ingestion.'); + } + else { + throw $e; + } + } } - public function aggregateCloudData() + public function aggregateAll($lastModifiedStartDate) { - $this->logger->debug('Aggregating cloud data'); - $this->runEtlPipeline('cloud-state-pipeline'); + $this->aggregateAllJobs($lastModifiedStartDate); + $this->aggregateCloudData(); + } - $filterListBuilder = new FilterListBuilder(); - $filterListBuilder->setLogger($this->logger); - $filterListBuilder->buildRealmLists('Cloud'); + public function aggregateCloudData() + { + try { + $this->logger->notice('Aggregating Cloud data'); + $this->runEtlPipeline('cloud-state-pipeline'); + } + catch( Exception $e ) { + if( $e->getCode() == '1146' ){ + $this->logger->debug('Tables needed for aggregating the Cloud realm do not exist. Skipping Cloud realm aggregation'); + } + else { + throw $e; + } + } } /** diff --git a/classes/OpenXdmod/Shredder/Generalcloud.php b/classes/OpenXdmod/Shredder/Generalcloud.php new file mode 100644 index 0000000000..4b3a5e9fbf --- /dev/null +++ b/classes/OpenXdmod/Shredder/Generalcloud.php @@ -0,0 +1,53 @@ + + */ + +namespace OpenXdmod\Shredder; + +use CCR\DB\iDatabase; +use OpenXdmod\Shredder; +use ETL\Utilities; + +class Generalcloud extends Shredder +{ + /** + * @inheritdoc + */ + public function __construct(iDatabase $db) + { + $this->logger = \Log::singleton('null'); + } + + /** + * @inheritdoc + */ + public function shredFile($line) + { + throw new Exception('The OpenStack shredder does not supported shredding by file. Please use the -d option and specify a directory'); + + } + + /** + * @inheritdoc + */ + public function shredDirectory($directory) + { + if (!is_dir($directory)) { + $this->logger->err("'$directory' is not a directory"); + return false; + } + + Utilities::runEtlPipeline(array('jobs-common','jobs-cloud-common','ingest-resources'), $this->logger); + Utilities::runEtlPipeline( + array('jobs-cloud-ingest-eucalyptus'), + $this->logger, + array( + 'include-only-resource-codes' => $this->resource, + 'variable-overrides' => ['CLOUD_EVENT_LOG_DIRECTORY' => $directory] + ) + ); + } +} diff --git a/classes/OpenXdmod/Shredder/Genericcloud.php b/classes/OpenXdmod/Shredder/Genericcloud.php deleted file mode 100644 index b29640281b..0000000000 --- a/classes/OpenXdmod/Shredder/Genericcloud.php +++ /dev/null @@ -1,110 +0,0 @@ - - */ - -namespace OpenXdmod\Shredder; - -use Exception; -use CCR\DB\iDatabase; -use OpenXdmod\Shredder; -use ETL\EtlOverseer; -use ETL\iEtlOverseer; -use ETL\Configuration\EtlConfiguration; -use ETL\EtlOverseerOptions; -use ETL\Utilities; - -class Genericcloud extends Shredder -{ - - /** - * Time zone used when parsing datetimes. - * - * @var DateTimeZone - */ - protected $timeZone; - - /** - * @inheritdoc - */ - public function __construct(iDatabase $db) - { - $this->db = $db; - $this->logger = \Log::singleton('null'); - - $classPath = explode('\\', strtolower(get_class($this))); - $this->format = $classPath[count($classPath) - 1]; - } - - /** - * @inheritdoc - */ - public function shredFile($line) - { - throw new Exception('The OpenStack shredder does not supported shredding by file. Please use the d option and specify a directory'); - - } - - protected function runEtlPipeline(array $pipelines, array $params=array()) - { - $this->logger->debug( - sprintf( - 'Shredding directory using ETL pipeline "%s" with parameters %s', - implode(', ', $pipelines), - json_encode($params) - ) - ); - - $configOptions = array('default_module_name' => 'xdmod'); - if( array_key_exists('variable_overrides', $params) ){ - $configOptions['config_variables'] = $params['variable_overrides']; - } - - $etlConfig = new EtlConfiguration( - CONFIG_DIR . '/etl/etl.json', - null, - $this->logger, - $configOptions - ); - $etlConfig->initialize(); - Utilities::setEtlConfig($etlConfig); - - $scriptOptions = array_merge( - array( - 'default-module-name' => 'xdmod', - 'process-sections' => $pipelines, - ), - $params - ); - $this->logger->debug( - sprintf( - 'Running ETL pipeline with script options %s', - json_encode($scriptOptions) - ) - ); - - $overseerOptions = new EtlOverseerOptions( - $scriptOptions, - $this->logger - ); - - $utilitySchema = $etlConfig->getGlobalEndpoint('utility')->getSchema(); - $overseerOptions->setResourceCodeToIdMapSql(sprintf("SELECT id, code from %s.resourcefact", $utilitySchema)); - - $overseer = new EtlOverseer($overseerOptions, $this->logger); - $overseer->execute($etlConfig); - } - - public function shredDirectory($directory) - { - $params = array( - 'include-only-resource-codes' => $this->resource, - 'variable-overrides' => ['CLOUD_EVENT_LOG_DIRECTORY' => $directory] - ); - - $this->runEtlPipeline(array('jobs-common','jobs-cloud-common','ingest-resources'), $params); - $this->runEtlPipeline(array('jobs-cloud-ingest-eucalyptus')); - } -} diff --git a/classes/OpenXdmod/Shredder/Openstack.php b/classes/OpenXdmod/Shredder/Openstack.php index 6c8e5d2a03..e58bc5690e 100644 --- a/classes/OpenXdmod/Shredder/Openstack.php +++ b/classes/OpenXdmod/Shredder/Openstack.php @@ -7,35 +7,18 @@ namespace OpenXdmod\Shredder; -use Exception; use CCR\DB\iDatabase; use OpenXdmod\Shredder; -use ETL\EtlOverseer; -use ETL\iEtlOverseer; -use ETL\Configuration\EtlConfiguration; -use ETL\EtlOverseerOptions; use ETL\Utilities; class OpenStack extends Shredder { - - /** - * Time zone used when parsing datetimes. - * - * @var DateTimeZone - */ - protected $timeZone; - /** * @inheritdoc */ public function __construct(iDatabase $db) { - $this->db = $db; $this->logger = \Log::singleton('null'); - - $classPath = explode('\\', strtolower(get_class($this))); - $this->format = $classPath[count($classPath) - 1]; } /** @@ -43,68 +26,29 @@ public function __construct(iDatabase $db) */ public function shredFile($line) { - throw new Exception('The OpenStack shredder does not supported shredding by file. Please use the d option and specify a directory'); + throw new Exception('The OpenStack shredder does not supported shredding by file. Please use the -d option and specify a directory'); } - protected function runEtlPipeline(array $pipelines, array $params=array()) + /** + * @inheritdoc + */ + public function shredDirectory($directory) { - $this->logger->debug( - sprintf( - 'Shredding directory using ETL pipeline "%s" with parameters %s', - implode(', ', $pipelines), - json_encode($params) - ) - ); - - $configOptions = array('default_module_name' => 'xdmod'); - if( array_key_exists('variable-overrides', $params) ){ - $configOptions['config_variables'] = $params['variable-overrides']; + if (!is_dir($directory)) { + $this->logger->err("'$directory' is not a directory"); + return false; } - $etlConfig = new EtlConfiguration( - CONFIG_DIR . '/etl/etl.json', - null, + $this->logger->debug("Shredding directory '$directory'"); + Utilities::runEtlPipeline(array('jobs-common','jobs-cloud-common','ingest-resources'), $this->logger); + Utilities::runEtlPipeline( + array('jobs-cloud-ingest-openstack'), $this->logger, - $configOptions - ); - $etlConfig->initialize(); - Utilities::setEtlConfig($etlConfig); - - $scriptOptions = array_merge( array( - 'default-module-name' => 'xdmod', - 'process-sections' => $pipelines, - ), - $params - ); - $this->logger->debug( - sprintf( - 'Running ETL pipeline with script options %s', - json_encode($scriptOptions) + 'include-only-resource-codes' => $this->resource, + 'variable-overrides' => ['CLOUD_EVENT_LOG_DIRECTORY' => $directory] ) ); - - $overseerOptions = new EtlOverseerOptions( - $scriptOptions, - $this->logger - ); - - $utilitySchema = $etlConfig->getGlobalEndpoint('utility')->getSchema(); - $overseerOptions->setResourceCodeToIdMapSql(sprintf("SELECT id, code from %s.resourcefact", $utilitySchema)); - - $overseer = new EtlOverseer($overseerOptions, $this->logger); - $overseer->execute($etlConfig); - } - - public function shredDirectory($directory) - { - $params = array( - 'include-only-resource-codes' => $this->resource, - 'variable-overrides' => ['CLOUD_EVENT_LOG_DIRECTORY' => $directory] - ); - - $this->runEtlPipeline(array('jobs-common','jobs-cloud-common','ingest-resources')); - $this->runEtlPipeline(array('jobs-cloud-ingest-openstack'), $params); } } From c10909bc6d108add17cafdd4c927abee82005440 Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Mon, 26 Nov 2018 08:09:37 -0500 Subject: [PATCH 03/18] building filter list after running cloud aggregation in ingestor script --- classes/OpenXdmod/DataWarehouseInitializer.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/classes/OpenXdmod/DataWarehouseInitializer.php b/classes/OpenXdmod/DataWarehouseInitializer.php index 0132fb6c88..29d49319cf 100644 --- a/classes/OpenXdmod/DataWarehouseInitializer.php +++ b/classes/OpenXdmod/DataWarehouseInitializer.php @@ -216,7 +216,6 @@ public function ingestCloudDataGeneral() $this->runEtlPipeline('jobs-cloud-extract-eucalyptus'); } catch( Exception $e ){ - //Error Code 42S02 can if( $e->getCode() == '1146' ){ $this->logger->debug('Data for general cloud event data has not been shredded. Skipping ingestion.'); } @@ -246,6 +245,10 @@ public function aggregateCloudData() throw $e; } } + + $filterListBuilder = new FilterListBuilder(); + $filterListBuilder->setLogger($this->logger); + $filterListBuilder->buildRealmLists('Cloud'); } /** From b55927ab7f2c5efb3d934930b9731ff8afa000ee Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Mon, 26 Nov 2018 15:07:14 -0500 Subject: [PATCH 04/18] add cloud shredder abstract class to reduce code duplication. renamed Generalcloud.php to Genericcloud.php. replaces etl_overseer script calls in bootstrap.sh to use xdmod-shredder and xdmod-ingestor instead --- bin/xdmod-ingestor | 8 +-- bin/xdmod-shredder | 2 +- .../OpenXdmod/DataWarehouseInitializer.php | 37 ++++++----- classes/OpenXdmod/Shredder/Cloud.php | 61 +++++++++++++++++++ classes/OpenXdmod/Shredder/Generalcloud.php | 53 ---------------- classes/OpenXdmod/Shredder/Genericcloud.php | 19 ++++++ classes/OpenXdmod/Shredder/Openstack.php | 39 +----------- .../integration_tests/scripts/bootstrap.sh | 8 +-- 8 files changed, 111 insertions(+), 116 deletions(-) create mode 100644 classes/OpenXdmod/Shredder/Cloud.php delete mode 100644 classes/OpenXdmod/Shredder/Generalcloud.php create mode 100644 classes/OpenXdmod/Shredder/Genericcloud.php diff --git a/bin/xdmod-ingestor b/bin/xdmod-ingestor index cb151e4114..93a91040d4 100755 --- a/bin/xdmod-ingestor +++ b/bin/xdmod-ingestor @@ -77,7 +77,7 @@ function main() exit(1); } - $help = $ingest = $aggregate = $aggregateCloud = $noAppend = $ingestAll = $ingestShredded + $help = $ingest = $aggregate = $noAppend = $ingestAll = $ingestShredded = $ingestStaging = $ingestHpcdb = $datatype = false; $startDate = $endDate = $lastModifiedStartDate = $realmToAggregate = $datatypeValue = null; @@ -266,8 +266,8 @@ function main() $dwi->ingestCloudDataOpenStack(); } - if($datatypeValue == 'generalcloud'){ - $dwi->ingestCloudDataGeneral(); + if($datatypeValue == 'genericcloud'){ + $dwi->ingestCloudDataGeneric(); } } } catch (Exception $e) { @@ -284,7 +284,7 @@ function main() $logger->info('Aggregating data'); try { //If there is no realm specified to aggregate then all realms should be aggregated - if($realmToAggregate === null){ + if($realmToAggregate == 'job' || $realmToAggregate === null){ $dwi->aggregateAllJobs($lastModifiedStartDate); } diff --git a/bin/xdmod-shredder b/bin/xdmod-shredder index bbd0589de2..36c2af3486 100755 --- a/bin/xdmod-shredder +++ b/bin/xdmod-shredder @@ -233,7 +233,7 @@ function main() $logger->notice("Job errors written to '$jobErrorLogFile'"); } - if (!$dryRun && !in_array($format, array('openstack', 'generalcloud'))) { + if (!$dryRun && !in_array($format, array('openstack', 'genericcloud'))) { $logger->notice('Normalizing data'); try { diff --git a/classes/OpenXdmod/DataWarehouseInitializer.php b/classes/OpenXdmod/DataWarehouseInitializer.php index 29d49319cf..ebcb66223b 100644 --- a/classes/OpenXdmod/DataWarehouseInitializer.php +++ b/classes/OpenXdmod/DataWarehouseInitializer.php @@ -126,7 +126,7 @@ public function ingestAll($startDate = null, $endDate = null) $this->ingestAllShredded($startDate, $endDate); $this->ingestAllStaging($startDate, $endDate); $this->ingestAllHpcdb($startDate, $endDate); - $this->ingestCloudDataGeneral(); + $this->ingestCloudDataGeneric(); $this->ingestCloudDataOpenStack(); } @@ -193,6 +193,11 @@ public function ingestAllHpcdb($startDate = null, $endDate = null) ); } + /** + * Extracting openstack data from the openstack_raw_events table. If the raw + * tables do not exist then catch the resulting exception and display a message + * saying that there is no OpenStack data to ingest. + */ public function ingestCloudDataOpenStack() { try { @@ -209,15 +214,20 @@ public function ingestCloudDataOpenStack() } } - public function ingestCloudDataGeneral() + /** + * Extracting openstack data from the generic_raw_events table. If the raw + * tables do not exist then catch the resulting exception and display a message + * saying that there is no generic cloud data to ingest. + */ + public function ingestCloudDataGeneric() { try { - $this->logger->notice('Ingesting general cloud log files'); + $this->logger->notice('Ingesting generic cloud log files'); $this->runEtlPipeline('jobs-cloud-extract-eucalyptus'); } catch( Exception $e ){ if( $e->getCode() == '1146' ){ - $this->logger->debug('Data for general cloud event data has not been shredded. Skipping ingestion.'); + $this->logger->debug('Data for generic cloud event data has not been shredded. Skipping ingestion.'); } else { throw $e; @@ -225,17 +235,20 @@ public function ingestCloudDataGeneral() } } - public function aggregateAll($lastModifiedStartDate) - { - $this->aggregateAllJobs($lastModifiedStartDate); - $this->aggregateCloudData(); - } - + /** + * Aggregating all cloud data. If the appropriate tables do not exist then + * catch the resulting exception and display a message saying that there + * is no cloud data to aggregate and cloud aggregation is being skipped. + */ public function aggregateCloudData() { try { $this->logger->notice('Aggregating Cloud data'); $this->runEtlPipeline('cloud-state-pipeline'); + + $filterListBuilder = new FilterListBuilder(); + $filterListBuilder->setLogger($this->logger); + $filterListBuilder->buildRealmLists('Cloud'); } catch( Exception $e ) { if( $e->getCode() == '1146' ){ @@ -245,10 +258,6 @@ public function aggregateCloudData() throw $e; } } - - $filterListBuilder = new FilterListBuilder(); - $filterListBuilder->setLogger($this->logger); - $filterListBuilder->buildRealmLists('Cloud'); } /** diff --git a/classes/OpenXdmod/Shredder/Cloud.php b/classes/OpenXdmod/Shredder/Cloud.php new file mode 100644 index 0000000000..bde3be0d69 --- /dev/null +++ b/classes/OpenXdmod/Shredder/Cloud.php @@ -0,0 +1,61 @@ + + */ +namespace OpenXdmod\Shredder; + +use Exception; +use DateTime; +use DateTimeZone; +use CCR\DB\iDatabase; +use OpenXdmod\Shredder; +use ETL\Utilities; + +abstract class Cloud extends Shredder{ + + /** + * @inheritdoc + */ + public function __construct(iDatabase $db) + { + $this->logger = \Log::singleton('null'); + } + + /** + * Shredding by specifing a single file in not supported by the cloud pipelines. + * Throw an exception if someone tries to shred cloud data using the -i flag instead + * of using -d + */ + public function shredFile($line) + { + throw new Exception('Cloud resources do not support shredding by file. Please use the -d option and specify a directory'); + } + + /** + * @inheritdoc + */ + public function shredDirectory($directory, array $pipelines) + { + if (!is_dir($directory)) { + $this->logger->err("'$directory' is not a directory"); + return false; + } + + if (empty($pipelines)) { + $this->logger->err("A pipeline to run was not specified. Please provide a pipeline to run."); + return false; + } + + Utilities::runEtlPipeline(array('jobs-common','jobs-cloud-common','ingest-resources'), $this->logger); + Utilities::runEtlPipeline( + $pipelines, + $this->logger, + array( + 'include-only-resource-codes' => $this->resource, + 'variable-overrides' => ['CLOUD_EVENT_LOG_DIRECTORY' => $directory] + ) + ); + } +} diff --git a/classes/OpenXdmod/Shredder/Generalcloud.php b/classes/OpenXdmod/Shredder/Generalcloud.php deleted file mode 100644 index 4b3a5e9fbf..0000000000 --- a/classes/OpenXdmod/Shredder/Generalcloud.php +++ /dev/null @@ -1,53 +0,0 @@ - - */ - -namespace OpenXdmod\Shredder; - -use CCR\DB\iDatabase; -use OpenXdmod\Shredder; -use ETL\Utilities; - -class Generalcloud extends Shredder -{ - /** - * @inheritdoc - */ - public function __construct(iDatabase $db) - { - $this->logger = \Log::singleton('null'); - } - - /** - * @inheritdoc - */ - public function shredFile($line) - { - throw new Exception('The OpenStack shredder does not supported shredding by file. Please use the -d option and specify a directory'); - - } - - /** - * @inheritdoc - */ - public function shredDirectory($directory) - { - if (!is_dir($directory)) { - $this->logger->err("'$directory' is not a directory"); - return false; - } - - Utilities::runEtlPipeline(array('jobs-common','jobs-cloud-common','ingest-resources'), $this->logger); - Utilities::runEtlPipeline( - array('jobs-cloud-ingest-eucalyptus'), - $this->logger, - array( - 'include-only-resource-codes' => $this->resource, - 'variable-overrides' => ['CLOUD_EVENT_LOG_DIRECTORY' => $directory] - ) - ); - } -} diff --git a/classes/OpenXdmod/Shredder/Genericcloud.php b/classes/OpenXdmod/Shredder/Genericcloud.php new file mode 100644 index 0000000000..90438f9fe7 --- /dev/null +++ b/classes/OpenXdmod/Shredder/Genericcloud.php @@ -0,0 +1,19 @@ + + */ + +namespace OpenXdmod\Shredder; + +class Genericcloud extends Cloud +{ + /** + * @inheritdoc + */ + public function shredDirectory($directory) + { + parent::shredDirectory($directory, ['jobs-cloud-ingest-eucalyptus']); + } +} diff --git a/classes/OpenXdmod/Shredder/Openstack.php b/classes/OpenXdmod/Shredder/Openstack.php index e58bc5690e..233f31637c 100644 --- a/classes/OpenXdmod/Shredder/Openstack.php +++ b/classes/OpenXdmod/Shredder/Openstack.php @@ -7,48 +7,13 @@ namespace OpenXdmod\Shredder; -use CCR\DB\iDatabase; -use OpenXdmod\Shredder; -use ETL\Utilities; - -class OpenStack extends Shredder +class OpenStack extends Cloud { - /** - * @inheritdoc - */ - public function __construct(iDatabase $db) - { - $this->logger = \Log::singleton('null'); - } - - /** - * @inheritdoc - */ - public function shredFile($line) - { - throw new Exception('The OpenStack shredder does not supported shredding by file. Please use the -d option and specify a directory'); - - } - /** * @inheritdoc */ public function shredDirectory($directory) { - if (!is_dir($directory)) { - $this->logger->err("'$directory' is not a directory"); - return false; - } - - $this->logger->debug("Shredding directory '$directory'"); - Utilities::runEtlPipeline(array('jobs-common','jobs-cloud-common','ingest-resources'), $this->logger); - Utilities::runEtlPipeline( - array('jobs-cloud-ingest-openstack'), - $this->logger, - array( - 'include-only-resource-codes' => $this->resource, - 'variable-overrides' => ['CLOUD_EVENT_LOG_DIRECTORY' => $directory] - ) - ); + parent::shredDirectory($directory, ['jobs-cloud-ingest-openstack']); } } diff --git a/open_xdmod/modules/xdmod/integration_tests/scripts/bootstrap.sh b/open_xdmod/modules/xdmod/integration_tests/scripts/bootstrap.sh index 10eb5f5dac..1da7258058 100755 --- a/open_xdmod/modules/xdmod/integration_tests/scripts/bootstrap.sh +++ b/open_xdmod/modules/xdmod/integration_tests/scripts/bootstrap.sh @@ -30,6 +30,7 @@ then sudo -u xdmod xdmod-shredder -r `basename $resource .log` -f slurm -i $resource; done sudo -u xdmod xdmod-ingestor + sudo -u xdmod xdmod-shredder -r openstack -d $REF_DIR/openstack -f openstack sudo -u xdmod xdmod-import-csv -t names -i $REF_DIR/names.csv sudo -u xdmod xdmod-ingestor php /root/bin/createusers.php @@ -38,13 +39,6 @@ then sudo -u xdmod php /usr/share/xdmod/tools/etl/etl_overseer.php -p xdmod.acls-import #Updating minmaxdate table so data for cloud realm shows up mysql -e "UPDATE modw.minmaxdate SET max_job_date = '2018-07-01';" - #Ingesting cloud data from references folder - sudo -u xdmod php /usr/share/xdmod/tools/etl/etl_overseer.php -p jobs-common - sudo -u xdmod php /usr/share/xdmod/tools/etl/etl_overseer.php -p jobs-cloud-common - sudo -u xdmod php /usr/share/xdmod/tools/etl/etl_overseer.php -p ingest-resources - sudo -u xdmod php /usr/share/xdmod/tools/etl/etl_overseer.php -p jobs-cloud-ingest-openstack -r openstack -d "CLOUD_EVENT_LOG_DIRECTORY=$REF_DIR/openstack" - sudo -u xdmod php /usr/share/xdmod/tools/etl/etl_overseer.php -p jobs-cloud-extract-openstack - sudo -u xdmod php /usr/share/xdmod/tools/etl/etl_overseer.php -p cloud-state-pipeline fi if [ "$XDMOD_TEST_MODE" = "upgrade" ]; From a1d938865f56f34706ff284c441fad08f9eec684 Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Wed, 28 Nov 2018 15:21:02 -0500 Subject: [PATCH 05/18] added new function to check if a realm is enabled before trying to ingest data for that realm. moved ingestion of organizations and resource_types to their own etl json files so they can be ingested independent of the jobs pipelines. --- bin/xdmod-shredder | 2 +- .../OpenXdmod/DataWarehouseInitializer.php | 146 ++++++++++-------- classes/OpenXdmod/Shredder/Genericcloud.php | 2 +- classes/OpenXdmod/Shredder/Openstack.php | 2 +- .../Shredder/{Cloud.php => aCloud.php} | 6 +- configuration/etl/etl.d/organizations.json | 88 +++++++++++ configuration/etl/etl.d/resource_types.json | 77 +++++++++ .../integration_tests/scripts/bootstrap.sh | 2 +- 8 files changed, 249 insertions(+), 76 deletions(-) rename classes/OpenXdmod/Shredder/{Cloud.php => aCloud.php} (91%) create mode 100644 configuration/etl/etl.d/organizations.json create mode 100644 configuration/etl/etl.d/resource_types.json diff --git a/bin/xdmod-shredder b/bin/xdmod-shredder index 36c2af3486..a67ec6697b 100755 --- a/bin/xdmod-shredder +++ b/bin/xdmod-shredder @@ -233,7 +233,7 @@ function main() $logger->notice("Job errors written to '$jobErrorLogFile'"); } - if (!$dryRun && !in_array($format, array('openstack', 'genericcloud'))) { + if (!$dryRun && in_array($format, array('pbs', 'slurm', 'lsf', 'sge', 'uge'))) { $logger->notice('Normalizing data'); try { diff --git a/classes/OpenXdmod/DataWarehouseInitializer.php b/classes/OpenXdmod/DataWarehouseInitializer.php index ebcb66223b..bab4f28280 100644 --- a/classes/OpenXdmod/DataWarehouseInitializer.php +++ b/classes/OpenXdmod/DataWarehouseInitializer.php @@ -138,9 +138,11 @@ public function ingestAll($startDate = null, $endDate = null) */ public function ingestAllShredded($startDate = null, $endDate = null) { - $this->logger->debug('Ingesting shredded data to staging tables'); - $this->runEtlPipeline('staging-ingest-common'); - $this->runEtlPipeline('staging-ingest-jobs'); + if( $this->realmEnabled('Jobs')){ + $this->logger->debug('Ingesting shredded data to staging tables'); + $this->runEtlPipeline('staging-ingest-common'); + $this->runEtlPipeline('staging-ingest-jobs'); + } } /** @@ -151,9 +153,11 @@ public function ingestAllShredded($startDate = null, $endDate = null) */ public function ingestAllStaging($startDate = null, $endDate = null) { - $this->logger->debug('Ingesting staging data to HPCDB'); - $this->runEtlPipeline('hpcdb-ingest-common'); - $this->runEtlPipeline('hpcdb-ingest-jobs'); + if( $this->realmEnabled('Jobs')){ + $this->logger->debug('Ingesting staging data to HPCDB'); + $this->runEtlPipeline('hpcdb-ingest-common'); + $this->runEtlPipeline('hpcdb-ingest-jobs'); + } } /** @@ -164,33 +168,35 @@ public function ingestAllStaging($startDate = null, $endDate = null) */ public function ingestAllHpcdb($startDate = null, $endDate = null) { - $this->logger->debug('Ingesting HPCDB data to modw'); - - if ($startDate !== null || $endDate !== null) { - $params = array(); - if ($startDate !== null) { - $params['start-date'] = $startDate . ' 00:00:00'; - } - if ($endDate !== null) { - $params['end-date'] = $endDate . ' 23:59:59'; + if( $this->realmEnabled('Jobs')){ + $this->logger->debug('Ingesting HPCDB data to modw'); + + if ($startDate !== null || $endDate !== null) { + $params = array(); + if ($startDate !== null) { + $params['start-date'] = $startDate . ' 00:00:00'; + } + if ($endDate !== null) { + $params['end-date'] = $endDate . ' 23:59:59'; + } + $this->runEtlPipeline( + 'hpcdb-prep-xdw-job-ingest-by-date-range', + $params + ); + } else { + $this->runEtlPipeline('hpcdb-prep-xdw-job-ingest-by-new-jobs'); } + + // Use current time from the database in case clocks are not + // synchronized. + $lastModifiedStartDate + = $this->hpcdbDb->query('SELECT NOW() AS now FROM dual')[0]['now']; + $this->runEtlPipeline( - 'hpcdb-prep-xdw-job-ingest-by-date-range', - $params + 'hpcdb-xdw-ingest', + array('last-modified-start-date' => $lastModifiedStartDate) ); - } else { - $this->runEtlPipeline('hpcdb-prep-xdw-job-ingest-by-new-jobs'); } - - // Use current time from the database in case clocks are not - // synchronized. - $lastModifiedStartDate - = $this->hpcdbDb->query('SELECT NOW() AS now FROM dual')[0]['now']; - - $this->runEtlPipeline( - 'hpcdb-xdw-ingest', - array('last-modified-start-date' => $lastModifiedStartDate) - ); } /** @@ -200,16 +206,18 @@ public function ingestAllHpcdb($startDate = null, $endDate = null) */ public function ingestCloudDataOpenStack() { - try { - $this->logger->notice('Ingesting OpenStack event log data'); - $this->runEtlPipeline('jobs-cloud-extract-openstack'); - } - catch( Exception $e ) { - if( $e->getCode() == '1146' ){ - $this->logger->debug('Data for OpenStack event data has not been shredded. Skipping ingestion.'); + if( $this->realmEnabled('Cloud') ){ + try{ + $this->logger->notice('Ingesting OpenStack event log data'); + $this->runEtlPipeline('jobs-cloud-extract-openstack'); } - else { - throw $e; + catch( Exception $e ){ + if( $e->getCode() == 1146 ){ + $this->logger->notice('No OpenStack events to ingest'); + } + else{ + throw $e; + } } } } @@ -221,16 +229,18 @@ public function ingestCloudDataOpenStack() */ public function ingestCloudDataGeneric() { - try { - $this->logger->notice('Ingesting generic cloud log files'); - $this->runEtlPipeline('jobs-cloud-extract-eucalyptus'); - } - catch( Exception $e ){ - if( $e->getCode() == '1146' ){ - $this->logger->debug('Data for generic cloud event data has not been shredded. Skipping ingestion.'); + if( $this->realmEnabled('Cloud')){ + try{ + $this->logger->notice('Ingesting generic cloud log files'); + $this->runEtlPipeline('jobs-cloud-extract-eucalyptus'); } - else { - throw $e; + catch( Exception $e ){ + if( $e->getCode() == 1146 ){ + $this->logger->notice('No cloud event data to ingest'); + } + else{ + throw $e; + } } } } @@ -242,21 +252,13 @@ public function ingestCloudDataGeneric() */ public function aggregateCloudData() { - try { - $this->logger->notice('Aggregating Cloud data'); - $this->runEtlPipeline('cloud-state-pipeline'); + if( $this->realmEnabled('Cloud')){ + $this->logger->notice('Aggregating Cloud data'); + $this->runEtlPipeline('cloud-state-pipeline'); - $filterListBuilder = new FilterListBuilder(); - $filterListBuilder->setLogger($this->logger); - $filterListBuilder->buildRealmLists('Cloud'); - } - catch( Exception $e ) { - if( $e->getCode() == '1146' ){ - $this->logger->debug('Tables needed for aggregating the Cloud realm do not exist. Skipping Cloud realm aggregation'); - } - else { - throw $e; - } + $filterListBuilder = new FilterListBuilder(); + $filterListBuilder->setLogger($this->logger); + $filterListBuilder->buildRealmLists('Cloud'); } } @@ -286,13 +288,15 @@ public function initializeAggregation($startDate = null, $endDate = null) */ public function aggregateAllJobs($lastModifiedStartDate) { - $this->runEtlPipeline( - 'jobs-xdw-aggregate', - array('last-modified-start-date' => $lastModifiedStartDate) - ); - $filterListBuilder = new FilterListBuilder(); - $filterListBuilder->setLogger($this->logger); - $filterListBuilder->buildRealmLists('Jobs'); + if( $this->realmEnabled('Jobs') ){ + $this->runEtlPipeline( + 'jobs-xdw-aggregate', + array('last-modified-start-date' => $lastModifiedStartDate) + ); + $filterListBuilder = new FilterListBuilder(); + $filterListBuilder->setLogger($this->logger); + $filterListBuilder->buildRealmLists('Jobs'); + } } /** @@ -344,6 +348,12 @@ public function aggregate( )); } + private function realmEnabled($realm) + { + $realms = $this->warehouseDb->query("SELECT * FROM moddb.realms WHERE display = :realm", [':realm' => $realm]); + return (count($realms) > 0) ? true : false; + } + /** * Run an ETL pipeline. * diff --git a/classes/OpenXdmod/Shredder/Genericcloud.php b/classes/OpenXdmod/Shredder/Genericcloud.php index 90438f9fe7..18bd6758d8 100644 --- a/classes/OpenXdmod/Shredder/Genericcloud.php +++ b/classes/OpenXdmod/Shredder/Genericcloud.php @@ -7,7 +7,7 @@ namespace OpenXdmod\Shredder; -class Genericcloud extends Cloud +class Genericcloud extends aCloud { /** * @inheritdoc diff --git a/classes/OpenXdmod/Shredder/Openstack.php b/classes/OpenXdmod/Shredder/Openstack.php index 233f31637c..284bb47726 100644 --- a/classes/OpenXdmod/Shredder/Openstack.php +++ b/classes/OpenXdmod/Shredder/Openstack.php @@ -7,7 +7,7 @@ namespace OpenXdmod\Shredder; -class OpenStack extends Cloud +class OpenStack extends aCloud { /** * @inheritdoc diff --git a/classes/OpenXdmod/Shredder/Cloud.php b/classes/OpenXdmod/Shredder/aCloud.php similarity index 91% rename from classes/OpenXdmod/Shredder/Cloud.php rename to classes/OpenXdmod/Shredder/aCloud.php index bde3be0d69..24e4046e15 100644 --- a/classes/OpenXdmod/Shredder/Cloud.php +++ b/classes/OpenXdmod/Shredder/aCloud.php @@ -7,13 +7,11 @@ namespace OpenXdmod\Shredder; use Exception; -use DateTime; -use DateTimeZone; use CCR\DB\iDatabase; use OpenXdmod\Shredder; use ETL\Utilities; -abstract class Cloud extends Shredder{ +abstract class aCloud extends Shredder{ /** * @inheritdoc @@ -48,7 +46,7 @@ public function shredDirectory($directory, array $pipelines) return false; } - Utilities::runEtlPipeline(array('jobs-common','jobs-cloud-common','ingest-resources'), $this->logger); + Utilities::runEtlPipeline(array('jobs-common','jobs-cloud-common','ingest-organizations', 'ingest-resource-types', 'ingest-resources'), $this->logger); Utilities::runEtlPipeline( $pipelines, $this->logger, diff --git a/configuration/etl/etl.d/organizations.json b/configuration/etl/etl.d/organizations.json new file mode 100644 index 0000000000..5940f125e4 --- /dev/null +++ b/configuration/etl/etl.d/organizations.json @@ -0,0 +1,88 @@ +{ + "defaults": { + "global": { + "namespace": "ETL\\Ingestor", + "options_class": "IngestorOptions", + "class": "DatabaseIngestor", + "truncate_destination": false, + "enabled": true, + "endpoints": { + "source": { + "type": "mysql", + "name": "Shredder/Staging Database", + "config": "database", + "schema": "mod_shredder", + "create_schema_if_not_exists": true + }, + "destination": { + "type": "mysql", + "name": "Shredder/Staging Database", + "config": "database", + "schema": "mod_shredder", + "create_schema_if_not_exists": true + } + } + } + }, + "ingest-organizations": [ + { + "name": "IngestOrganizationConfig", + "description": "Ingest organization configuration file", + "class": "StructuredFileIngestor", + "definition_file": "common/staging/organization.json", + "endpoints": { + "source": { + "type": "jsonfile", + "name": "Organization configuration", + "path": "${base_dir}/../organization.json", + "record_schema_path": "common/organization.schema.json", + "filters": [ + { + "#": "Add an ID value to the organization record", + "type": "external", + "name": "jq", + "path": "jq", + "arguments": "'{id:1,name,abbrev}'" + } + ] + } + } + }, + { + "name": "IngestOrganizationHpcdb", + "description": "Ingest organizations", + "definition_file": "common/hpcdb/organizations.json", + "endpoints": { + "destination": { + "type": "mysql", + "name": "HPCDB Database", + "config": "database", + "schema": "mod_hpcdb" + } + } + }, + { + "name": "IngestOrgranizationDatawarehouse", + "definition_file": "jobs/xdw/organization.json", + "description": "organization records", + "#": "disable to allow order_id to be set", + "optimize_query": false, + "endpoints": { + "source": { + "type": "mysql", + "name": "HPCDB", + "config": "datawarehouse", + "schema": "mod_hpcdb", + "create_schema_if_not_exists": true + }, + "destination": { + "type": "mysql", + "name": "Cloud timing test DB", + "config": "datawarehouse", + "schema": "modw", + "create_schema_if_not_exists": true + } + } + } + ] +} diff --git a/configuration/etl/etl.d/resource_types.json b/configuration/etl/etl.d/resource_types.json new file mode 100644 index 0000000000..a3ba1c6c15 --- /dev/null +++ b/configuration/etl/etl.d/resource_types.json @@ -0,0 +1,77 @@ +{ + "defaults": { + "global": { + "namespace": "ETL\\Ingestor", + "options_class": "IngestorOptions", + "class": "DatabaseIngestor", + "truncate_destination": false, + "enabled": true, + "endpoints": { + "source": { + "type": "mysql", + "name": "Shredder/Staging Database", + "config": "database", + "schema": "mod_shredder", + "create_schema_if_not_exists": true + }, + "destination": { + "type": "mysql", + "name": "Datawarehouse", + "config": "datawarehouse", + "schema": "modw", + "create_schema_if_not_exists": true + } + } + } + }, + + "ingest-resource-types": [ + { + "name": "ResourceTypesStaging", + "description": "Ingest resource types file", + "class": "StructuredFileIngestor", + "definition_file": "common/staging/resource-type.json", + "endpoints": { + "source": { + "type": "jsonfile", + "name": "Resource types", + "path": "${base_dir}/../resource_types.json", + "record_schema_path": "common/resource-types.schema.json" + }, + "destination": { + "type": "mysql", + "name": "Shredder/Staging Database", + "config": "database", + "schema": "mod_shredder" + } + } + }, + { + "name": "ResourceTypesHpcdb", + "description": "Ingest resource types", + "definition_file": "common/hpcdb/resource-types.json", + "endpoints": { + "destination": { + "type": "mysql", + "name": "HPCDB Database", + "config": "database", + "schema": "mod_hpcdb" + } + } + }, + { + "name": "ResourceTypesDatawarehouse", + "definition_file": "jobs/xdw/resource-type.json", + "description": "resource type records", + "endpoints": { + "source": { + "type": "mysql", + "name": "HPCDB", + "config": "datawarehouse", + "schema": "mod_hpcdb", + "create_schema_if_not_exists": true + } + } + } + ] +} diff --git a/open_xdmod/modules/xdmod/integration_tests/scripts/bootstrap.sh b/open_xdmod/modules/xdmod/integration_tests/scripts/bootstrap.sh index 1da7258058..3e20139bf3 100755 --- a/open_xdmod/modules/xdmod/integration_tests/scripts/bootstrap.sh +++ b/open_xdmod/modules/xdmod/integration_tests/scripts/bootstrap.sh @@ -29,8 +29,8 @@ then for resource in $REF_DIR/*.log; do sudo -u xdmod xdmod-shredder -r `basename $resource .log` -f slurm -i $resource; done - sudo -u xdmod xdmod-ingestor sudo -u xdmod xdmod-shredder -r openstack -d $REF_DIR/openstack -f openstack + sudo -u xdmod xdmod-ingestor sudo -u xdmod xdmod-import-csv -t names -i $REF_DIR/names.csv sudo -u xdmod xdmod-ingestor php /root/bin/createusers.php From 7074fc5055ee051b4274ea94a499c7a85b14d524 Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Fri, 30 Nov 2018 08:45:08 -0500 Subject: [PATCH 06/18] adding tests for new shredders --- classes/OpenXdmod/Shredder/Genericcloud.php | 2 +- classes/OpenXdmod/Shredder/Openstack.php | 2 +- classes/OpenXdmod/Shredder/aCloud.php | 4 +- .../Shredder/GenericcloudShredderTest.php | 51 +++++++++++++++++++ .../Tests/Shredder/OpenstackShredderTest.php | 51 +++++++++++++++++++ 5 files changed, 106 insertions(+), 4 deletions(-) create mode 100644 open_xdmod/modules/xdmod/tests/lib/OpenXdmod/Tests/Shredder/GenericcloudShredderTest.php create mode 100644 open_xdmod/modules/xdmod/tests/lib/OpenXdmod/Tests/Shredder/OpenstackShredderTest.php diff --git a/classes/OpenXdmod/Shredder/Genericcloud.php b/classes/OpenXdmod/Shredder/Genericcloud.php index 18bd6758d8..c2d40efc3b 100644 --- a/classes/OpenXdmod/Shredder/Genericcloud.php +++ b/classes/OpenXdmod/Shredder/Genericcloud.php @@ -14,6 +14,6 @@ class Genericcloud extends aCloud */ public function shredDirectory($directory) { - parent::shredDirectory($directory, ['jobs-cloud-ingest-eucalyptus']); + return parent::shredDirectory($directory, ['jobs-cloud-ingest-eucalyptus']); } } diff --git a/classes/OpenXdmod/Shredder/Openstack.php b/classes/OpenXdmod/Shredder/Openstack.php index 284bb47726..f0d79db80a 100644 --- a/classes/OpenXdmod/Shredder/Openstack.php +++ b/classes/OpenXdmod/Shredder/Openstack.php @@ -14,6 +14,6 @@ class OpenStack extends aCloud */ public function shredDirectory($directory) { - parent::shredDirectory($directory, ['jobs-cloud-ingest-openstack']); + return parent::shredDirectory($directory, ['jobs-cloud-ingest-openstack']); } } diff --git a/classes/OpenXdmod/Shredder/aCloud.php b/classes/OpenXdmod/Shredder/aCloud.php index 24e4046e15..09315ea624 100644 --- a/classes/OpenXdmod/Shredder/aCloud.php +++ b/classes/OpenXdmod/Shredder/aCloud.php @@ -37,12 +37,12 @@ public function shredFile($line) public function shredDirectory($directory, array $pipelines) { if (!is_dir($directory)) { - $this->logger->err("'$directory' is not a directory"); + $this->logger->crit("'$directory' is not a directory"); return false; } if (empty($pipelines)) { - $this->logger->err("A pipeline to run was not specified. Please provide a pipeline to run."); + $this->logger->crit("A pipeline to run was not specified. Please provide a pipeline to run."); return false; } diff --git a/open_xdmod/modules/xdmod/tests/lib/OpenXdmod/Tests/Shredder/GenericcloudShredderTest.php b/open_xdmod/modules/xdmod/tests/lib/OpenXdmod/Tests/Shredder/GenericcloudShredderTest.php new file mode 100644 index 0000000000..ce351f9aa2 --- /dev/null +++ b/open_xdmod/modules/xdmod/tests/lib/OpenXdmod/Tests/Shredder/GenericcloudShredderTest.php @@ -0,0 +1,51 @@ + + */ + +namespace OpenXdmod\Tests\Shredder; + +use CCR\DB\NullDB; +use OpenXdmod\Shredder; + +/** + * PBS shredder test class. + */ +class GenericcloudShredderTest extends \PHPUnit_Framework_TestCase +{ + protected $db; + + public function setUp() + { + $this->db = new NullDB(); + } + + public function testShredderConstructor() + { + $shredder = Shredder::factory('genericcloud', $this->db); + $this->assertInstanceOf('\OpenXdmod\Shredder\Genericcloud', $shredder); + } + + /** + * Tests to make sure that if a non-existent directory is given that the shredDirectory + * function returns false + */ + public function testShredderParsing() + { + $shredder = $this + ->getMockBuilder('\OpenXdmod\Shredder\Genericcloud') + ->setConstructorArgs(array($this->db)) + ->setMethods(array('getResourceConfig')) + ->getMock(); + + $shredder + ->method('getResourceConfig') + ->willReturn(array()); + + $shredder->setLogger(\Log::singleton('null')); + + $shredder->setResource('testresource'); + + $this->assertFalse($shredder->shredDirectory("/directory/does/not/exist")); + } +} diff --git a/open_xdmod/modules/xdmod/tests/lib/OpenXdmod/Tests/Shredder/OpenstackShredderTest.php b/open_xdmod/modules/xdmod/tests/lib/OpenXdmod/Tests/Shredder/OpenstackShredderTest.php new file mode 100644 index 0000000000..d6b83e58ac --- /dev/null +++ b/open_xdmod/modules/xdmod/tests/lib/OpenXdmod/Tests/Shredder/OpenstackShredderTest.php @@ -0,0 +1,51 @@ + + */ + +namespace OpenXdmod\Tests\Shredder; + +use CCR\DB\NullDB; +use OpenXdmod\Shredder; + +/** + * PBS shredder test class. + */ +class OpenstackShredderTest extends \PHPUnit_Framework_TestCase +{ + protected $db; + + public function setUp() + { + $this->db = new NullDB(); + } + + public function testShredderConstructor() + { + $shredder = Shredder::factory('openstack', $this->db); + $this->assertInstanceOf('\OpenXdmod\Shredder\Openstack', $shredder); + } + + /** + * Tests to make sure that if a non-existent directory is given that the shredDirectory + * function returns false + */ + public function testShredderParsing() + { + $shredder = $this + ->getMockBuilder('\OpenXdmod\Shredder\Openstack') + ->setConstructorArgs(array($this->db)) + ->setMethods(array('getResourceConfig')) + ->getMock(); + + $shredder + ->method('getResourceConfig') + ->willReturn(array()); + + $shredder->setLogger(\Log::singleton('null')); + + $shredder->setResource('openstack'); + + $this->assertFalse($shredder->shredDirectory("/directory/does/not/exist")); + } +} From eef0291a43504ba322c41bae2329a1b7d382620d Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Fri, 30 Nov 2018 10:22:37 -0500 Subject: [PATCH 07/18] adding phpdocs for realmEnabled function --- classes/OpenXdmod/DataWarehouseInitializer.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/classes/OpenXdmod/DataWarehouseInitializer.php b/classes/OpenXdmod/DataWarehouseInitializer.php index bab4f28280..ff7a7a6055 100644 --- a/classes/OpenXdmod/DataWarehouseInitializer.php +++ b/classes/OpenXdmod/DataWarehouseInitializer.php @@ -348,6 +348,11 @@ public function aggregate( )); } + /** + * Check to see if a realm exists in the realms table + * + * @param string $realm The realm you are checking to see if exists + */ private function realmEnabled($realm) { $realms = $this->warehouseDb->query("SELECT * FROM moddb.realms WHERE display = :realm", [':realm' => $realm]); From 901679b94929b4295a258f0616946749fc28dbd7 Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Fri, 30 Nov 2018 10:25:09 -0500 Subject: [PATCH 08/18] documentation changes --- classes/OpenXdmod/DataWarehouseInitializer.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/classes/OpenXdmod/DataWarehouseInitializer.php b/classes/OpenXdmod/DataWarehouseInitializer.php index ff7a7a6055..3f9066cd63 100644 --- a/classes/OpenXdmod/DataWarehouseInitializer.php +++ b/classes/OpenXdmod/DataWarehouseInitializer.php @@ -223,7 +223,7 @@ public function ingestCloudDataOpenStack() } /** - * Extracting openstack data from the generic_raw_events table. If the raw + * Extracting cloud log data from the generic_raw_events table. If the raw * tables do not exist then catch the resulting exception and display a message * saying that there is no generic cloud data to ingest. */ From b6d560970e5c41926e41cada7e6b79e42b8ba4d3 Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Fri, 30 Nov 2018 13:22:45 -0500 Subject: [PATCH 09/18] style and test changes --- .../OpenXdmod/DataWarehouseInitializer.php | 14 +-- classes/OpenXdmod/Shredder/Genericcloud.php | 3 +- classes/OpenXdmod/Shredder/Openstack.php | 3 +- classes/OpenXdmod/Shredder/aCloud.php | 97 ++++++++++--------- .../Tests/Shredder/OpenstackShredderTest.php | 2 +- 5 files changed, 64 insertions(+), 55 deletions(-) diff --git a/classes/OpenXdmod/DataWarehouseInitializer.php b/classes/OpenXdmod/DataWarehouseInitializer.php index 3f9066cd63..133415f834 100644 --- a/classes/OpenXdmod/DataWarehouseInitializer.php +++ b/classes/OpenXdmod/DataWarehouseInitializer.php @@ -216,7 +216,7 @@ public function ingestCloudDataOpenStack() $this->logger->notice('No OpenStack events to ingest'); } else{ - throw $e; + throw $e; } } } @@ -239,7 +239,7 @@ public function ingestCloudDataGeneric() $this->logger->notice('No cloud event data to ingest'); } else{ - throw $e; + throw $e; } } } @@ -253,12 +253,12 @@ public function ingestCloudDataGeneric() public function aggregateCloudData() { if( $this->realmEnabled('Cloud')){ - $this->logger->notice('Aggregating Cloud data'); - $this->runEtlPipeline('cloud-state-pipeline'); + $this->logger->notice('Aggregating Cloud data'); + $this->runEtlPipeline('cloud-state-pipeline'); - $filterListBuilder = new FilterListBuilder(); - $filterListBuilder->setLogger($this->logger); - $filterListBuilder->buildRealmLists('Cloud'); + $filterListBuilder = new FilterListBuilder(); + $filterListBuilder->setLogger($this->logger); + $filterListBuilder->buildRealmLists('Cloud'); } } diff --git a/classes/OpenXdmod/Shredder/Genericcloud.php b/classes/OpenXdmod/Shredder/Genericcloud.php index c2d40efc3b..651d99a8d6 100644 --- a/classes/OpenXdmod/Shredder/Genericcloud.php +++ b/classes/OpenXdmod/Shredder/Genericcloud.php @@ -14,6 +14,7 @@ class Genericcloud extends aCloud */ public function shredDirectory($directory) { - return parent::shredDirectory($directory, ['jobs-cloud-ingest-eucalyptus']); + $this->setEtlPipeline(['jobs-cloud-ingest-eucalyptus']); + return parent::shredDirectory($directory); } } diff --git a/classes/OpenXdmod/Shredder/Openstack.php b/classes/OpenXdmod/Shredder/Openstack.php index f0d79db80a..f921edd125 100644 --- a/classes/OpenXdmod/Shredder/Openstack.php +++ b/classes/OpenXdmod/Shredder/Openstack.php @@ -14,6 +14,7 @@ class OpenStack extends aCloud */ public function shredDirectory($directory) { - return parent::shredDirectory($directory, ['jobs-cloud-ingest-openstack']); + $this->setEtlPipeline(['jobs-cloud-ingest-openstack']); + return parent::shredDirectory($directory); } } diff --git a/classes/OpenXdmod/Shredder/aCloud.php b/classes/OpenXdmod/Shredder/aCloud.php index 09315ea624..3643f7755d 100644 --- a/classes/OpenXdmod/Shredder/aCloud.php +++ b/classes/OpenXdmod/Shredder/aCloud.php @@ -11,49 +11,56 @@ use OpenXdmod\Shredder; use ETL\Utilities; -abstract class aCloud extends Shredder{ - - /** - * @inheritdoc - */ - public function __construct(iDatabase $db) - { - $this->logger = \Log::singleton('null'); - } - - /** - * Shredding by specifing a single file in not supported by the cloud pipelines. - * Throw an exception if someone tries to shred cloud data using the -i flag instead - * of using -d - */ - public function shredFile($line) - { - throw new Exception('Cloud resources do not support shredding by file. Please use the -d option and specify a directory'); - } - - /** - * @inheritdoc - */ - public function shredDirectory($directory, array $pipelines) - { - if (!is_dir($directory)) { - $this->logger->crit("'$directory' is not a directory"); - return false; - } - - if (empty($pipelines)) { - $this->logger->crit("A pipeline to run was not specified. Please provide a pipeline to run."); - return false; - } - - Utilities::runEtlPipeline(array('jobs-common','jobs-cloud-common','ingest-organizations', 'ingest-resource-types', 'ingest-resources'), $this->logger); - Utilities::runEtlPipeline( - $pipelines, - $this->logger, - array( - 'include-only-resource-codes' => $this->resource, - 'variable-overrides' => ['CLOUD_EVENT_LOG_DIRECTORY' => $directory] - ) - ); - } +abstract class aCloud extends Shredder +{ + + protected $etlPipelines = array(); + + /** + * @inheritdoc + */ + public function __construct(iDatabase $db) + { + $this->logger = \Log::singleton('null'); + } + + /** + * Shredding by specifing a single file in not supported by the cloud pipelines. + * Throw an exception if someone tries to shred cloud data using the -i flag instead + * of using -d + */ + public function shredFile($line) + { + throw new Exception('Cloud resources do not support shredding by file. Please use the -d option and specify a directory'); + } + + /** + * @inheritdoc + */ + public function shredDirectory($directory) + { + if (!is_dir($directory)) { + $this->logger->crit("'$directory' is not a directory"); + return false; + } + + if (empty($this->etlPipelines)) { + $this->logger->crit("A pipeline to run was not specified. Please provide a pipeline to run."); + return false; + } + + Utilities::runEtlPipeline(array('jobs-common','jobs-cloud-common','ingest-organizations', 'ingest-resource-types', 'ingest-resources'), $this->logger); + Utilities::runEtlPipeline( + $this->etlPipelines, + $this->logger, + array( + 'include-only-resource-codes' => $this->resource, + 'variable-overrides' => ['CLOUD_EVENT_LOG_DIRECTORY' => $directory] + ) + ); + } + + public function setEtlPipeline(array $pipelines){ + $this->etlPipelines = $pipelines; + } } diff --git a/open_xdmod/modules/xdmod/tests/lib/OpenXdmod/Tests/Shredder/OpenstackShredderTest.php b/open_xdmod/modules/xdmod/tests/lib/OpenXdmod/Tests/Shredder/OpenstackShredderTest.php index d6b83e58ac..95ca968fe0 100644 --- a/open_xdmod/modules/xdmod/tests/lib/OpenXdmod/Tests/Shredder/OpenstackShredderTest.php +++ b/open_xdmod/modules/xdmod/tests/lib/OpenXdmod/Tests/Shredder/OpenstackShredderTest.php @@ -44,7 +44,7 @@ public function testShredderParsing() $shredder->setLogger(\Log::singleton('null')); - $shredder->setResource('openstack'); + $shredder->setResource('testresource'); $this->assertFalse($shredder->shredDirectory("/directory/does/not/exist")); } From ddf1eba429b9752208e93aee1fb92e329798d8ca Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Fri, 30 Nov 2018 13:31:27 -0500 Subject: [PATCH 10/18] style changes --- classes/ETL/Utilities.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/classes/ETL/Utilities.php b/classes/ETL/Utilities.php index 43e0049c87..ae2ccafaee 100644 --- a/classes/ETL/Utilities.php +++ b/classes/ETL/Utilities.php @@ -324,7 +324,7 @@ public static function quoteVariables(array $variables, VariableStore $variableS return $localVariableMap; } // quoteVariables() - public static function runEtlPipeline(array $pipelines, $logger, array $params=array()) + public static function runEtlPipeline(array $pipelines, $logger, array $params = array()) { $logger->debug( sprintf( From 29c38010e460f94ec8ad9265a9dad04e4f6600b2 Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Fri, 30 Nov 2018 13:49:43 -0500 Subject: [PATCH 11/18] adding newline that shouldn't have been deleted --- classes/OpenXdmod/Shredder.php | 1 + 1 file changed, 1 insertion(+) diff --git a/classes/OpenXdmod/Shredder.php b/classes/OpenXdmod/Shredder.php index effa0211ae..1a55a57ba7 100644 --- a/classes/OpenXdmod/Shredder.php +++ b/classes/OpenXdmod/Shredder.php @@ -1030,3 +1030,4 @@ protected function getResourceConfig($name) throw new Exception("No config found for '$name' in '$file'"); } } + From e228e6db8f58154879e6707ebf22118b171a5848 Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Mon, 3 Dec 2018 13:56:58 -0500 Subject: [PATCH 12/18] use false instead of null to determine if realm should be aggregated --- bin/xdmod-ingestor | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/xdmod-ingestor b/bin/xdmod-ingestor index 93a91040d4..ec45e091bd 100755 --- a/bin/xdmod-ingestor +++ b/bin/xdmod-ingestor @@ -284,11 +284,11 @@ function main() $logger->info('Aggregating data'); try { //If there is no realm specified to aggregate then all realms should be aggregated - if($realmToAggregate == 'job' || $realmToAggregate === null){ + if($realmToAggregate == 'job' || $realmToAggregate === false){ $dwi->aggregateAllJobs($lastModifiedStartDate); } - if($realmToAggregate == 'cloud' || $realmToAggregate === null){ + if($realmToAggregate == 'cloud' || $realmToAggregate === false){ $dwi->aggregateCloudData(); } } catch (Exception $e) { From ebe18ad113c1ec61a80f4cc192da0091d23ba44c Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Mon, 3 Dec 2018 14:33:16 -0500 Subject: [PATCH 13/18] change $realmToAggregate to default to false instead of null --- bin/xdmod-ingestor | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/xdmod-ingestor b/bin/xdmod-ingestor index ec45e091bd..6c0d03089e 100755 --- a/bin/xdmod-ingestor +++ b/bin/xdmod-ingestor @@ -78,9 +78,9 @@ function main() } $help = $ingest = $aggregate = $noAppend = $ingestAll = $ingestShredded - = $ingestStaging = $ingestHpcdb = $datatype = false; + = $ingestStaging = $ingestHpcdb = $datatype = $realmToAggregate = false; - $startDate = $endDate = $lastModifiedStartDate = $realmToAggregate = $datatypeValue = null; + $startDate = $endDate = $lastModifiedStartDate = $datatypeValue = null; $logLevel = -1; From 64abdda996338cc64fffdde4d22f9d25caebeab4 Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Fri, 14 Dec 2018 13:38:34 -0500 Subject: [PATCH 14/18] addressing nitpicks from @plessbd --- bin/xdmod-ingestor | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/xdmod-ingestor b/bin/xdmod-ingestor index 6c0d03089e..a3900e93a8 100755 --- a/bin/xdmod-ingestor +++ b/bin/xdmod-ingestor @@ -57,7 +57,7 @@ function main() array('', 'ingest-staging'), array('', 'ingest-hpcdb'), - //Type of data that is being ingested. + // Type of data that is being ingested. array('', 'datatype:'), // Specify an ingestor. @@ -283,7 +283,7 @@ function main() if ($aggregate) { $logger->info('Aggregating data'); try { - //If there is no realm specified to aggregate then all realms should be aggregated + // If there is no realm specified to aggregate then all realms should be aggregated if($realmToAggregate == 'job' || $realmToAggregate === false){ $dwi->aggregateAllJobs($lastModifiedStartDate); } From 30a5391453f9e06150f28323138d7606d2695c2c Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Fri, 14 Dec 2018 14:28:42 -0500 Subject: [PATCH 15/18] removing minmaxdate statement from bootstrap.sh --- open_xdmod/modules/xdmod/integration_tests/scripts/bootstrap.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/open_xdmod/modules/xdmod/integration_tests/scripts/bootstrap.sh b/open_xdmod/modules/xdmod/integration_tests/scripts/bootstrap.sh index 3e20139bf3..0146b1468b 100755 --- a/open_xdmod/modules/xdmod/integration_tests/scripts/bootstrap.sh +++ b/open_xdmod/modules/xdmod/integration_tests/scripts/bootstrap.sh @@ -37,8 +37,6 @@ then # This will ensure that the users created in `/root/bin/createusers.php` # have their organizations set correctly. sudo -u xdmod php /usr/share/xdmod/tools/etl/etl_overseer.php -p xdmod.acls-import - #Updating minmaxdate table so data for cloud realm shows up - mysql -e "UPDATE modw.minmaxdate SET max_job_date = '2018-07-01';" fi if [ "$XDMOD_TEST_MODE" = "upgrade" ]; From c1af6959e1e9911a1339646347be423d6e3116f3 Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Mon, 17 Dec 2018 09:46:18 -0500 Subject: [PATCH 16/18] setting etl pipline to use for cloud shredders in constructor. --- bin/xdmod-shredder | 11 ++++++++--- classes/OpenXdmod/Shredder/Genericcloud.php | 8 ++++---- classes/OpenXdmod/Shredder/Openstack.php | 8 ++++---- classes/OpenXdmod/Shredder/aCloud.php | 10 +++++++--- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/bin/xdmod-shredder b/bin/xdmod-shredder index a67ec6697b..356668b001 100755 --- a/bin/xdmod-shredder +++ b/bin/xdmod-shredder @@ -233,12 +233,17 @@ function main() $logger->notice("Job errors written to '$jobErrorLogFile'"); } - if (!$dryRun && in_array($format, array('pbs', 'slurm', 'lsf', 'sge', 'uge'))) { - $logger->notice('Normalizing data'); + if (!$dryRun) { + $logger->notice('Normalizing data!'); try { $ingestor = $shredder->getJobIngestor(); - $ingestor->ingest(); + // The cloud shredders do not have jobs to ingest and return false when + // getJobInestor is called for them so we don't have to hard code skippping + // those formats here. + if($ingestor !== false){ + $ingestor->ingest(); + } } catch (Exception $e) { $logger->crit(array( 'message' => 'Ingestion failed: ' . $e->getMessage(), diff --git a/classes/OpenXdmod/Shredder/Genericcloud.php b/classes/OpenXdmod/Shredder/Genericcloud.php index 651d99a8d6..90008ea39d 100644 --- a/classes/OpenXdmod/Shredder/Genericcloud.php +++ b/classes/OpenXdmod/Shredder/Genericcloud.php @@ -7,14 +7,14 @@ namespace OpenXdmod\Shredder; +use CCR\DB\iDatabase; + class Genericcloud extends aCloud { /** * @inheritdoc */ - public function shredDirectory($directory) - { - $this->setEtlPipeline(['jobs-cloud-ingest-eucalyptus']); - return parent::shredDirectory($directory); + public function __construct(iDatabase $db){ + parent::__construct($db, ['jobs-cloud-ingest-eucalyptus']); } } diff --git a/classes/OpenXdmod/Shredder/Openstack.php b/classes/OpenXdmod/Shredder/Openstack.php index f921edd125..73ad0333ac 100644 --- a/classes/OpenXdmod/Shredder/Openstack.php +++ b/classes/OpenXdmod/Shredder/Openstack.php @@ -7,14 +7,14 @@ namespace OpenXdmod\Shredder; +use CCR\DB\iDatabase; + class OpenStack extends aCloud { /** * @inheritdoc */ - public function shredDirectory($directory) - { - $this->setEtlPipeline(['jobs-cloud-ingest-openstack']); - return parent::shredDirectory($directory); + public function __construct(iDatabase $db){ + parent::__construct($db, ['jobs-cloud-ingest-openstack']); } } diff --git a/classes/OpenXdmod/Shredder/aCloud.php b/classes/OpenXdmod/Shredder/aCloud.php index 3643f7755d..3c6e5706c8 100644 --- a/classes/OpenXdmod/Shredder/aCloud.php +++ b/classes/OpenXdmod/Shredder/aCloud.php @@ -19,9 +19,10 @@ abstract class aCloud extends Shredder /** * @inheritdoc */ - public function __construct(iDatabase $db) + public function __construct(iDatabase $db, array $pipelines) { $this->logger = \Log::singleton('null'); + $this->etlPipelines = $pipelines; } /** @@ -60,7 +61,10 @@ public function shredDirectory($directory) ); } - public function setEtlPipeline(array $pipelines){ - $this->etlPipelines = $pipelines; + /** + * Returns false so the same function in the parent class is not called since the cloud formats do not have jobs to ingest + */ + public function getJobIngestor(){ + return false; } } From 45db8507e81662f3abb406d31c3f9816bf48a4b5 Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Mon, 17 Dec 2018 11:03:02 -0500 Subject: [PATCH 17/18] changes to get tests to pass --- classes/OpenXdmod/DataWarehouseInitializer.php | 4 ++-- classes/OpenXdmod/Shredder/Openstack.php | 2 +- classes/OpenXdmod/Shredder/aCloud.php | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/classes/OpenXdmod/DataWarehouseInitializer.php b/classes/OpenXdmod/DataWarehouseInitializer.php index 133415f834..56697cf5c8 100644 --- a/classes/OpenXdmod/DataWarehouseInitializer.php +++ b/classes/OpenXdmod/DataWarehouseInitializer.php @@ -229,7 +229,7 @@ public function ingestCloudDataOpenStack() */ public function ingestCloudDataGeneric() { - if( $this->realmEnabled('Cloud')){ + if( $this->realmEnabled('Cloud') ){ try{ $this->logger->notice('Ingesting generic cloud log files'); $this->runEtlPipeline('jobs-cloud-extract-eucalyptus'); @@ -252,7 +252,7 @@ public function ingestCloudDataGeneric() */ public function aggregateCloudData() { - if( $this->realmEnabled('Cloud')){ + if( $this->realmEnabled('Cloud') ){ $this->logger->notice('Aggregating Cloud data'); $this->runEtlPipeline('cloud-state-pipeline'); diff --git a/classes/OpenXdmod/Shredder/Openstack.php b/classes/OpenXdmod/Shredder/Openstack.php index 73ad0333ac..1e13e03d28 100644 --- a/classes/OpenXdmod/Shredder/Openstack.php +++ b/classes/OpenXdmod/Shredder/Openstack.php @@ -9,7 +9,7 @@ use CCR\DB\iDatabase; -class OpenStack extends aCloud +class Openstack extends aCloud { /** * @inheritdoc diff --git a/classes/OpenXdmod/Shredder/aCloud.php b/classes/OpenXdmod/Shredder/aCloud.php index 3c6e5706c8..24e110d6a1 100644 --- a/classes/OpenXdmod/Shredder/aCloud.php +++ b/classes/OpenXdmod/Shredder/aCloud.php @@ -64,7 +64,7 @@ public function shredDirectory($directory) /** * Returns false so the same function in the parent class is not called since the cloud formats do not have jobs to ingest */ - public function getJobIngestor(){ + public function getJobIngestor($ingestAll = false){ return false; } } From b60aa6bd9c2ae2f35c6bbd653b7c10d1ef3f2c47 Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Thu, 20 Dec 2018 08:26:02 -0500 Subject: [PATCH 18/18] update suggested by @plessbd --- classes/OpenXdmod/DataWarehouseInitializer.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/classes/OpenXdmod/DataWarehouseInitializer.php b/classes/OpenXdmod/DataWarehouseInitializer.php index 56697cf5c8..5c24ea8eb1 100644 --- a/classes/OpenXdmod/DataWarehouseInitializer.php +++ b/classes/OpenXdmod/DataWarehouseInitializer.php @@ -356,7 +356,7 @@ public function aggregate( private function realmEnabled($realm) { $realms = $this->warehouseDb->query("SELECT * FROM moddb.realms WHERE display = :realm", [':realm' => $realm]); - return (count($realms) > 0) ? true : false; + return (count($realms) > 0); } /**