-
Notifications
You must be signed in to change notification settings - Fork 68
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for cloud data to xdmod-shredder and xdmod-ingestor #739
Changes from all commits
ed0a524
56ae3f9
c10909b
b55927a
a1d9388
7074fc5
eef0291
901679b
b6d5609
ddf1eba
29c3801
e228e6d
ebe18ad
64abdda
37564c8
30a5391
3ba69fc
c1af695
45db850
b60aa6b
f3dc0ec
0eb33e0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -234,11 +234,16 @@ function main() | |
} | ||
|
||
if (!$dryRun) { | ||
$logger->notice('Normalizing data'); | ||
$logger->notice('Normalizing data!'); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am so excited we are normalizing this data; before, I was just fine. |
||
|
||
try { | ||
$ingestor = $shredder->getJobIngestor(); | ||
$ingestor->ingest(); | ||
// The cloud shredders do not have jobs to ingest and return false when | ||
// getJobIngestor is called for them so we don't have to hard code skipping | ||
// those formats here. | ||
if($ingestor !== false){ | ||
$ingestor->ingest(); | ||
} | ||
} catch (Exception $e) { | ||
$logger->crit(array( | ||
'message' => 'Ingestion failed: ' . $e->getMessage(), | ||
|
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -323,4 +323,54 @@ public static function quoteVariables(array $variables, VariableStore $variableS | |||||||||
|
||||||||||
return $localVariableMap; | ||||||||||
} // quoteVariables() | ||||||||||
|
||||||||||
public static function runEtlPipeline(array $pipelines, $logger, array $params = array()) | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It feels like this should be a separate pull request that also removes similar code from the code base:
Line 2251 in 133a3ae
Specifically, so that we don't have this done in many places in our code base and don't forget about it. You do state in the description that there is a subsequent PR for this; can you please link to it? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here is the branch, https://github.com/eiffel777/xdmod/tree/move-runEtlPipeline-references-to-etl-utilities-class, that has the changes talked about in the last paragraph of the description. It's a bit behind xdmod8.1 right now, so there is some extra code in the diff that can be ignored. |
||||||||||
{ | ||||||||||
$logger->debug( | ||||||||||
sprintf( | ||||||||||
'Shredding directory using ETL pipeline "%s" with parameters %s', | ||||||||||
implode(', ', $pipelines), | ||||||||||
json_encode($params) | ||||||||||
) | ||||||||||
); | ||||||||||
|
||||||||||
$configOptions = array('default_module_name' => 'xdmod'); | ||||||||||
if( array_key_exists('variable-overrides', $params) ){ | ||||||||||
$configOptions['config_variables'] = $params['variable-overrides']; | ||||||||||
} | ||||||||||
|
||||||||||
$etlConfig = new EtlConfiguration( | ||||||||||
CONFIG_DIR . '/etl/etl.json', | ||||||||||
null, | ||||||||||
$logger, | ||||||||||
$configOptions | ||||||||||
); | ||||||||||
$etlConfig->initialize(); | ||||||||||
self::setEtlConfig($etlConfig); | ||||||||||
|
||||||||||
$scriptOptions = array_merge( | ||||||||||
array( | ||||||||||
'default-module-name' => 'xdmod', | ||||||||||
'process-sections' => $pipelines, | ||||||||||
), | ||||||||||
$params | ||||||||||
); | ||||||||||
$logger->debug( | ||||||||||
sprintf( | ||||||||||
'Running ETL pipeline with script options %s', | ||||||||||
json_encode($scriptOptions) | ||||||||||
) | ||||||||||
); | ||||||||||
|
||||||||||
$overseerOptions = new EtlOverseerOptions( | ||||||||||
$scriptOptions, | ||||||||||
$logger | ||||||||||
); | ||||||||||
|
||||||||||
$utilitySchema = $etlConfig->getGlobalEndpoint('utility')->getSchema(); | ||||||||||
$overseerOptions->setResourceCodeToIdMapSql(sprintf("SELECT id, code from %s.resourcefact", $utilitySchema)); | ||||||||||
|
||||||||||
$overseer = new EtlOverseer($overseerOptions, $logger); | ||||||||||
$overseer->execute($etlConfig); | ||||||||||
} // runEtlPipeline | ||||||||||
} // class Utilities |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
According to the docs the $value for an optional argument is false if it is not specified. Assuming the documentation is correct you'll need to check for === false rather than === null on line 287. Please confirm.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jpwhite4 Yeah, that does seem to be the case. It should be changed now.