Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: make crawler optional in GlueTransformStage() #378

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions API.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 30 additions & 24 deletions src/stages/glue-transform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ export class GlueTransformStage extends StateMachineStage {
readonly stateMachine: sfn.StateMachine;
readonly glueJob: glue_alpha.IJob;
readonly crawler?: glue.CfnCrawler;
readonly crawlerName?: string;
readonly definition: sfn.IChainable;

/**
* Constructs `GlueTransformStage`.
Expand All @@ -91,8 +93,11 @@ export class GlueTransformStage extends StateMachineStage {
this.glueJob = this.getGlueJob(scope, id, props);
const jobRunArgs = props.jobRunArgs;

this.crawler = props.crawlerName ? undefined : this.getCrawler(props);
const crawlerName = this.crawler ? this.crawler.ref : props.crawlerName;
// Only set up a crawler if one of 'crawlerName', 'crawlerProps' or 'crawlerRole' is provided
if (props.crawlerName || props.crawlerProps || props.crawlerRole) {
this.crawler = props.crawlerName ? undefined : this.getCrawler(props);
this.crawlerName = this.crawler ? this.crawler.ref : props.crawlerName;
}

const startJobRun = new tasks.GlueStartJobRun(this, "Start Job Run", {
glueJobName: this.glueJob.jobName,
Expand All @@ -102,36 +107,37 @@ export class GlueTransformStage extends StateMachineStage {
});

const stack = cdk.Stack.of(this);
const crawlObject = new tasks.CallAwsService(this, "Crawl Object", {
service: "glue",
action: "startCrawler",
parameters: {
Name: crawlerName,
},
iamResources: [`arn:${stack.partition}:glue:${stack.region}:${stack.account}:crawler/${crawlerName}`],
});

const successTask = new sfn.Succeed(this, "Success");

crawlObject.addRetry({
errors: ["Glue.CrawlerRunningException"],
maxAttempts: props.stateMachineRetryMaxAttempts ?? 3,
backoffRate: props.stateMachineRetryBackoffRate ?? 2,
interval: props.stateMachineRetryInterval ?? cdk.Duration.seconds(1),
});

const crawlerAllowFailure = props.crawlerAllowFailure ?? true;
if (crawlerAllowFailure) {
crawlObject.addCatch(successTask, { errors: ["Glue.CrawlerRunningException"] });
if (this.crawlerName) {
const crawlObject = new tasks.CallAwsService(this, "Crawl Object", {
service: "glue",
action: "startCrawler",
parameters: {
Name: this.crawlerName,
},
iamResources: [`arn:${stack.partition}:glue:${stack.region}:${stack.account}:crawler/${this.crawlerName}`],
});
crawlObject.addRetry({
errors: ["Glue.CrawlerRunningException"],
maxAttempts: props.stateMachineRetryMaxAttempts ?? 3,
backoffRate: props.stateMachineRetryBackoffRate ?? 2,
interval: props.stateMachineRetryInterval ?? cdk.Duration.seconds(1),
});
const crawlerAllowFailure = props.crawlerAllowFailure ?? true;
if (crawlerAllowFailure) {
crawlObject.addCatch(successTask, { errors: ["Glue.CrawlerRunningException"] });
}
this.definition = startJobRun.next(crawlObject.next(successTask));
} else {
this.definition = startJobRun.next(successTask);
}

const definition = startJobRun.next(crawlObject.next(successTask));

({
eventPattern: this.eventPattern,
targets: this.targets,
stateMachine: this.stateMachine,
} = this.createStateMachine({ definition: definition, ...props }));
} = this.createStateMachine({ definition: this.definition, ...props }));
}

private getGlueJob(scope: Construct, id: string, props: GlueTransformStageProps): glue_alpha.IJob {
Expand Down
22 changes: 12 additions & 10 deletions test/glue-transform-stage.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,6 @@ test("GlueTranformStage must have 'jobName' or 'jobProps' set", () => {
}).toThrowError("'jobName' or 'jobProps' must be set to instantiate this stage");
});

test("GlueTranformStage must set crawler role ", () => {
const stack = new cdk.Stack();
expect(() => {
new GlueTransformStage(stack, "Stage", {
jobName: "myJob",
});
}).toThrowError("Crawler Role must be set either by 'crawlerRole' or 'crawlerProps.role");
});

test("GlueTranformStage must set crawler targets", () => {
const stack = new cdk.Stack();
expect(() => {
Expand Down Expand Up @@ -148,7 +139,7 @@ test("GlueTransformStage retry settings", () => {
});
});

test("GlueTransformStage crawler allo failure settings", () => {
test("GlueTransformStage crawler allow failure settings", () => {
const stack = new cdk.Stack();

new GlueTransformStage(stack, "glue-transform-disallow-failure", {
Expand All @@ -169,3 +160,14 @@ test("GlueTransformStage crawler allo failure settings", () => {
},
});
});

// When only a job name is supplied (no crawlerName, crawlerProps or crawlerRole),
// the synthesized stack is expected to contain no AWS::Glue::Crawler resource.
test("GlueTransformStage no crawler", () => {
  const stack = new cdk.Stack();
  const stageProps = { jobName: "myJob" };

  new GlueTransformStage(stack, "glue-transform-no-crawler", stageProps);

  Template.fromStack(stack).resourceCountIs("AWS::Glue::Crawler", 0);
});