Skip to content

Commit

Permalink
feat(textract doc handle): add document handling for textract
Browse files Browse the repository at this point in the history
  • Loading branch information
DenovVasil committed Jan 6, 2025
1 parent 3d20ed2 commit 60f9415
Show file tree
Hide file tree
Showing 9 changed files with 273 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,29 @@
"name" : "Polling",
"value" : "POLLING"
} ]
}, {
"id" : "input.documentLocationType",
"label" : "Document location type",
"description" : "Document location",
"optional" : false,
"group" : "input",
"binding" : {
"name" : "input.documentLocationType",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.executionType",
"equals" : "SYNC",
"type" : "simple"
},
"type" : "Dropdown",
"choices" : [ {
"name" : "S3",
"value" : "S3"
}, {
"name" : "Uploaded document",
"value" : "UPLOADED"
} ]
}, {
"id" : "input.documentS3Bucket",
"label" : "Document bucket",
Expand All @@ -165,6 +188,11 @@
"name" : "input.documentS3Bucket",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.documentLocationType",
"equals" : "S3",
"type" : "simple"
},
"type" : "String"
}, {
"id" : "input.documentName",
Expand All @@ -180,6 +208,11 @@
"name" : "input.documentName",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.documentLocationType",
"equals" : "S3",
"type" : "simple"
},
"type" : "String"
}, {
"id" : "input.documentVersion",
Expand All @@ -192,6 +225,11 @@
"name" : "input.documentVersion",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.documentLocationType",
"equals" : "S3",
"type" : "simple"
},
"type" : "String"
}, {
"id" : "input.analyzeTables",
Expand Down Expand Up @@ -372,6 +410,25 @@
"type" : "simple"
},
"type" : "String"
}, {
"id" : "input.document",
"label" : "Document",
"optional" : false,
"constraints" : {
"notEmpty" : true
},
"feel" : "required",
"group" : "input",
"binding" : {
"name" : "input.document",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.documentLocationType",
"equals" : "UPLOADED",
"type" : "simple"
},
"type" : "String"
}, {
"id" : "resultVariable",
"label" : "Result variable",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,29 @@
"name" : "Polling",
"value" : "POLLING"
} ]
}, {
"id" : "input.documentLocationType",
"label" : "Document location type",
"description" : "Document location",
"optional" : false,
"group" : "input",
"binding" : {
"name" : "input.documentLocationType",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.executionType",
"equals" : "SYNC",
"type" : "simple"
},
"type" : "Dropdown",
"choices" : [ {
"name" : "S3",
"value" : "S3"
}, {
"name" : "Uploaded document",
"value" : "UPLOADED"
} ]
}, {
"id" : "input.documentS3Bucket",
"label" : "Document bucket",
Expand All @@ -170,6 +193,11 @@
"name" : "input.documentS3Bucket",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.documentLocationType",
"equals" : "S3",
"type" : "simple"
},
"type" : "String"
}, {
"id" : "input.documentName",
Expand All @@ -185,6 +213,11 @@
"name" : "input.documentName",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.documentLocationType",
"equals" : "S3",
"type" : "simple"
},
"type" : "String"
}, {
"id" : "input.documentVersion",
Expand All @@ -197,6 +230,11 @@
"name" : "input.documentVersion",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.documentLocationType",
"equals" : "S3",
"type" : "simple"
},
"type" : "String"
}, {
"id" : "input.analyzeTables",
Expand Down Expand Up @@ -377,6 +415,25 @@
"type" : "simple"
},
"type" : "String"
}, {
"id" : "input.document",
"label" : "Document",
"optional" : false,
"constraints" : {
"notEmpty" : true
},
"feel" : "required",
"group" : "input",
"binding" : {
"name" : "input.document",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.documentLocationType",
"equals" : "UPLOADED",
"type" : "simple"
},
"type" : "String"
}, {
"id" : "resultVariable",
"label" : "Result variable",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import com.amazonaws.services.textract.model.AnalyzeDocumentResult;
import com.amazonaws.services.textract.model.Document;
import io.camunda.connector.textract.model.TextractRequestData;
import java.nio.ByteBuffer;
import java.util.Objects;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -22,7 +24,8 @@ public class SyncTextractCaller implements TextractCaller<AnalyzeDocumentResult>
public AnalyzeDocumentResult call(
TextractRequestData requestData, AmazonTextract textractClient) {
LOGGER.debug("Starting sync task for document analysis with request data: {}", requestData);
final Document document = new Document().withS3Object(prepareS3Obj(requestData));

final Document document = createDocument(requestData);

final AnalyzeDocumentRequest analyzeDocumentRequest =
new AnalyzeDocumentRequest()
Expand All @@ -31,4 +34,16 @@ public AnalyzeDocumentResult call(

return textractClient.analyzeDocument(analyzeDocumentRequest);
}

/**
 * Builds the Textract {@link Document} used as input for the analysis request.
 *
 * <p>When the request data carries a document, its content is read as a byte array and attached
 * inline. When it does not, the document is referenced through the S3 object produced by
 * {@code prepareS3Obj(requestData)}.
 *
 * @param requestData connector input describing where the document comes from
 * @return a Textract document backed either by inline bytes or by an S3 object reference
 */
private Document createDocument(TextractRequestData requestData) {
  // A document present on the request takes precedence over the S3 coordinates.
  if (Objects.nonNull(requestData.document())) {
    final byte[] content = requestData.document().asByteArray();
    return new Document().withBytes(ByteBuffer.wrap(content));
  }

  return new Document().withS3Object(prepareS3Obj(requestData));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/*
* Copyright Camunda Services GmbH and/or licensed to Camunda Services GmbH
* under one or more contributor license agreements. Licensed under a proprietary license.
* See the License.txt file for more information. You may not use this file
* except in compliance with the proprietary license.
*/
package io.camunda.connector.textract.model;

/** Identifies where the document to be analyzed is taken from. */
public enum DocumentLocationType {
  /** The document is referenced by its S3 location. */
  S3,

  /** The document is supplied directly with the request. */
  UPLOADED
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
*/
package io.camunda.connector.textract.model;

import io.camunda.connector.generator.dsl.Property;
import io.camunda.connector.generator.dsl.Property.FeelMode;
import io.camunda.connector.generator.java.annotation.TemplateProperty;
import io.camunda.document.Document;
import jakarta.validation.constraints.AssertTrue;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import org.apache.commons.lang3.StringUtils;

Expand All @@ -28,23 +29,52 @@ public record TextractRequestData(
description = "Endpoint inference type")
@NotNull
TextractExecutionType executionType,
@TemplateProperty(
group = "input",
label = "Document location type",
description = "Document location",
feel = FeelMode.disabled,
type = TemplateProperty.PropertyType.Dropdown,
choices = {
@TemplateProperty.DropdownPropertyChoice(value = "S3", label = "S3"),
@TemplateProperty.DropdownPropertyChoice(
value = "UPLOADED",
label = "Uploaded document")
},
condition =
@TemplateProperty.PropertyCondition(
property = "input.executionType",
equals = "SYNC"))
DocumentLocationType documentLocationType,
@TemplateProperty(
group = "input",
label = "Document bucket",
description = "S3 bucket that contains document that needs to be processed")
@NotBlank
description = "S3 bucket that contains document that needs to be processed",
condition =
@TemplateProperty.PropertyCondition(
property = "input.documentLocationType",
equals = "S3"),
constraints = @TemplateProperty.PropertyConstraints(notEmpty = true))
String documentS3Bucket,
@TemplateProperty(
group = "input",
label = "Document path",
description = "S3 document path to be processed")
@NotBlank
description = "S3 document path to be processed",
condition =
@TemplateProperty.PropertyCondition(
property = "input.documentLocationType",
equals = "S3"),
constraints = @TemplateProperty.PropertyConstraints(notEmpty = true))
String documentName,
@TemplateProperty(
group = "input",
label = "Document version",
description = "S3 document version to be processed",
optional = true)
optional = true,
condition =
@TemplateProperty.PropertyCondition(
property = "input.documentLocationType",
equals = "S3"))
String documentVersion,
@TemplateProperty(
label = "Analyze tables",
Expand Down Expand Up @@ -150,14 +180,25 @@ public record TextractRequestData(
@TemplateProperty.PropertyCondition(
property = "input.executionType",
equals = "ASYNC"))
String outputConfigS3Prefix) {
String outputConfigS3Prefix,
@TemplateProperty(
group = "input",
label = "Document",
feel = Property.FeelMode.required,
type = TemplateProperty.PropertyType.String,
condition =
@TemplateProperty.PropertyCondition(
property = "input.documentLocationType",
equals = "UPLOADED"),
constraints = @TemplateProperty.PropertyConstraints(notEmpty = true))
Document document) {
@TemplateProperty(ignore = true)
public static final String WRONG_OUTPUT_VALUES_MSG =
"Output S3 bucket must be filled in if output S3 prefix is filled in";

@TemplateProperty(ignore = true)
public static final String WRONG_NOTIFICATION_VALUES_MSG =
"either both notification values role ARN and topic ARN must be filled in or none of them";
"Either both notification values role ARN and topic ARN must be filled in or none of them";

@AssertTrue(message = WRONG_NOTIFICATION_VALUES_MSG)
public boolean isValidNotificationProperties() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import com.amazonaws.services.textract.AmazonTextractAsyncClient;
import com.amazonaws.services.textract.model.StartDocumentAnalysisRequest;
import com.amazonaws.services.textract.model.StartDocumentAnalysisResult;
import io.camunda.connector.textract.model.DocumentLocationType;
import io.camunda.connector.textract.model.TextractExecutionType;
import io.camunda.connector.textract.model.TextractRequestData;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -100,6 +101,7 @@ void callWithoutOutputS3BucketShouldNotCreateOutputObj() {
private TextractRequestData prepareReqData(String roleArn, String topicArn) {
return new TextractRequestData(
TextractExecutionType.ASYNC,
DocumentLocationType.S3,
"test-bucket",
"test-object",
"1",
Expand All @@ -113,12 +115,14 @@ private TextractRequestData prepareReqData(String roleArn, String topicArn) {
roleArn,
topicArn,
"outputBucket",
"prefix");
"prefix",
null);
}

private TextractRequestData prepareReqDataWithoutOutputS3Bucket() {
return new TextractRequestData(
TextractExecutionType.ASYNC,
DocumentLocationType.S3,
"test-bucket",
"test-object",
"1",
Expand All @@ -132,6 +136,7 @@ private TextractRequestData prepareReqDataWithoutOutputS3Bucket() {
"roleArn",
"topicArn",
"",
"prefix");
"prefix",
null);
}
}
Loading

0 comments on commit 60f9415

Please sign in to comment.