Skip to content

Commit

Permalink
Merge pull request #39556 from drewtul/f-bedrockagent-customtransform…
Browse files Browse the repository at this point in the history
…ation

Bedrock Agent DataSource custom transformation support
  • Loading branch information
ewbankkit authored Oct 2, 2024
2 parents 2b0ad37 + bd5b39d commit e2d4b22
Show file tree
Hide file tree
Showing 5 changed files with 289 additions and 9 deletions.
3 changes: 3 additions & 0 deletions .changelog/39556.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:enhancement
resource/aws_bedrockagent_data_source: Add `vector_ingestion_configuration.custom_transformation_configuration` argument
```
15 changes: 8 additions & 7 deletions internal/service/bedrockagent/bedrockagent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@ func TestAccBedrockAgent_serial(t *testing.T) {
"updateOpenSearch": testAccKnowledgeBase_updateOpenSearch,
},
"DataSource": {
acctest.CtBasic: testAccDataSource_basic,
acctest.CtDisappears: testAccDataSource_disappears,
"full": testAccDataSource_full,
"update": testAccDataSource_update,
"semantic": testAccDataSource_fullSemantic,
"hierarchical": testAccDataSource_fullHierarchical,
"parsing": testAccDataSource_parsing,
acctest.CtBasic: testAccDataSource_basic,
acctest.CtDisappears: testAccDataSource_disappears,
"full": testAccDataSource_full,
"update": testAccDataSource_update,
"semantic": testAccDataSource_fullSemantic,
"hierarchical": testAccDataSource_fullHierarchical,
"parsing": testAccDataSource_parsing,
"customtransformation": testAccDataSource_fullCustomTranformation,
},
}

Expand Down
131 changes: 129 additions & 2 deletions internal/service/bedrockagent/data_source.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,106 @@ func (r *dataSourceResource) Schema(ctx context.Context, request resource.Schema
},
},
},
"custom_transformation_configuration": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[customTransformationConfigurationModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
},
NestedObject: schema.NestedBlockObject{
Blocks: map[string]schema.Block{
"intermediate_storage": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[intermediaStorageModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
},
NestedObject: schema.NestedBlockObject{
Blocks: map[string]schema.Block{
"s3_location": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[s3LocationModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
},
NestedObject: schema.NestedBlockObject{
Attributes: map[string]schema.Attribute{
names.AttrURI: schema.StringAttribute{
Required: true,
PlanModifiers: []planmodifier.String{
stringplanmodifier.RequiresReplace(),
},
},
},
},
},
},
},
},
"transformation": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[transformationModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
},
NestedObject: schema.NestedBlockObject{
Attributes: map[string]schema.Attribute{
"step_to_apply": schema.StringAttribute{
CustomType: fwtypes.StringEnumType[awstypes.StepType](),
Required: true,
PlanModifiers: []planmodifier.String{
stringplanmodifier.RequiresReplace(),
},
},
},
Blocks: map[string]schema.Block{
"transformation_function": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[transformationFunctionModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
},
NestedObject: schema.NestedBlockObject{
Blocks: map[string]schema.Block{
"transformation_lambda_configuration": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[transformationLambdaConfigurationModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
},
NestedObject: schema.NestedBlockObject{
Attributes: map[string]schema.Attribute{
"lambda_arn": schema.StringAttribute{
CustomType: fwtypes.ARNType,
Required: true,
PlanModifiers: []planmodifier.String{
stringplanmodifier.RequiresReplace(),
},
},
},
},
},
},
},
},
},
},
},
},
},
},
"parsing_configuration": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[parsingConfigurationModel](ctx),
PlanModifiers: []planmodifier.List{
Expand Down Expand Up @@ -657,15 +757,42 @@ type serverSideEncryptionConfigurationModel struct {
}

// vectorIngestionConfigurationModel is the Terraform model for the
// vector_ingestion_configuration block. Each field is a nested block list
// keyed by its tfsdk tag: chunking, custom transformation and parsing.
type vectorIngestionConfigurationModel struct {
ChunkingConfiguration fwtypes.ListNestedObjectValueOf[chunkingConfigurationModel] `tfsdk:"chunking_configuration"`
ParsingConfiguration fwtypes.ListNestedObjectValueOf[parsingConfigurationModel] `tfsdk:"parsing_configuration"`
ChunkingConfiguration fwtypes.ListNestedObjectValueOf[chunkingConfigurationModel] `tfsdk:"chunking_configuration"`
CustomTransformationConfiguration fwtypes.ListNestedObjectValueOf[customTransformationConfigurationModel] `tfsdk:"custom_transformation_configuration"`
ParsingConfiguration fwtypes.ListNestedObjectValueOf[parsingConfigurationModel] `tfsdk:"parsing_configuration"`
}

// parsingConfigurationModel is the Terraform model for the
// parsing_configuration block: a parsing strategy enum plus an optional
// nested bedrock_foundation_model_configuration block.
type parsingConfigurationModel struct {
ParsingStrategy fwtypes.StringEnum[awstypes.ParsingStrategy] `tfsdk:"parsing_strategy"`
BedrockFoundationModelConfiguration fwtypes.ListNestedObjectValueOf[bedrockFoundationModelConfigurationModel] `tfsdk:"bedrock_foundation_model_configuration"`
}

// customTransformationConfigurationModel is the Terraform model for the
// custom_transformation_configuration block. It holds two nested blocks:
// intermediate_storage and transformation.
type customTransformationConfigurationModel struct {
IntermediateStorage fwtypes.ListNestedObjectValueOf[intermediaStorageModel] `tfsdk:"intermediate_storage"`
Transformation fwtypes.ListNestedObjectValueOf[transformationModel] `tfsdk:"transformation"`
}

// intermediaStorageModel is the Terraform model for the intermediate_storage
// block; its only member is the nested s3_location block.
//
// NOTE(review): the type name looks like a typo for "intermediateStorageModel".
// Renaming it requires updating every reference (schema CustomType and the
// parent model field) in the same change.
type intermediaStorageModel struct {
S3Location fwtypes.ListNestedObjectValueOf[s3LocationModel] `tfsdk:"s3_location"`
}

// s3LocationModel is the Terraform model for the s3_location block and
// carries the single "uri" string attribute.
type s3LocationModel struct {
Uri types.String `tfsdk:"uri"`
}

// transformationModel is the Terraform model for the transformation block:
// the step_to_apply enum (awstypes.StepType) and the nested
// transformation_function block.
type transformationModel struct {
StepToApply fwtypes.StringEnum[awstypes.StepType] `tfsdk:"step_to_apply"`
TransformationFunction fwtypes.ListNestedObjectValueOf[transformationFunctionModel] `tfsdk:"transformation_function"`
}

// transformationFunctionModel is the Terraform model for the
// transformation_function block; its only member is the nested
// transformation_lambda_configuration block.
type transformationFunctionModel struct {
TransformationLambdaConfiguration fwtypes.ListNestedObjectValueOf[transformationLambdaConfigurationModel] `tfsdk:"transformation_lambda_configuration"`
}

// transformationLambdaConfigurationModel is the Terraform model for the
// transformation_lambda_configuration block and carries the ARN-typed
// "lambda_arn" attribute.
type transformationLambdaConfigurationModel struct {
LambdaArn fwtypes.ARN `tfsdk:"lambda_arn"`
}

type bedrockFoundationModelConfigurationModel struct {
ModelArn fwtypes.ARN `tfsdk:"model_arn"`
ParsingPrompt fwtypes.ListNestedObjectValueOf[parsingPromptModel] `tfsdk:"parsing_prompt"`
Expand Down
116 changes: 116 additions & 0 deletions internal/service/bedrockagent/data_source_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,71 @@ func testAccDataSource_fullHierarchical(t *testing.T) {
})
}

// testAccDataSource_fullCustomTranformation creates a data source whose
// vector_ingestion_configuration contains a custom_transformation_configuration
// block, verifies the resulting state (including every nested custom
// transformation attribute) and then verifies import round-trips.
//
// Prerequisites (the test is skipped when any executable is not on PATH):
// * psql run via null_resource/provisioner "local-exec"
// * jq for parsing output from aws cli to retrieve postgres password
// * aws cli, whose output is parsed by jq
func testAccDataSource_fullCustomTranformation(t *testing.T) {
	acctest.SkipIfExeNotOnPath(t, "psql")
	acctest.SkipIfExeNotOnPath(t, "jq")
	acctest.SkipIfExeNotOnPath(t, "aws")

	ctx := acctest.Context(t)
	if testing.Short() {
		t.Skip("skipping long-running test in short mode")
	}

	var dataSource types.DataSource
	rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix)
	resourceName := "aws_bedrockagent_data_source.test"
	foundationModel := "amazon.titan-embed-text-v1"

	// State path prefixes for the nested blocks under test; kept as locals so
	// the individual checks below stay readable.
	const (
		customTransformation = "vector_ingestion_configuration.0.custom_transformation_configuration"
		intermediateStorage  = customTransformation + ".0.intermediate_storage"
		transformation       = customTransformation + ".0.transformation"
		transformationFunc   = transformation + ".0.transformation_function"
		lambdaConfiguration  = transformationFunc + ".0.transformation_lambda_configuration"
	)

	resource.Test(t, resource.TestCase{
		PreCheck: func() {
			acctest.PreCheck(ctx, t)
		},
		ErrorCheck:               acctest.ErrorCheck(t, names.BedrockAgentServiceID),
		ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories,
		ExternalProviders: map[string]resource.ExternalProvider{
			"null": {
				Source:            "hashicorp/null",
				VersionConstraint: "3.2.2",
			},
		},
		CheckDestroy: testAccCheckDataSourceDestroy(ctx),
		Steps: []resource.TestStep{
			{
				Config: testAccDataSourceConfig_fullCustomTransformation(rName, foundationModel),
				Check: resource.ComposeAggregateTestCheckFunc(
					testAccCheckDataSourceExists(ctx, resourceName, &dataSource),
					resource.TestCheckResourceAttr(resourceName, "data_deletion_policy", "RETAIN"),
					resource.TestCheckResourceAttr(resourceName, "data_source_configuration.#", acctest.Ct1),
					resource.TestCheckResourceAttr(resourceName, "data_source_configuration.0.s3_configuration.#", acctest.Ct1),
					resource.TestCheckResourceAttrSet(resourceName, "data_source_configuration.0.s3_configuration.0.bucket_arn"),
					resource.TestCheckNoResourceAttr(resourceName, "data_source_configuration.0.s3_configuration.0.bucket_owner_account_id"),
					resource.TestCheckResourceAttr(resourceName, "data_source_configuration.0.s3_configuration.0.inclusion_prefixes.#", acctest.Ct1),
					resource.TestCheckTypeSetElemAttr(resourceName, "data_source_configuration.0.s3_configuration.0.inclusion_prefixes.*", "Europe/France/Nouvelle-Aquitaine/Bordeaux"),
					resource.TestCheckResourceAttr(resourceName, "data_source_configuration.0.type", "S3"),
					resource.TestCheckResourceAttrSet(resourceName, "data_source_id"),
					resource.TestCheckResourceAttr(resourceName, names.AttrDescription, "testing"),
					resource.TestCheckResourceAttr(resourceName, names.AttrName, rName),
					resource.TestCheckResourceAttr(resourceName, "server_side_encryption_configuration.#", acctest.Ct0),
					resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.#", acctest.Ct1),
					resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.#", acctest.Ct1),
					resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.chunking_strategy", "FIXED_SIZE"),
					resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.fixed_size_chunking_configuration.#", acctest.Ct1),
					resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.fixed_size_chunking_configuration.0.max_tokens", acctest.Ct3),
					resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.fixed_size_chunking_configuration.0.overlap_percentage", "80"),
					// The feature under test: every level of the custom
					// transformation block must round-trip into state.
					resource.TestCheckResourceAttr(resourceName, customTransformation+".#", acctest.Ct1),
					resource.TestCheckResourceAttr(resourceName, intermediateStorage+".#", acctest.Ct1),
					resource.TestCheckResourceAttr(resourceName, intermediateStorage+".0.s3_location.#", acctest.Ct1),
					// The config names the intermediate bucket "<rName>-im".
					resource.TestCheckResourceAttr(resourceName, intermediateStorage+".0.s3_location.0.uri", fmt.Sprintf("s3://%s-im/customTransform", rName)),
					resource.TestCheckResourceAttr(resourceName, transformation+".#", acctest.Ct1),
					resource.TestCheckResourceAttr(resourceName, transformation+".0.step_to_apply", "POST_CHUNKING"),
					resource.TestCheckResourceAttr(resourceName, transformationFunc+".#", acctest.Ct1),
					resource.TestCheckResourceAttr(resourceName, lambdaConfiguration+".#", acctest.Ct1),
					resource.TestCheckResourceAttrPair(resourceName, lambdaConfiguration+".0.lambda_arn", "aws_lambda_function.test_lambda", names.AttrARN),
				),
			},
			{
				ResourceName:      resourceName,
				ImportState:       true,
				ImportStateVerify: true,
			},
		},
	})
}

// Prerequisites:
// * psql run via null_resource/provisioner "local-exec"
// * jq for parsing output from aws cli to retrieve postgres password
Expand Down Expand Up @@ -659,6 +724,57 @@ resource "aws_bedrockagent_data_source" "test" {
`, rName))
}

// testAccDataSourceConfig_fullCustomTransformation returns the Terraform
// configuration used by the custom transformation acceptance test. It composes
// the shared data source base config, the Lambda config from
// testAccAgentActionGroupConfig_lambda (referenced below as
// aws_lambda_function.test_lambda), and a data source that uses fixed-size
// chunking plus a POST_CHUNKING custom transformation. An extra S3 bucket named
// "<rName>-im" is declared to serve as the intermediate storage location.
func testAccDataSourceConfig_fullCustomTransformation(rName, embeddingModel string) string {
return acctest.ConfigCompose(testAccDataSourceConfig_base(rName, embeddingModel),
testAccAgentActionGroupConfig_lambda(rName), fmt.Sprintf(`
resource "aws_bedrockagent_data_source" "test" {
name = %[1]q
knowledge_base_id = aws_bedrockagent_knowledge_base.test.id
data_deletion_policy = "RETAIN"
description = "testing"
data_source_configuration {
type = "S3"
s3_configuration {
bucket_arn = aws_s3_bucket.test.arn
inclusion_prefixes = ["Europe/France/Nouvelle-Aquitaine/Bordeaux"]
}
}
vector_ingestion_configuration {
chunking_configuration {
chunking_strategy = "FIXED_SIZE"
fixed_size_chunking_configuration {
max_tokens = 3
overlap_percentage = 80
}
}
custom_transformation_configuration {
intermediate_storage {
s3_location {
uri = "s3://${aws_s3_bucket.test_im.bucket}/customTransform"
}
}
transformation {
step_to_apply = "POST_CHUNKING"
transformation_function {
transformation_lambda_configuration {
lambda_arn = aws_lambda_function.test_lambda.arn
}
}
}
}
}
}
resource "aws_s3_bucket" "test_im" {
bucket = "%[1]s-im"
}
`, rName))
}

func testAccDataSourceConfig_updated(rName, embeddingModel string) string {
return acctest.ConfigCompose(testAccDataSourceConfig_base(rName, embeddingModel), fmt.Sprintf(`
resource "aws_bedrockagent_data_source" "test" {
Expand Down
33 changes: 33 additions & 0 deletions website/docs/r/bedrockagent_data_source.html.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ The `server_side_encryption_configuration` configuration block supports the foll
The `vector_ingestion_configuration` configuration block supports the following arguments:

* `chunking_configuration` - (Optional, Forces new resource) Details about how to chunk the documents in the data source. A chunk refers to an excerpt from a data source that is returned when the knowledge base that it belongs to is queried. See [`chunking_configuration` block](#chunking_configuration-block) for details.
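* `custom_transformation_configuration` - (Optional, Forces new resource) Configuration for custom transformation of data source documents. See [`custom_transformation_configuration` block](#custom_transformation_configuration-block) for details.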
* `parsing_configuration` - (Optional, Forces new resource) Configuration for custom parsing of data source documents. See [`parsing_configuration` block](#parsing_configuration-block) for details.

### `chunking_configuration` block
Expand Down Expand Up @@ -107,6 +108,38 @@ The `semantic_chunking_configuration` block supports the following arguments:
* `buffer_size` - (Required, Forces new resource) The buffer size.
* `max_tokens` - (Required, Forces new resource) The maximum number of tokens a chunk can contain.

### `custom_transformation_configuration` block

The `custom_transformation_configuration` block supports the following arguments:

* `intermediate_storage` - (Required, Forces new resource) The intermediate storage for custom transformation.
* `transformation` - (Required, Forces new resource) The transformation step to run during ingestion, including the function that performs it.

### `intermediate_storage` block

The `intermediate_storage` block supports the following arguments:

* `s3_location` - (Required, Forces new resource) Configuration block for intermediate S3 storage.

### `s3_location` block

The `s3_location` block supports the following arguments:

* `uri` - (Required, Forces new resource) S3 URI for intermediate storage.

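### `transformation` block

The `transformation` block supports the following arguments:

* `step_to_apply` - (Required, Forces new resource) The ingestion step at which the transformation is applied. Currently only `POST_CHUNKING` is supported.
* `transformation_function` - (Required, Forces new resource) The function that performs the custom transformation.

### `transformation_function` block

The `transformation_function` block supports the following arguments:

* `transformation_lambda_configuration` - (Required, Forces new resource) The lambda configuration for custom transformation.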

### `transformation_lambda_configuration` block

The `transformation_lambda_configuration` block supports the following arguments:

* `lambda_arn` - (Required, Forces new resource) The ARN of the lambda to use for custom transformation.

### `parsing_configuration` block

The `parsing_configuration` configuration block supports the following arguments:
Expand Down

0 comments on commit e2d4b22

Please sign in to comment.