Skip to content

Commit

Permalink
Merge pull request hashicorp#24149 from mlafeldt/fix-oss-state-locking
Browse files Browse the repository at this point in the history
Fix & improve state locking of OSS backend
  • Loading branch information
jbardin authored Mar 11, 2020
2 parents ed130ab + 109a893 commit ee756fb
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 87 deletions.
18 changes: 10 additions & 8 deletions backend/remote-state/oss/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ import (
"context"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"runtime"
"strings"

"github.com/aliyun/alibaba-cloud-sdk-go/sdk/requests"
"github.com/aliyun/alibaba-cloud-sdk-go/sdk/responses"
"github.com/aliyun/alibaba-cloud-sdk-go/services/sts"
Expand All @@ -12,10 +17,11 @@ import (
"github.com/hashicorp/terraform/helper/schema"
"github.com/hashicorp/terraform/helper/validation"
"github.com/jmespath/go-jmespath"
"io/ioutil"
"os"
"runtime"
"strings"

"log"
"net/http"
"strconv"
"time"

"github.com/aliyun/alibaba-cloud-sdk-go/sdk"
"github.com/aliyun/alibaba-cloud-sdk-go/sdk/auth/credentials"
Expand All @@ -24,10 +30,6 @@ import (
"github.com/hashicorp/go-cleanhttp"
"github.com/hashicorp/terraform/version"
"github.com/mitchellh/go-homedir"
"log"
"net/http"
"strconv"
"time"
)

// New creates a new backend for OSS remote state.
Expand Down
21 changes: 3 additions & 18 deletions backend/remote-state/oss/backend_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ import (
"github.com/hashicorp/terraform/state/remote"
"github.com/hashicorp/terraform/states"

"github.com/aliyun/aliyun-tablestore-go-sdk/tablestore"
"log"
"path"

"github.com/aliyun/aliyun-tablestore-go-sdk/tablestore"
)

const (
Expand All @@ -38,28 +39,12 @@ func (b *Backend) remoteClient(name string) (*RemoteClient, error) {
otsClient: b.otsClient,
}
if b.otsEndpoint != "" && b.otsTable != "" {
table, err := b.otsClient.DescribeTable(&tablestore.DescribeTableRequest{
_, err := b.otsClient.DescribeTable(&tablestore.DescribeTableRequest{
TableName: b.otsTable,
})
if err != nil {
return client, fmt.Errorf("Error describing table store %s: %#v", b.otsTable, err)
}
for _, t := range table.TableMeta.SchemaEntry {
pkMeta := TableStorePrimaryKeyMeta{
PKName: *t.Name,
}
if *t.Type == tablestore.PrimaryKeyType_INTEGER {
pkMeta.PKType = "Integer"
} else if *t.Type == tablestore.PrimaryKeyType_STRING {
pkMeta.PKType = "String"
} else if *t.Type == tablestore.PrimaryKeyType_BINARY {
pkMeta.PKType = "Binary"
} else {
return client, fmt.Errorf("Unsupported PrimaryKey type: %d.", *t.Type)
}
client.otsTabkePK = pkMeta
break
}
}

return client, nil
Expand Down
4 changes: 2 additions & 2 deletions backend/remote-state/oss/backend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,11 @@ func deleteOSSBucket(t *testing.T, ossClient *oss.Client, bucketName string) {
}
}

// create the dynamoDB table, and wait until we can query it.
// create the tablestore table, and wait until we can query it.
func createTablestoreTable(t *testing.T, otsClient *tablestore.TableStoreClient, tableName string) {
tableMeta := new(tablestore.TableMeta)
tableMeta.TableName = tableName
tableMeta.AddPrimaryKeyColumn("testbackend", tablestore.PrimaryKeyType_STRING)
tableMeta.AddPrimaryKeyColumn(pkName, tablestore.PrimaryKeyType_STRING)

tableOption := new(tablestore.TableOption)
tableOption.TimeToAlive = -1
Expand Down
78 changes: 25 additions & 53 deletions backend/remote-state/oss/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,24 @@ import (
"io"

"encoding/hex"
"log"
"sync"
"time"

"github.com/aliyun/aliyun-oss-go-sdk/oss"
"github.com/aliyun/aliyun-tablestore-go-sdk/tablestore"
"github.com/hashicorp/go-multierror"
uuid "github.com/hashicorp/go-uuid"
"github.com/hashicorp/terraform/helper/hashcode"
"github.com/hashicorp/terraform/state"
"github.com/hashicorp/terraform/state/remote"
"github.com/pkg/errors"
"log"
"sync"
"time"
)

// Store the last saved serial in tablestore with this suffix for consistency checks.
const (
// Store the last saved serial in tablestore with this suffix for consistency checks.
stateIDSuffix = "-md5"
statePKValue = "terraform-remote-state-lock"

pkName = "LockID"
)

var (
Expand All @@ -39,11 +40,6 @@ var (
// test hook called when checksums don't match
var testChecksumHook func()

type TableStorePrimaryKeyMeta struct {
PKName string
PKType string
}

type RemoteClient struct {
ossClient *oss.Client
otsClient *tablestore.TableStoreClient
Expand All @@ -55,7 +51,6 @@ type RemoteClient struct {
info *state.LockInfo
mu sync.Mutex
otsTable string
otsTabkePK TableStorePrimaryKeyMeta
}

func (c *RemoteClient) Get() (payload *remote.Payload, err error) {
Expand Down Expand Up @@ -157,6 +152,8 @@ func (c *RemoteClient) Lock(info *state.LockInfo) (string, error) {
return "", nil
}

info.Path = c.lockPath()

if info.ID == "" {
lockID, err := uuid.GenerateUUID()
if err != nil {
Expand All @@ -170,16 +167,12 @@ func (c *RemoteClient) Lock(info *state.LockInfo) (string, error) {
PrimaryKey: &tablestore.PrimaryKey{
PrimaryKeys: []*tablestore.PrimaryKeyColumn{
{
ColumnName: c.otsTabkePK.PKName,
Value: c.getPKValue(),
ColumnName: pkName,
Value: c.lockPath(),
},
},
},
Columns: []tablestore.AttributeColumn{
{
ColumnName: "LockID",
Value: c.lockFile,
},
{
ColumnName: "Info",
Value: string(info.Marshal()),
Expand All @@ -190,7 +183,7 @@ func (c *RemoteClient) Lock(info *state.LockInfo) (string, error) {
},
}

log.Printf("[DEBUG] Recoring state lock in tablestore: %#v", putParams)
log.Printf("[DEBUG] Recording state lock in tablestore: %#v", putParams)

_, err := c.otsClient.PutRow(&tablestore.PutRowRequest{
PutRowChange: putParams,
Expand Down Expand Up @@ -223,12 +216,12 @@ func (c *RemoteClient) getMD5() ([]byte, error) {
PrimaryKey: &tablestore.PrimaryKey{
PrimaryKeys: []*tablestore.PrimaryKeyColumn{
{
ColumnName: c.otsTabkePK.PKName,
Value: c.getPKValue(),
ColumnName: pkName,
Value: c.lockPath() + stateIDSuffix,
},
},
},
ColumnsToGet: []string{"LockID", "Digest"},
ColumnsToGet: []string{pkName, "Digest"},
MaxVersion: 1,
}

Expand Down Expand Up @@ -270,23 +263,19 @@ func (c *RemoteClient) putMD5(sum []byte) error {
PrimaryKey: &tablestore.PrimaryKey{
PrimaryKeys: []*tablestore.PrimaryKeyColumn{
{
ColumnName: c.otsTabkePK.PKName,
Value: c.getPKValue(),
ColumnName: pkName,
Value: c.lockPath() + stateIDSuffix,
},
},
},
Columns: []tablestore.AttributeColumn{
{
ColumnName: "LockID",
Value: c.lockPath() + stateIDSuffix,
},
{
ColumnName: "Digest",
Value: hex.EncodeToString(sum),
},
},
Condition: &tablestore.RowCondition{
RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_NOT_EXIST,
RowExistenceExpectation: tablestore.RowExistenceExpectation_IGNORE,
},
}

Expand Down Expand Up @@ -315,8 +304,8 @@ func (c *RemoteClient) deleteMD5() error {
PrimaryKey: &tablestore.PrimaryKey{
PrimaryKeys: []*tablestore.PrimaryKeyColumn{
{
ColumnName: c.otsTabkePK.PKName,
Value: c.getPKValue(),
ColumnName: pkName,
Value: c.lockPath() + stateIDSuffix,
},
},
},
Expand All @@ -341,12 +330,12 @@ func (c *RemoteClient) getLockInfo() (*state.LockInfo, error) {
PrimaryKey: &tablestore.PrimaryKey{
PrimaryKeys: []*tablestore.PrimaryKeyColumn{
{
ColumnName: c.otsTabkePK.PKName,
Value: c.getPKValue(),
ColumnName: pkName,
Value: c.lockPath(),
},
},
},
ColumnsToGet: []string{"LockID", "Info"},
ColumnsToGet: []string{pkName, "Info"},
MaxVersion: 1,
}

Expand Down Expand Up @@ -394,8 +383,8 @@ func (c *RemoteClient) Unlock(id string) error {
PrimaryKey: &tablestore.PrimaryKey{
PrimaryKeys: []*tablestore.PrimaryKeyColumn{
{
ColumnName: c.otsTabkePK.PKName,
Value: c.getPKValue(),
ColumnName: pkName,
Value: c.lockPath(),
},
},
},
Expand Down Expand Up @@ -457,23 +446,6 @@ func (c *RemoteClient) getObj() (*remote.Payload, error) {
return payload, nil
}

func (c *RemoteClient) getPKValue() (value interface{}) {
value = statePKValue
if c.otsTabkePK.PKType == "Integer" {
value = hashcode.String(statePKValue)
} else if c.otsTabkePK.PKType == "Binary" {
value = stringToBin(statePKValue)
}
return
}

func stringToBin(s string) (binString string) {
for _, c := range s {
binString = fmt.Sprintf("%s%b", binString, c)
}
return
}

const errBadChecksumFmt = `state data in OSS does not have the expected content.
This may be caused by unusually long delays in OSS processing a previous state
Expand Down
47 changes: 47 additions & 0 deletions backend/remote-state/oss/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

"bytes"
"crypto/md5"

"github.com/hashicorp/terraform/backend"
"github.com/hashicorp/terraform/state"
"github.com/hashicorp/terraform/state/remote"
Expand Down Expand Up @@ -84,6 +85,52 @@ func TestRemoteClientLocks(t *testing.T) {
remote.TestRemoteLocks(t, s1.(*remote.State).Client, s2.(*remote.State).Client)
}

// verify that the backend can handle more than one state in the same table
func TestRemoteClientLocks_multipleStates(t *testing.T) {
testACC(t)
bucketName := fmt.Sprintf("tf-remote-oss-test-force-%x", time.Now().Unix())
tableName := fmt.Sprintf("tfRemoteTestForce%x", time.Now().Unix())
path := "testState"

b1 := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{
"bucket": bucketName,
"prefix": path,
"encrypt": true,
"tablestore_table": tableName,
"tablestore_endpoint": RemoteTestUsedOTSEndpoint,
})).(*Backend)

b2 := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{
"bucket": bucketName,
"prefix": path,
"encrypt": true,
"tablestore_table": tableName,
"tablestore_endpoint": RemoteTestUsedOTSEndpoint,
})).(*Backend)

createOSSBucket(t, b1.ossClient, bucketName)
defer deleteOSSBucket(t, b1.ossClient, bucketName)
createTablestoreTable(t, b1.otsClient, tableName)
defer deleteTablestoreTable(t, b1.otsClient, tableName)

s1, err := b1.StateMgr("s1")
if err != nil {
t.Fatal(err)
}
if _, err := s1.Lock(state.NewLockInfo()); err != nil {
t.Fatal("failed to get lock for s1:", err)
}

// s1 is now locked, s2 should not be locked as it's a different state file
s2, err := b2.StateMgr("s2")
if err != nil {
t.Fatal(err)
}
if _, err := s2.Lock(state.NewLockInfo()); err != nil {
t.Fatal("failed to get lock for s2:", err)
}
}

// verify that we can unlock a state with an existing lock
func TestRemoteForceUnlock(t *testing.T) {
testACC(t)
Expand Down
8 changes: 2 additions & 6 deletions website/docs/backends/types/oss.html.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@ the `tablestore_table` field to an existing TableStore table name.

-> **Note:** The OSS backend is available from terraform version 0.12.2.

!> **Warning:** If you set `tablestore_table`, please ensure the table does not contain primary key named
`LockID`, `Info` and `Digest`. Otherwise, there will throw an error `OTSParameterInvalid Duplicated attribute column ...`.

## Example Configuration

```hcl
Expand All @@ -39,7 +36,7 @@ terraform {
This assumes we have a [OSS Bucket](https://www.terraform.io/docs/providers/alicloud/r/oss_bucket.html) created called `bucket-for-terraform-state`,
a [OTS Instance](https://www.terraform.io/docs/providers/alicloud/r/ots_instance.html) called `terraform-remote` and
a [OTS TableStore](https://www.terraform.io/docs/providers/alicloud/r/ots_table.html) called `statelock`. The
Terraform state will be written into the file `path/mystate/version-1.tfstate`. The `TableStore` must have a primary key of type `string`.
Terraform state will be written into the file `path/mystate/version-1.tfstate`. The `TableStore` must have a primary key named `LockID` of type `String`.


## Data Source Configuration
Expand Down Expand Up @@ -90,7 +87,7 @@ The following configuration options or environment variables are supported:
* `prefix` - (Opeional) The path directory of the state file will be stored. Default to "env:".
* `key` - (Optional) The name of the state file. Defaults to `terraform.tfstate`.
* `tablestore_endpoint` / `ALICLOUD_TABLESTORE_ENDPOINT` - (Optional) A custom endpoint for the TableStore API.
* `tablestore_table` - (Optional) A TableStore table for state locking and consistency.
* `tablestore_table` - (Optional) A TableStore table for state locking and consistency. The table must have a primary key named `LockID` of type `String`.
* `encrypt` - (Optional) Whether to enable server side
encryption of the state file. If it is true, OSS will use 'AES256' encryption algorithm to encrypt state file.
* `acl` - (Optional) [Object
Expand All @@ -113,4 +110,3 @@ The nested `assume_role` block supports the following:
* `session_expiration` - (Optional) The time after which the established session for assuming role expires. Valid value range: [900-3600] seconds. Default to 3600 (in this case Alibaba Cloud use own default value). It supports environment variable `ALICLOUD_ASSUME_ROLE_SESSION_EXPIRATION`.

-> **Note:** If you want to store state in the custom OSS endpoint, you can specify a environment variable `OSS_ENDPOINT`, like "oss-cn-beijing-internal.aliyuncs.com"

0 comments on commit ee756fb

Please sign in to comment.